Merge remote-tracking branch 'vendor/master'
This commit is contained in:
commit
bf9e7d2760
44 changed files with 517 additions and 568 deletions
6
.github/workflows/ci.yaml
vendored
6
.github/workflows/ci.yaml
vendored
|
@ -13,7 +13,7 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
|
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
@ -32,7 +32,7 @@ jobs:
|
||||||
if: ${{ matrix.python-version != env.DEFAULT_PYTHON }}
|
if: ${{ matrix.python-version != env.DEFAULT_PYTHON }}
|
||||||
|
|
||||||
- name: Test with tox (and upload coverage)
|
- name: Test with tox (and upload coverage)
|
||||||
uses: paambaati/codeclimate-action@v5.0.0
|
uses: paambaati/codeclimate-action@v8.0.0
|
||||||
if: ${{ matrix.python-version == env.DEFAULT_PYTHON }}
|
if: ${{ matrix.python-version == env.DEFAULT_PYTHON }}
|
||||||
env:
|
env:
|
||||||
CC_TEST_REPORTER_ID: 2a411f596959fc32f5d73f3ba7cef8cc4d5733299d742dbfc97fd6c190b9010c
|
CC_TEST_REPORTER_ID: 2a411f596959fc32f5d73f3ba7cef8cc4d5733299d742dbfc97fd6c190b9010c
|
||||||
|
@ -42,6 +42,6 @@ jobs:
|
||||||
${{ github.workspace }}/.tox/reports/*/coverage.xml:coverage.py
|
${{ github.workspace }}/.tox/reports/*/coverage.xml:coverage.py
|
||||||
prefix: ${{ github.workspace }}/.tox/py39/lib/python3.9/site-packages
|
prefix: ${{ github.workspace }}/.tox/py39/lib/python3.9/site-packages
|
||||||
|
|
||||||
- uses: codecov/codecov-action@v3
|
- uses: codecov/codecov-action@v4
|
||||||
with:
|
with:
|
||||||
directory: '.tox/reports'
|
directory: '.tox/reports'
|
||||||
|
|
35
.github/workflows/pages.yml
vendored
35
.github/workflows/pages.yml
vendored
|
@ -5,12 +5,19 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: read
|
||||||
|
pages: write
|
||||||
|
id-token: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: "pages"
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
deploy:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
@ -28,10 +35,24 @@ jobs:
|
||||||
pip install wheel
|
pip install wheel
|
||||||
pip install git+https://github.com/spanezz/staticsite.git@v2.3
|
pip install git+https://github.com/spanezz/staticsite.git@v2.3
|
||||||
ssite build --output public
|
ssite build --output public
|
||||||
|
cd public
|
||||||
|
rm -rf Jenkinsfile dosagelib scripts tests
|
||||||
|
|
||||||
- name: Deploy
|
- name: Setup Pages
|
||||||
uses: peaceiris/actions-gh-pages@v3
|
id: pages
|
||||||
|
uses: actions/configure-pages@v5
|
||||||
|
- name: Upload artifact
|
||||||
|
uses: actions/upload-pages-artifact@v3
|
||||||
with:
|
with:
|
||||||
cname: dosage.rocks
|
path: public
|
||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
exclude_assets: 'Jenkinsfile,dosagelib,scripts,setup.*,tests,*.ini'
|
deploy:
|
||||||
|
environment:
|
||||||
|
name: github-pages
|
||||||
|
url: ${{ steps.deployment.outputs.page_url }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
steps:
|
||||||
|
- name: Deploy to GitHub Pages
|
||||||
|
id: deployment
|
||||||
|
uses: actions/deploy-pages@v4
|
||||||
|
|
2
COPYING
2
COPYING
|
@ -1,6 +1,6 @@
|
||||||
Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
Copyright (C) 2012-2014 Bastian Kleineidam
|
Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
Copyright (C) 2015-2022 Tobias Gruetzmacher
|
Copyright (C) 2015-2024 Tobias Gruetzmacher
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
a copy of this software and associated documentation files (the
|
a copy of this software and associated documentation files (the
|
||||||
|
|
3
Jenkinsfile
vendored
3
Jenkinsfile
vendored
|
@ -4,7 +4,6 @@ def pys = [
|
||||||
[name: 'Python 3.10', docker: '3.10-bookworm', tox:'py310', main: false],
|
[name: 'Python 3.10', docker: '3.10-bookworm', tox:'py310', main: false],
|
||||||
[name: 'Python 3.9', docker: '3.9-bookworm', tox:'py39', main: false],
|
[name: 'Python 3.9', docker: '3.9-bookworm', tox:'py39', main: false],
|
||||||
[name: 'Python 3.8', docker: '3.8-bookworm', tox:'py38', main: false],
|
[name: 'Python 3.8', docker: '3.8-bookworm', tox:'py38', main: false],
|
||||||
[name: 'Python 3.7', docker: '3.7-bookworm', tox:'py37', main: false],
|
|
||||||
]
|
]
|
||||||
|
|
||||||
properties([
|
properties([
|
||||||
|
@ -75,7 +74,7 @@ pys.each { py ->
|
||||||
parallel(tasks)
|
parallel(tasks)
|
||||||
parallel modern: {
|
parallel modern: {
|
||||||
stage('Modern Windows binary') {
|
stage('Modern Windows binary') {
|
||||||
windowsBuild('3.11', 'dosage.exe')
|
windowsBuild('3.12', 'dosage.exe')
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
legacy: {
|
legacy: {
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
# Dosage
|
# Dosage
|
||||||
|
|
||||||
[![Tests](https://github.com/webcomics/dosage/actions/workflows/test.yml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/test.yml)
|
[![CI](https://github.com/webcomics/dosage/actions/workflows/ci.yaml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/ci.yaml)
|
||||||
[![Code Climate](https://codeclimate.com/github/webcomics/dosage/badges/gpa.svg)](https://codeclimate.com/github/webcomics/dosage)
|
[![Code Climate](https://codeclimate.com/github/webcomics/dosage/badges/gpa.svg)](https://codeclimate.com/github/webcomics/dosage)
|
||||||
[![codecov](https://codecov.io/gh/webcomics/dosage/branch/master/graph/badge.svg)](https://codecov.io/gh/webcomics/dosage)
|
[![codecov](https://codecov.io/gh/webcomics/dosage/branch/master/graph/badge.svg)](https://codecov.io/gh/webcomics/dosage)
|
||||||
![Maintenance](https://img.shields.io/maintenance/yes/2023.svg)
|
![Maintenance](https://img.shields.io/maintenance/yes/2024.svg)
|
||||||
![License](https://img.shields.io/github/license/webcomics/dosage)
|
![License](https://img.shields.io/github/license/webcomics/dosage)
|
||||||
|
|
||||||
Dosage is designed to keep a local copy of specific webcomics and other
|
Dosage is designed to keep a local copy of specific webcomics and other
|
||||||
|
@ -72,7 +72,7 @@ are old enough to view them.
|
||||||
### Dependencies
|
### Dependencies
|
||||||
|
|
||||||
Since dosage is written in [Python](http://www.python.org/), a Python
|
Since dosage is written in [Python](http://www.python.org/), a Python
|
||||||
installation is required: Dosage needs at least Python 3.7. Dosage requires
|
installation is required: Dosage needs at least Python 3.8. Dosage requires
|
||||||
some Python modules from PyPI, so installation with `pip` is recommended.
|
some Python modules from PyPI, so installation with `pip` is recommended.
|
||||||
|
|
||||||
### Using the Windows binary
|
### Using the Windows binary
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
"""
|
"""
|
||||||
Automated comic downloader. Dosage traverses comic websites in
|
Automated comic downloader. Dosage traverses comic websites in
|
||||||
order to download each strip of the comic. The intended use is for
|
order to download each strip of the comic. The intended use is for
|
||||||
|
@ -14,14 +14,11 @@ The primary interface is the 'dosage' commandline script.
|
||||||
Comic modules for each comic are located in L{dosagelib.plugins}.
|
Comic modules for each comic are located in L{dosagelib.plugins}.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
|
||||||
from importlib.metadata import version, PackageNotFoundError
|
from importlib.metadata import version, PackageNotFoundError
|
||||||
except ImportError:
|
|
||||||
from importlib_metadata import version, PackageNotFoundError
|
|
||||||
|
|
||||||
from .output import out
|
from .output import out
|
||||||
|
|
||||||
AppName = u'dosage'
|
AppName = 'dosage'
|
||||||
try:
|
try:
|
||||||
__version__ = version(AppName) # PEP 396
|
__version__ = version(AppName) # PEP 396
|
||||||
except PackageNotFoundError:
|
except PackageNotFoundError:
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import glob
|
import glob
|
||||||
import codecs
|
import codecs
|
||||||
import contextlib
|
import contextlib
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
from .output import out
|
from .output import out
|
||||||
from .util import unquote, getFilename, urlopen, strsize
|
from .util import unquote, getFilename, urlopen, strsize
|
||||||
|
@ -14,27 +17,27 @@ from .events import getHandler
|
||||||
|
|
||||||
|
|
||||||
# Maximum content size for images
|
# Maximum content size for images
|
||||||
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
|
MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB
|
||||||
# RFC 1123 format, as preferred by RFC 2616
|
# RFC 1123 format, as preferred by RFC 2616
|
||||||
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
|
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
|
||||||
|
|
||||||
|
|
||||||
class ComicStrip(object):
|
class ComicStrip:
|
||||||
"""A list of comic image URLs."""
|
"""A list of comic image URLs."""
|
||||||
|
|
||||||
def __init__(self, scraper, strip_url, image_urls, text=None):
|
def __init__(self, scraper, strip_url: str, image_urls: str, text=None) -> None:
|
||||||
"""Store the image URL list."""
|
"""Store the image URL list."""
|
||||||
self.scraper = scraper
|
self.scraper = scraper
|
||||||
self.strip_url = strip_url
|
self.strip_url = strip_url
|
||||||
self.image_urls = image_urls
|
self.image_urls = image_urls
|
||||||
self.text = text
|
self.text = text
|
||||||
|
|
||||||
def getImages(self):
|
def getImages(self) -> Iterator[ComicImage]:
|
||||||
"""Get a list of image downloaders."""
|
"""Get a list of image downloaders."""
|
||||||
for image_url in self.image_urls:
|
for image_url in self.image_urls:
|
||||||
yield self.getDownloader(image_url)
|
yield self.getDownloader(image_url)
|
||||||
|
|
||||||
def getDownloader(self, url):
|
def getDownloader(self, url: str) -> ComicImage:
|
||||||
"""Get an image downloader."""
|
"""Get an image downloader."""
|
||||||
filename = self.scraper.namer(url, self.strip_url)
|
filename = self.scraper.namer(url, self.strip_url)
|
||||||
if filename is None:
|
if filename is None:
|
||||||
|
@ -43,7 +46,7 @@ class ComicStrip(object):
|
||||||
text=self.text)
|
text=self.text)
|
||||||
|
|
||||||
|
|
||||||
class ComicImage(object):
|
class ComicImage:
|
||||||
"""A comic image downloader."""
|
"""A comic image downloader."""
|
||||||
|
|
||||||
ChunkBytes = 1024 * 100 # 100KB
|
ChunkBytes = 1024 * 100 # 100KB
|
||||||
|
@ -64,7 +67,7 @@ class ComicImage(object):
|
||||||
headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
|
headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
|
||||||
self.urlobj = urlopen(self.url, self.scraper.session,
|
self.urlobj = urlopen(self.url, self.scraper.session,
|
||||||
referrer=self.referrer,
|
referrer=self.referrer,
|
||||||
max_content_bytes=MaxImageBytes, stream=True,
|
max_content_bytes=MAX_IMAGE_BYTES, stream=True,
|
||||||
headers=headers)
|
headers=headers)
|
||||||
if self.urlobj.status_code == 304: # Not modified
|
if self.urlobj.status_code == 304: # Not modified
|
||||||
return
|
return
|
||||||
|
|
|
@ -1,39 +1,49 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
from .util import getQueryParams
|
from .util import getQueryParams
|
||||||
|
from .scraper import Scraper
|
||||||
|
|
||||||
|
|
||||||
def queryNamer(param, use_page_url=False):
|
class Namer(Protocol):
|
||||||
|
"""A protocol for generic callbacks to name web comic images."""
|
||||||
|
def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
def queryNamer(param, use_page_url=False) -> Namer:
|
||||||
"""Get name from URL query part."""
|
"""Get name from URL query part."""
|
||||||
def _namer(self, image_url, page_url):
|
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||||
"""Get URL query part."""
|
"""Get URL query part."""
|
||||||
url = page_url if use_page_url else image_url
|
url = page_url if use_page_url else image_url
|
||||||
return getQueryParams(url)[param][0]
|
return getQueryParams(url)[param][0]
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
def regexNamer(regex, use_page_url=False):
|
def regexNamer(regex, use_page_url=False) -> Namer:
|
||||||
"""Get name from regular expression."""
|
"""Get name from regular expression."""
|
||||||
def _namer(self, image_url, page_url):
|
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||||
"""Get first regular expression group."""
|
"""Get first regular expression group."""
|
||||||
url = page_url if use_page_url else image_url
|
url = page_url if use_page_url else image_url
|
||||||
mo = regex.search(url)
|
mo = regex.search(url)
|
||||||
if mo:
|
return mo.group(1) if mo else None
|
||||||
return mo.group(1)
|
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
|
def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer:
|
||||||
"""Get name by mashing path parts together with underscores."""
|
"""Get name by mashing path parts together with underscores."""
|
||||||
def _namer(self, imageurl, pageurl):
|
def _namer(self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||||
# Split and drop host name
|
# Split and drop host name
|
||||||
pageurlsplit = pageurl.split('/')[3:]
|
pagesplit = page_url.split('/')[3:]
|
||||||
imageurlsplit = imageurl.split('/')[3:]
|
imagesplit = image_url.split('/')[3:]
|
||||||
joinparts = ([pageurlsplit[i] for i in pageurlparts] +
|
joinparts = ([pagesplit[i] for i in pageparts] +
|
||||||
[imageurlsplit[i] for i in imageurlparts])
|
[imagesplit[i] for i in imageparts])
|
||||||
return joinchar.join(joinparts)
|
return joinchar.join(joinparts)
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,18 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape, sub, MULTILINE
|
from re import compile, escape, sub, MULTILINE
|
||||||
|
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..scraper import BasicScraper, ParserScraper, _BasicScraper, _ParserScraper
|
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
from ..helpers import joinPathPartsNamer, bounceStarter, indirectStarter
|
||||||
from .common import WordPressScraper, WordPressNavi, WordPressWebcomic
|
from .common import WordPressScraper, WordPressNavi, WordPressWebcomic
|
||||||
|
|
||||||
|
|
||||||
class AbstruseGoose(_ParserScraper):
|
class AbstruseGoose(ParserScraper):
|
||||||
url = 'https://abstrusegoose.com/'
|
url = 'https://web.archive.org/web/20230930172141/https://abstrusegoose.com/'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
|
@ -41,24 +41,16 @@ class AbsurdNotions(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class AcademyVale(_BasicScraper):
|
class Achewood(ParserScraper):
|
||||||
url = 'http://www.imagerie.com/vale/'
|
baseUrl = 'https://achewood.com/'
|
||||||
stripUrl = url + 'avarch.cgi?%s'
|
stripUrl = baseUrl + '%s/title.html'
|
||||||
firstStripUrl = stripUrl % '001'
|
url = stripUrl % '2016/12/25'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
|
firstStripUrl = stripUrl % '2001/10/01'
|
||||||
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") +
|
imageSearch = '//img[d:class("comicImage")]'
|
||||||
tagre('img', 'src', r'AVNavBack\.gif'))
|
prevSearch = '//a[d:class("comic_prev")]'
|
||||||
help = 'Index format: nnn'
|
namer = joinPathPartsNamer(pageparts=range(0, 2))
|
||||||
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
endOfLife = True
|
||||||
class Achewood(_ParserScraper):
|
|
||||||
url = 'https://www.achewood.com/'
|
|
||||||
stripUrl = url + 'index.php?date=%s'
|
|
||||||
firstStripUrl = stripUrl % '10012001'
|
|
||||||
imageSearch = '//p[@id="comic_body"]//img'
|
|
||||||
prevSearch = '//span[d:class("left")]/a[d:class("dateNav")]'
|
|
||||||
help = 'Index format: mmddyyyy'
|
|
||||||
namer = regexNamer(compile(r'date=(\d+)'))
|
|
||||||
|
|
||||||
|
|
||||||
class AdventuresOfFifne(_ParserScraper):
|
class AdventuresOfFifne(_ParserScraper):
|
||||||
|
@ -117,12 +109,8 @@ class AhoiPolloi(_ParserScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class AhoyEarth(WordPressNavi):
|
|
||||||
url = 'http://www.ahoyearth.com/'
|
|
||||||
|
|
||||||
|
|
||||||
class AirForceBlues(WordPressScraper):
|
class AirForceBlues(WordPressScraper):
|
||||||
url = 'http://farvatoons.com/'
|
url = 'https://web.archive.org/web/20210102113825/http://farvatoons.com/'
|
||||||
firstStripUrl = url + 'comic/in-texas-there-are-texans/'
|
firstStripUrl = url + 'comic/in-texas-there-are-texans/'
|
||||||
|
|
||||||
|
|
||||||
|
@ -235,14 +223,11 @@ class AltermetaOld(_ParserScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class AmazingSuperPowers(_BasicScraper):
|
class AmazingSuperPowers(WordPressNavi):
|
||||||
url = 'http://www.amazingsuperpowers.com/'
|
url = 'https://www.amazingsuperpowers.com/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2007/09/heredity'
|
firstStripUrl = stripUrl % '2007/09/heredity'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = '//div[d:class("comicpane")]/img'
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
|
||||||
help = 'Index format: yyyy/mm/name'
|
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
|
@ -271,19 +256,7 @@ class Amya(WordPressScraper):
|
||||||
url = 'http://www.amyachronicles.com/'
|
url = 'http://www.amyachronicles.com/'
|
||||||
|
|
||||||
|
|
||||||
class Anaria(_ParserScraper):
|
class Angband(ParserScraper):
|
||||||
url = 'https://www.leahbriere.com/anaria-the-witchs-dream/'
|
|
||||||
firstStripUrl = url
|
|
||||||
imageSearch = '//div[contains(@class, "gallery")]//a'
|
|
||||||
multipleImagesPerStrip = True
|
|
||||||
endOfLife = True
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
filename = imageUrl.rsplit('/', 1)[-1]
|
|
||||||
return filename.replace('00.jpg', 'new00.jpg').replace('new', '1')
|
|
||||||
|
|
||||||
|
|
||||||
class Angband(_ParserScraper):
|
|
||||||
url = 'http://angband.calamarain.net/'
|
url = 'http://angband.calamarain.net/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
imageSearch = '//img'
|
imageSearch = '//img'
|
||||||
|
@ -292,7 +265,7 @@ class Angband(_ParserScraper):
|
||||||
|
|
||||||
def starter(self):
|
def starter(self):
|
||||||
page = self.getPage(self.url)
|
page = self.getPage(self.url)
|
||||||
self.pages = page.xpath('//p/a[not(contains(@href, "cast"))]/@href')
|
self.pages = self.match(page, '//p/a[not(contains(@href, "cast"))]/@href')
|
||||||
self.firstStripUrl = self.pages[0]
|
self.firstStripUrl = self.pages[0]
|
||||||
return self.pages[-1]
|
return self.pages[-1]
|
||||||
|
|
||||||
|
@ -300,14 +273,6 @@ class Angband(_ParserScraper):
|
||||||
return self.pages[self.pages.index(url) - 1]
|
return self.pages[self.pages.index(url) - 1]
|
||||||
|
|
||||||
|
|
||||||
class Angels2200(_BasicScraper):
|
|
||||||
url = 'http://www.janahoffmann.com/angels/'
|
|
||||||
stripUrl = url + '%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'"))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous")
|
|
||||||
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
|
|
||||||
|
|
||||||
|
|
||||||
class Annyseed(_ParserScraper):
|
class Annyseed(_ParserScraper):
|
||||||
baseUrl = ('https://web.archive.org/web/20190511031451/'
|
baseUrl = ('https://web.archive.org/web/20190511031451/'
|
||||||
'http://www.mirrorwoodcomics.com/')
|
'http://www.mirrorwoodcomics.com/')
|
||||||
|
@ -330,7 +295,7 @@ class Annyseed(_ParserScraper):
|
||||||
return tourl
|
return tourl
|
||||||
|
|
||||||
|
|
||||||
class AntiheroForHire(_ParserScraper):
|
class AntiheroForHire(ParserScraper):
|
||||||
stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s'
|
stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s'
|
||||||
firstStripUrl = stripUrl % '2016/6/8/entrance-vigil'
|
firstStripUrl = stripUrl % '2016/6/8/entrance-vigil'
|
||||||
url = firstStripUrl
|
url = firstStripUrl
|
||||||
|
@ -341,7 +306,7 @@ class AntiheroForHire(_ParserScraper):
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Build list of chapters for navigation
|
# Build list of chapters for navigation
|
||||||
page = self.getPage(self.url)
|
page = self.getPage(self.url)
|
||||||
self.chapters = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]/@href')
|
self.chapters = self.match(page, '//ul[d:class("archive-group-list")]//a[d:class("archive-item-link")]/@href')
|
||||||
return self.chapters[0]
|
return self.chapters[0]
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
|
@ -377,7 +342,7 @@ class ArtificialIncident(WordPressWebcomic):
|
||||||
firstStripUrl = stripUrl % 'issue-one-life-changing'
|
firstStripUrl = stripUrl % 'issue-one-life-changing'
|
||||||
|
|
||||||
|
|
||||||
class AstronomyPOTD(_ParserScraper):
|
class AstronomyPOTD(ParserScraper):
|
||||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||||
url = baseUrl + 'astropix.html'
|
url = baseUrl + 'astropix.html'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
@ -391,7 +356,7 @@ class AstronomyPOTD(_ParserScraper):
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return data.xpath('//iframe') # videos
|
return self.match(data, '//iframe') # videos
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
||||||
|
|
|
@ -34,11 +34,11 @@ class CaptainSNES(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/nnn-stripname'
|
help = 'Index format: yyyy/mm/dd/nnn-stripname'
|
||||||
|
|
||||||
|
|
||||||
class CarryOn(_ParserScraper):
|
class CarryOn(ParserScraper):
|
||||||
url = 'http://www.hirezfox.com/km/co/'
|
url = 'http://www.hirezfox.com/km/co/'
|
||||||
stripUrl = url + 'd/%s.html'
|
stripUrl = url + 'd/%s.html'
|
||||||
firstStripUrl = stripUrl % '20040701'
|
firstStripUrl = stripUrl % '20040701'
|
||||||
imageSearch = '//div[@class="strip"]/img'
|
imageSearch = '//div[d:class("strip")]/img'
|
||||||
prevSearch = '//a[text()="Previous Day"]'
|
prevSearch = '//a[text()="Previous Day"]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
|
@ -122,13 +122,13 @@ class CatAndGirl(_ParserScraper):
|
||||||
prevSearch = '//a[d:class("pager--prev")]'
|
prevSearch = '//a[d:class("pager--prev")]'
|
||||||
|
|
||||||
|
|
||||||
class CatenaManor(_ParserScraper):
|
class CatenaManor(ParserScraper):
|
||||||
baseUrl = ('https://web.archive.org/web/20141027141116/'
|
baseUrl = ('https://web.archive.org/web/20141027141116/'
|
||||||
'http://catenamanor.com/')
|
'http://catenamanor.com/')
|
||||||
url = baseUrl + 'archives'
|
url = baseUrl + 'archives'
|
||||||
stripUrl = baseUrl + '%s/'
|
stripUrl = baseUrl + '%s/'
|
||||||
firstStripUrl = stripUrl % '2003/07'
|
firstStripUrl = stripUrl % '2003/07'
|
||||||
imageSearch = '//img[@class="comicthumbnail"]'
|
imageSearch = '//img[d:class("comicthumbnail")]'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
strips: List[str] = []
|
strips: List[str] = []
|
||||||
|
@ -136,7 +136,7 @@ class CatenaManor(_ParserScraper):
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Retrieve archive links and select valid range
|
# Retrieve archive links and select valid range
|
||||||
archivePage = self.getPage(self.url)
|
archivePage = self.getPage(self.url)
|
||||||
archiveStrips = archivePage.xpath('//div[@id="archivepage"]//a')
|
archiveStrips = self.match(archivePage, '//div[@id="archivepage"]//a')
|
||||||
valid = False
|
valid = False
|
||||||
for link in archiveStrips:
|
for link in archiveStrips:
|
||||||
if self.stripUrl % '2012/01' in link.get('href'):
|
if self.stripUrl % '2012/01' in link.get('href'):
|
||||||
|
@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper):
|
||||||
class CSectionComics(WordPressScraper):
|
class CSectionComics(WordPressScraper):
|
||||||
url = 'https://www.csectioncomics.com/'
|
url = 'https://www.csectioncomics.com/'
|
||||||
firstStripUrl = url + 'comics/one-day-in-country'
|
firstStripUrl = url + 'comics/one-day-in-country'
|
||||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
|
|
||||||
|
@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper):
|
||||||
prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]'
|
prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]'
|
||||||
nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]'
|
nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
namer = joinPathPartsNamer((), range(-4, 0))
|
namer = joinPathPartsNamer(imageparts=range(-4, 0))
|
||||||
|
|
||||||
|
|
||||||
class CynWolf(_ParserScraper):
|
class CynWolf(_ParserScraper):
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
|
@ -79,7 +79,7 @@ class ComicFury(ParserScraper):
|
||||||
num = parts[-1]
|
num = parts[-1]
|
||||||
if self.multipleImagesPerStrip:
|
if self.multipleImagesPerStrip:
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
images = page.xpath('//img[@class="comicsegmentimage"]/@src')
|
images = self.match(page, '//img[d:class("comicsegmentimage")]/@src')
|
||||||
if len(images) > 1:
|
if len(images) > 1:
|
||||||
imageIndex = images.index(imageUrl) + 1
|
imageIndex = images.index(imageUrl) + 1
|
||||||
return "%s_%s-%d%s" % (self.prefix, num, imageIndex, ext)
|
return "%s_%s-%d%s" % (self.prefix, num, imageIndex, ext)
|
||||||
|
@ -88,8 +88,8 @@ class ComicFury(ParserScraper):
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
# Videos on Underverse
|
# Videos on Underverse
|
||||||
return (data.xpath('//div[@id="comicimagewrap"]//video') and
|
return (self.match(data, '//div[@id="comicimagewrap"]//video') and
|
||||||
not data.xpath('//div[@id="comicimagewrap"]//img'))
|
not self.match(data, '//div[@id="comicimagewrap"]//img'))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls): # noqa: CFQ001
|
def getmodules(cls): # noqa: CFQ001
|
||||||
|
|
|
@ -1,41 +1,35 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
||||||
try:
|
from ..helpers import indirectStarter
|
||||||
from importlib_resources import as_file, files
|
|
||||||
except ImportError:
|
|
||||||
from importlib.resources import as_file, files
|
|
||||||
|
|
||||||
from ..helpers import bounceStarter, joinPathPartsNamer
|
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
|
|
||||||
|
|
||||||
class ComicsKingdom(ParserScraper):
|
class ComicsKingdom(ParserScraper):
|
||||||
imageSearch = '//img[@id="theComicImage"]'
|
partDiv = '//div[d:class("comic-reader-item")]'
|
||||||
prevSearch = '//a[./img[contains(@alt, "Previous")]]'
|
imageSearch = '//meta[@property="og:image"]/@content'
|
||||||
nextSearch = '//a[./img[contains(@alt, "Next")]]'
|
prevSearch = partDiv + '[2]/@data-link'
|
||||||
starter = bounceStarter
|
starter = indirectStarter
|
||||||
namer = joinPathPartsNamer((-2, -1), ())
|
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
def __init__(self, name, path, lang=None):
|
def __init__(self, name, path, lang=None):
|
||||||
super().__init__('ComicsKingdom/' + name)
|
super().__init__('ComicsKingdom/' + name)
|
||||||
self.url = 'https://comicskingdom.com/' + path
|
self.url = 'https://comicskingdom.com/' + path
|
||||||
self.stripUrl = self.url + '/%s'
|
self.stripUrl = self.url + '/%s'
|
||||||
|
self.latestSearch = f'//a[re:test(@href, "/{path}/[0-9-]+$")]'
|
||||||
if lang:
|
if lang:
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
|
|
||||||
|
def link_modifier(self, fromurl, tourl):
|
||||||
|
return tourl.replace('//wp.', '//', 1)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls): # noqa: CFQ001
|
def getmodules(cls): # noqa: CFQ001
|
||||||
return (
|
return (
|
||||||
# Some comics are not listed on the "all" page (too old?)
|
|
||||||
cls('Retail', 'retail'),
|
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/comicskingdom.py
|
# scripts/comicskingdom.py
|
||||||
# START AUTOUPDATE
|
# START AUTOUPDATE
|
||||||
cls('AmazingSpiderman', 'amazing-spider-man'),
|
cls('Alice', 'alice'),
|
||||||
cls('AmazingSpidermanSpanish', 'hombre-arana', lang='es'),
|
|
||||||
cls('Apartment3G', 'apartment-3-g_1'),
|
cls('Apartment3G', 'apartment-3-g_1'),
|
||||||
cls('ArcticCircle', 'arctic-circle'),
|
cls('ArcticCircle', 'arctic-circle'),
|
||||||
cls('ATodaVelocidadSpanish', 'a-toda-velocidad', lang='es'),
|
cls('ATodaVelocidadSpanish', 'a-toda-velocidad', lang='es'),
|
||||||
|
@ -43,22 +37,25 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('BarneyGoogleAndSnuffySmithSpanish', 'tapon', lang='es'),
|
cls('BarneyGoogleAndSnuffySmithSpanish', 'tapon', lang='es'),
|
||||||
cls('BeetleBailey', 'beetle-bailey-1'),
|
cls('BeetleBailey', 'beetle-bailey-1'),
|
||||||
cls('BeetleBaileySpanish', 'beto-el-recluta', lang='es'),
|
cls('BeetleBaileySpanish', 'beto-el-recluta', lang='es'),
|
||||||
|
cls('BeetleMoses', 'beetle-moses'),
|
||||||
cls('BetweenFriends', 'between-friends'),
|
cls('BetweenFriends', 'between-friends'),
|
||||||
|
cls('BewareOfToddler', 'beware-of-toddler'),
|
||||||
cls('BigBenBolt', 'big-ben-bolt'),
|
cls('BigBenBolt', 'big-ben-bolt'),
|
||||||
cls('BigBenBoltSundays', 'big-ben-bolt-sundays'),
|
|
||||||
cls('Bizarro', 'bizarro'),
|
cls('Bizarro', 'bizarro'),
|
||||||
cls('Blondie', 'blondie'),
|
cls('Blondie', 'blondie'),
|
||||||
cls('BlondieSpanish', 'pepita', lang='es'),
|
cls('BlondieSpanish', 'pepita', lang='es'),
|
||||||
|
cls('BobMankoffPresentsShowMeTheFunny', 'show-me-the-funny'),
|
||||||
|
cls('BobMankoffPresentsShowMeTheFunnyAnimalEdition', 'show-me-the-funny-pets'),
|
||||||
cls('BonersArk', 'boners-ark'),
|
cls('BonersArk', 'boners-ark'),
|
||||||
cls('BonersArkSundays', 'boners-ark-sundays'),
|
cls('BreakOfDay', 'break-of-day'),
|
||||||
cls('BrianDuffy', 'brian-duffy'),
|
|
||||||
cls('BrickBradford', 'brick-bradford'),
|
cls('BrickBradford', 'brick-bradford'),
|
||||||
cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'),
|
cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'),
|
||||||
cls('BringingUpFather', 'bringing-up-father'),
|
cls('BringingUpFather', 'bringing-up-father'),
|
||||||
cls('BringingUpFatherSpanish', 'educando-a-papa', lang='es'),
|
cls('BringingUpFatherSpanish', 'educando-a-papa', lang='es'),
|
||||||
cls('BuzSawyer', 'buz-sawyer'),
|
cls('BuzSawyer', 'buz-sawyer'),
|
||||||
|
cls('Candorville', 'candorville'),
|
||||||
cls('CarpeDiem', 'carpe-diem'),
|
cls('CarpeDiem', 'carpe-diem'),
|
||||||
cls('Crankshaft', 'crankshaft'),
|
cls('Comiclicious', 'comiclicious'),
|
||||||
cls('Crock', 'crock'),
|
cls('Crock', 'crock'),
|
||||||
cls('CrockSpanish', 'crock-spanish', lang='es'),
|
cls('CrockSpanish', 'crock-spanish', lang='es'),
|
||||||
cls('Curtis', 'curtis'),
|
cls('Curtis', 'curtis'),
|
||||||
|
@ -67,6 +64,7 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('DavidMHitch', 'david-m-hitch'),
|
cls('DavidMHitch', 'david-m-hitch'),
|
||||||
cls('DennisTheMenace', 'dennis-the-menace'),
|
cls('DennisTheMenace', 'dennis-the-menace'),
|
||||||
cls('DennisTheMenaceSpanish', 'daniel-el-travieso', lang='es'),
|
cls('DennisTheMenaceSpanish', 'daniel-el-travieso', lang='es'),
|
||||||
|
cls('Dumplings', 'dumplings'),
|
||||||
cls('Dustin', 'dustin'),
|
cls('Dustin', 'dustin'),
|
||||||
cls('EdGamble', 'ed-gamble'),
|
cls('EdGamble', 'ed-gamble'),
|
||||||
# EdgeCity has a duplicate in GoComics/EdgeCity
|
# EdgeCity has a duplicate in GoComics/EdgeCity
|
||||||
|
@ -74,18 +72,15 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('FamilyCircusSpanish', 'circulo-familiar', lang='es'),
|
cls('FamilyCircusSpanish', 'circulo-familiar', lang='es'),
|
||||||
cls('FlashForward', 'flash-forward'),
|
cls('FlashForward', 'flash-forward'),
|
||||||
cls('FlashGordon', 'flash-gordon'),
|
cls('FlashGordon', 'flash-gordon'),
|
||||||
cls('FlashGordonSundays', 'flash-gordon-sundays'),
|
cls('FunnyOnlineAnimals', 'funny-online-animals'),
|
||||||
cls('FunkyWinkerbean', 'funky-winkerbean'),
|
cls('GearheadGertie', 'gearhead-gertie'),
|
||||||
cls('FunkyWinkerbeanSunday', 'funky-winkerbean-sundays'),
|
cls('GodsHands', 'gods-hands'),
|
||||||
cls('FunkyWinkerbeanVintage', 'funky-winkerbean-1'),
|
|
||||||
cls('FunnyOnlineAnimals', 'Funny-Online-Animals'),
|
|
||||||
cls('GearheadGertie', 'Gearhead-Gertie'),
|
|
||||||
cls('HagarTheHorrible', 'hagar-the-horrible'),
|
cls('HagarTheHorrible', 'hagar-the-horrible'),
|
||||||
cls('HagarTheHorribleSpanish', 'olafo', lang='es'),
|
cls('HagarTheHorribleSpanish', 'olafo', lang='es'),
|
||||||
cls('HeartOfJulietJones', 'heart-of-juliet-jones'),
|
cls('HeartOfJulietJones', 'heart-of-juliet-jones'),
|
||||||
cls('HeartOfJulietJonesSundays', 'heart-of-juliet-jones-sundays'),
|
|
||||||
cls('HiAndLois', 'hi-and-lois'),
|
cls('HiAndLois', 'hi-and-lois'),
|
||||||
cls('IntelligentLife', 'Intelligent'),
|
cls('InsanityStreak', 'insanity-streak'),
|
||||||
|
cls('IntelligentLife', 'intelligent'),
|
||||||
cls('JimmyMargulies', 'jimmy-margulies'),
|
cls('JimmyMargulies', 'jimmy-margulies'),
|
||||||
cls('JohnBranch', 'john-branch'),
|
cls('JohnBranch', 'john-branch'),
|
||||||
cls('JohnnyHazard', 'johnny-hazard'),
|
cls('JohnnyHazard', 'johnny-hazard'),
|
||||||
|
@ -93,7 +88,6 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('JungleJimSundays', 'jungle-jim-sundays'),
|
cls('JungleJimSundays', 'jungle-jim-sundays'),
|
||||||
cls('KatzenjammerKids', 'katzenjammer-kids'),
|
cls('KatzenjammerKids', 'katzenjammer-kids'),
|
||||||
cls('KatzenjammerKidsSpanish', 'maldades-de-dos-pilluelos', lang='es'),
|
cls('KatzenjammerKidsSpanish', 'maldades-de-dos-pilluelos', lang='es'),
|
||||||
cls('KatzenjammerKidsSundays', 'katzenjammer-kids-sundays'),
|
|
||||||
cls('KevinAndKell', 'kevin-and-kell'),
|
cls('KevinAndKell', 'kevin-and-kell'),
|
||||||
cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'),
|
cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'),
|
||||||
cls('KirkWalters', 'kirk-walters'),
|
cls('KirkWalters', 'kirk-walters'),
|
||||||
|
@ -101,44 +95,42 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('LaloYLolaSpanish', 'lalo-y-lola', lang='es'),
|
cls('LaloYLolaSpanish', 'lalo-y-lola', lang='es'),
|
||||||
cls('LeeJudge', 'lee-judge'),
|
cls('LeeJudge', 'lee-judge'),
|
||||||
cls('LegalizationNation', 'legalization-nation'),
|
cls('LegalizationNation', 'legalization-nation'),
|
||||||
cls('LegendOfBill', 'Legend-of-Bill'),
|
cls('LegendOfBill', 'legend-of-bill'),
|
||||||
cls('LittleIodineSundays', 'little-iodine-sundays'),
|
cls('LittleIodineSundays', 'little-iodine-sundays'),
|
||||||
cls('LittleKing', 'the-little-king'),
|
cls('LittleKing', 'the-little-king'),
|
||||||
cls('Lockhorns', 'lockhorns'),
|
cls('Macanudo', 'macanudo'),
|
||||||
cls('Macanudo', 'Macanudo'),
|
|
||||||
cls('MacanudoSpanish', 'macanudo-spanish', lang='es'),
|
cls('MacanudoSpanish', 'macanudo-spanish', lang='es'),
|
||||||
cls('MallardFillmore', 'mallard-fillmore'),
|
cls('MallardFillmore', 'mallard-fillmore'),
|
||||||
cls('MandrakeTheMagician', 'mandrake-the-magician-1'),
|
cls('MandrakeTheMagician', 'mandrake-the-magician'),
|
||||||
cls('MandrakeTheMagicianSpanish', 'mandrake-the-magician-spanish', lang='es'),
|
cls('MandrakeTheMagicianSpanish', 'mandrake-the-magician-spanish', lang='es'),
|
||||||
cls('MandrakeTheMagicianSundays', 'mandrake-the-magician-sundays'),
|
cls('MaraLlaveKeeperOfTime', 'mara-llave-keeper-of-time'),
|
||||||
cls('MarkTrail', 'mark-trail'),
|
cls('MarkTrail', 'mark-trail'),
|
||||||
cls('MarkTrailSpanish', 'mark-trail-spanish', lang='es'),
|
cls('MarkTrailSpanish', 'mark-trail-spanish', lang='es'),
|
||||||
cls('MarkTrailVintage', 'Mark-Trail-Vintage'),
|
|
||||||
cls('Marvin', 'marvin'),
|
cls('Marvin', 'marvin'),
|
||||||
cls('MarvinSpanish', 'marvin-spanish', lang='es'),
|
cls('MarvinSpanish', 'marvin-spanish', lang='es'),
|
||||||
cls('MaryWorth', 'mary-worth'),
|
cls('MaryWorth', 'mary-worth'),
|
||||||
cls('MaryWorthSpanish', 'maria-de-oro', lang='es'),
|
cls('MaryWorthSpanish', 'maria-de-oro', lang='es'),
|
||||||
cls('MikePeters', 'mike-peters'),
|
cls('Mazetoons', 'mazetoons'),
|
||||||
cls('MikeShelton', 'mike-shelton'),
|
cls('MikeShelton', 'mike-shelton'),
|
||||||
cls('MikeSmith', 'mike-smith'),
|
cls('MikeSmith', 'mike-smith'),
|
||||||
cls('MooseAndMolly', 'moose-and-molly'),
|
cls('MooseAndMolly', 'moose-and-molly'),
|
||||||
cls('MooseAndMollySpanish', 'quintin', lang='es'),
|
cls('MooseAndMollySpanish', 'quintin', lang='es'),
|
||||||
cls('MotherGooseAndGrimm', 'mother-goose-grimm'),
|
|
||||||
cls('MrAbernathySpanish', 'don-abundio', lang='es'),
|
cls('MrAbernathySpanish', 'don-abundio', lang='es'),
|
||||||
cls('Mutts', 'mutts'),
|
cls('Mutts', 'mutts'),
|
||||||
cls('MuttsSpanish', 'motas', lang='es'),
|
cls('MuttsSpanish', 'motas', lang='es'),
|
||||||
|
cls('NeverBeenDeader', 'never-been-deader'),
|
||||||
cls('OfficeHours', 'office-hours'),
|
cls('OfficeHours', 'office-hours'),
|
||||||
|
cls('OliveAndPopeye', 'olive-popeye'),
|
||||||
cls('OnTheFastrack', 'on-the-fastrack'),
|
cls('OnTheFastrack', 'on-the-fastrack'),
|
||||||
cls('PajamaDiaries', 'pajama-diaries'),
|
cls('PajamaDiaries', 'pajama-diaries'),
|
||||||
cls('PardonMyPlanet', 'pardon-my-planet'),
|
cls('PardonMyPlanet', 'pardon-my-planet'),
|
||||||
cls('Phantom', 'phantom'),
|
cls('Phantom', 'phantom'),
|
||||||
cls('PhantomSpanish', 'el-fantasma', lang='es'),
|
cls('PhantomSpanish', 'el-fantasma', lang='es'),
|
||||||
cls('PhantomSundays', 'phantom-sundays'),
|
cls('PlanetSyndicate', 'the_planet_syndicate'),
|
||||||
cls('Popeye', 'popeye'),
|
cls('Popeye', 'popeye'),
|
||||||
cls('PopeyesCartoonClub', 'popeyes-cartoon-club'),
|
cls('PopeyesCartoonClub', 'popeyes-cartoon-club'),
|
||||||
cls('PopeyeSpanish', 'popeye-spanish', lang='es'),
|
cls('PopeyeSpanish', 'popeye-spanish', lang='es'),
|
||||||
cls('PrinceValiant', 'prince-valiant'),
|
cls('PrinceValiant', 'prince-valiant'),
|
||||||
cls('PrinceValiantSundays', 'prince-valiant-sundays'),
|
|
||||||
cls('PrincipeValienteSpanish', 'principe-valiente', lang='es'),
|
cls('PrincipeValienteSpanish', 'principe-valiente', lang='es'),
|
||||||
cls('ProsAndCons', 'pros-cons'),
|
cls('ProsAndCons', 'pros-cons'),
|
||||||
cls('Quincy', 'quincy'),
|
cls('Quincy', 'quincy'),
|
||||||
|
@ -148,7 +140,9 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('RexMorganMDSpanish', 'rex-morgan-md-spanish', lang='es'),
|
cls('RexMorganMDSpanish', 'rex-morgan-md-spanish', lang='es'),
|
||||||
cls('RhymesWithOrange', 'rhymes-with-orange'),
|
cls('RhymesWithOrange', 'rhymes-with-orange'),
|
||||||
cls('RipKirby', 'rip-kirby'),
|
cls('RipKirby', 'rip-kirby'),
|
||||||
|
# Rosebuds has a duplicate in GoComics/Rosebuds
|
||||||
cls('SafeHavens', 'safe-havens'),
|
cls('SafeHavens', 'safe-havens'),
|
||||||
|
cls('SagaOfBrannBjornson', 'the-saga-of-brann-bjornson'),
|
||||||
cls('Sales', 'sales'),
|
cls('Sales', 'sales'),
|
||||||
cls('SallyForth', 'sally-forth'),
|
cls('SallyForth', 'sally-forth'),
|
||||||
cls('SamAndSilo', 'sam-and-silo'),
|
cls('SamAndSilo', 'sam-and-silo'),
|
||||||
|
@ -156,17 +150,18 @@ class ComicsKingdom(ParserScraper):
|
||||||
cls('SecretAgentX9', 'secret-agent-x-9'),
|
cls('SecretAgentX9', 'secret-agent-x-9'),
|
||||||
# Shoe has a duplicate in GoComics/Shoe
|
# Shoe has a duplicate in GoComics/Shoe
|
||||||
cls('SixChix', 'six-chix'),
|
cls('SixChix', 'six-chix'),
|
||||||
cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'),
|
cls('SlylockFox', 'slylock-fox-and-comics-for-kids'),
|
||||||
cls('SlylockFoxAndComicsForKidsSpanish', 'solo-para-ninos', lang='es'),
|
cls('SlylockFoxSpanish', 'solo-para-ninos', lang='es'),
|
||||||
|
cls('SuburbanFairyTales', 'suburban-fairy-tales'),
|
||||||
cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'),
|
cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'),
|
||||||
cls('TheyllDoItEveryTimeSpanish', 'nunca-falta-alguien-asi', lang='es'),
|
cls('TheyllDoItEveryTimeSpanish', 'nunca-falta-alguien-asi', lang='es'),
|
||||||
cls('ThimbleTheater', 'thimble-theater'),
|
cls('ThimbleTheater', 'thimble-theater'),
|
||||||
cls('Tiger', 'tiger'),
|
cls('Tiger', 'tiger'),
|
||||||
cls('TigerSpanish', 'tigrillo', lang='es'),
|
cls('TigerSpanish', 'tigrillo', lang='es'),
|
||||||
cls('TigerVintage', 'tiger-1'),
|
|
||||||
cls('TigerVintageSundays', 'tiger-sundays'),
|
|
||||||
cls('TinasGroove', 'tina-s-groove'),
|
cls('TinasGroove', 'tina-s-groove'),
|
||||||
cls('ToddTheDinosaur', 'todd-the-dinosaur'),
|
cls('ToddTheDinosaur', 'todd-the-dinosaur'),
|
||||||
|
cls('WillyBlack', 'willy-black'),
|
||||||
|
cls('WillyBlacksSpanish', 'willy-black-spanish', lang='es'),
|
||||||
cls('ZippyThePinhead', 'zippy-the-pinhead'),
|
cls('ZippyThePinhead', 'zippy-the-pinhead'),
|
||||||
cls('Zits', 'zits'),
|
cls('Zits', 'zits'),
|
||||||
cls('ZitsSpanish', 'jeremias', lang='es'),
|
cls('ZitsSpanish', 'jeremias', lang='es'),
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
|
@ -328,19 +328,14 @@ class DreamKeepersPrelude(_ParserScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
class DresdenCodak(_ParserScraper):
|
class DresdenCodak(ParserScraper):
|
||||||
url = 'http://dresdencodak.com/'
|
url = 'http://dresdencodak.com/'
|
||||||
startUrl = url + 'cat/comic/'
|
|
||||||
firstStripUrl = url + '2007/02/08/pom/'
|
firstStripUrl = url + '2007/02/08/pom/'
|
||||||
imageSearch = '//section[d:class("entry-content")]//img[d:class("aligncenter")]'
|
imageSearch = '//section[d:class("entry-content")]//img[d:class("aligncenter")]'
|
||||||
prevSearch = '//a[img[contains(@src, "prev")]]'
|
prevSearch = '//a[img[contains(@src, "prev")]]'
|
||||||
latestSearch = '//a[d:class("tc-grid-bg-link")]'
|
latestSearch = '//a[d:class("tc-grid-bg-link")]'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
# Blog and comic are mixed...
|
|
||||||
def shouldSkipUrl(self, url, data):
|
|
||||||
return not data.xpath(self.imageSearch)
|
|
||||||
|
|
||||||
|
|
||||||
class DrFun(_ParserScraper):
|
class DrFun(_ParserScraper):
|
||||||
baseUrl = ('https://web.archive.org/web/20180726145737/'
|
baseUrl = ('https://web.archive.org/web/20180726145737/'
|
||||||
|
@ -355,14 +350,12 @@ class DrFun(_ParserScraper):
|
||||||
help = 'Index format: nnnnn'
|
help = 'Index format: nnnnn'
|
||||||
|
|
||||||
|
|
||||||
class Drive(_BasicScraper):
|
class Drive(ParserScraper):
|
||||||
url = 'http://www.drivecomic.com/'
|
url = 'http://www.drivecomic.com/'
|
||||||
rurl = escape(url)
|
firstStripUrl = url + 'comic/act-1-pg-001/'
|
||||||
stripUrl = url + 'archive/%s.html'
|
imageSearch = ('//div[@id="unspliced-comic"]//img/@data-src-img',
|
||||||
firstStripUrl = stripUrl % '090815'
|
'//div[@id="unspliced-comic"]//picture//img')
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.drivecomic\.com/strips/main/[^"]+)'))
|
prevSearch = '//a[d:class("previous-comic")]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl) + "Previous")
|
|
||||||
help = 'Index format: yymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class DrMcNinja(_ParserScraper):
|
class DrMcNinja(_ParserScraper):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class Derideal(ParserScraper):
|
||||||
|
|
||||||
def starter(self):
|
def starter(self):
|
||||||
indexPage = self.getPage(self.url)
|
indexPage = self.getPage(self.url)
|
||||||
self.chapters = indexPage.xpath('//a[contains(text(), "Read this episode")]/@href')
|
self.chapters = self.match(indexPage, '//a[contains(text(), "Read this episode")]/@href')
|
||||||
self.currentChapter = len(self.chapters)
|
self.currentChapter = len(self.chapters)
|
||||||
return indirectStarter(self)
|
return indirectStarter(self)
|
||||||
|
|
||||||
|
|
|
@ -113,7 +113,7 @@ class Erfworld(ParserScraper):
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return not data.xpath(self.imageSearch)
|
return not self.match(data, self.imageSearch)
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
# Fix inconsistent filenames
|
# Fix inconsistent filenames
|
||||||
|
@ -167,15 +167,6 @@ class Erstwhile(WordPressNavi):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class Everblue(ComicControlScraper):
|
|
||||||
url = 'http://www.everblue-comic.com/comic/'
|
|
||||||
stripUrl = url + '%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
|
|
||||||
|
|
||||||
|
|
||||||
class EverybodyLovesEricRaymond(_ParserScraper):
|
class EverybodyLovesEricRaymond(_ParserScraper):
|
||||||
url = 'http://geekz.co.uk/lovesraymond/'
|
url = 'http://geekz.co.uk/lovesraymond/'
|
||||||
firstStripUrl = url + 'archive/slashdotted'
|
firstStripUrl = url + 'archive/slashdotted'
|
||||||
|
@ -190,9 +181,10 @@ class EvilDiva(WordPressScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class EvilInc(_ParserScraper):
|
class EvilInc(ParserScraper):
|
||||||
url = 'https://www.evil-inc.com/'
|
url = 'https://www.evil-inc.com/'
|
||||||
imageSearch = '//div[@id="unspliced-comic"]/img/@data-src'
|
imageSearch = ('//div[@id="unspliced-comic"]/img',
|
||||||
|
'//div[@id="unspliced-comic"]/picture//img')
|
||||||
prevSearch = '//a[./i[d:class("fa-chevron-left")]]'
|
prevSearch = '//a[./i[d:class("fa-chevron-left")]]'
|
||||||
firstStripUrl = url + 'comic/monday-3/'
|
firstStripUrl = url + 'comic/monday-3/'
|
||||||
|
|
||||||
|
@ -263,7 +255,7 @@ class ExtraFabulousComics(WordPressScraper):
|
||||||
return '_'.join((pagepart, imagename))
|
return '_'.join((pagepart, imagename))
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
return data.xpath('//div[@id="comic"]//iframe')
|
return self.match(data, '//div[@id="comic"]//iframe')
|
||||||
|
|
||||||
|
|
||||||
class ExtraLife(_BasicScraper):
|
class ExtraLife(_BasicScraper):
|
||||||
|
|
|
@ -140,7 +140,7 @@ class FoxDad(ParserScraper):
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
post = page.xpath('//li[@class="timestamp"]/a/@href')[0]
|
post = self.match(page, '//li[d:class("timestamp")]/a/@href')[0]
|
||||||
post = post.replace('https://foxdad.com/post/', '')
|
post = post.replace('https://foxdad.com/post/', '')
|
||||||
if '-consider-support' in post:
|
if '-consider-support' in post:
|
||||||
post = post.split('-consider-support')[0]
|
post = post.split('-consider-support')[0]
|
||||||
|
@ -171,7 +171,7 @@ class Fragile(_ParserScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class FredoAndPidjin(_ParserScraper):
|
class FredoAndPidjin(ParserScraper):
|
||||||
url = 'https://www.pidjin.net/'
|
url = 'https://www.pidjin.net/'
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
|
firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
|
||||||
|
@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper):
|
||||||
prevSearch = '//span[d:class("prev")]/a'
|
prevSearch = '//span[d:class("prev")]/a'
|
||||||
latestSearch = '//section[d:class("latest")]//a'
|
latestSearch = '//section[d:class("latest")]//a'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
namer = joinPathPartsNamer((0, 1, 2))
|
namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))
|
||||||
|
|
||||||
|
|
||||||
class Freefall(_ParserScraper):
|
class Freefall(_ParserScraper):
|
||||||
|
@ -216,7 +216,7 @@ class FriendsYouAreStuckWith(WordPressScraper):
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
strip = page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '')
|
strip = self.match(page, '//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '')
|
||||||
return strip + '_' + imageUrl.rstrip('/').rsplit('/', 1)[-1]
|
return strip + '_' + imageUrl.rstrip('/').rsplit('/', 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,11 @@
|
||||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from ..util import tagre
|
from ..util import tagre, getQueryParams
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressNavi
|
from .common import ComicControlScraper, WordPressScraper, WordPressNavi
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,13 +27,9 @@ class Garanos(WordPressScraper):
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class GastroPhobia(_ParserScraper):
|
class GastroPhobia(ComicControlScraper):
|
||||||
url = 'http://www.gastrophobia.com/'
|
url = 'https://gastrophobia.com/'
|
||||||
stripUrl = url + 'index.php?date=%s'
|
firstStripUrl = url + 'comix/the-mane-event'
|
||||||
firstStripUrl = stripUrl % '2008-07-30'
|
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
|
||||||
prevSearch = '//div[@id="prev"]/a'
|
|
||||||
help = 'Index format: yyyy-mm-dd'
|
|
||||||
|
|
||||||
|
|
||||||
class Geeks(_ParserScraper):
|
class Geeks(_ParserScraper):
|
||||||
|
@ -51,7 +47,7 @@ class GeeksNextDoor(_ParserScraper):
|
||||||
url = 'http://www.geeksnextcomic.com/'
|
url = 'http://www.geeksnextcomic.com/'
|
||||||
stripUrl = url + '%s.html'
|
stripUrl = url + '%s.html'
|
||||||
firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04'
|
firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04'
|
||||||
imageSearch = '//p/img'
|
imageSearch = ('//p/img', '//p/span/img')
|
||||||
prevSearch = (
|
prevSearch = (
|
||||||
'//a[img[contains(@src, "/nav_prev")]]',
|
'//a[img[contains(@src, "/nav_prev")]]',
|
||||||
'//a[contains(text(), "< prev")]', # start page is different
|
'//a[contains(text(), "< prev")]', # start page is different
|
||||||
|
@ -59,16 +55,12 @@ class GeeksNextDoor(_ParserScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class GirlGenius(_BasicScraper):
|
class GirlGenius(ParserScraper):
|
||||||
baseUrl = 'http://www.girlgeniusonline.com/'
|
url = 'https://www.girlgeniusonline.com/comic.php'
|
||||||
rurl = escape(baseUrl)
|
|
||||||
url = baseUrl + 'comic.php'
|
|
||||||
stripUrl = url + '?date=%s'
|
stripUrl = url + '?date=%s'
|
||||||
firstStripUrl = stripUrl % '20021104'
|
firstStripUrl = stripUrl % '20021104'
|
||||||
imageSearch = compile(
|
imageSearch = '//img[@alt="Comic"]'
|
||||||
tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'"))
|
prevSearch = '//a[@id="topprev"]'
|
||||||
prevSearch = compile(tagre("a", "id", "topprev", quote="\"",
|
|
||||||
before=r"(%s[^\"']+)" % rurl))
|
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
@ -99,20 +91,18 @@ class GoGetARoomie(ComicControlScraper):
|
||||||
url = 'http://www.gogetaroomie.com'
|
url = 'http://www.gogetaroomie.com'
|
||||||
|
|
||||||
|
|
||||||
class GoneWithTheBlastwave(_BasicScraper):
|
class GoneWithTheBlastwave(ParserScraper):
|
||||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
stripUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s'
|
||||||
starter = indirectStarter
|
|
||||||
stripUrl = url[:-1] + '%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
url = firstStripUrl
|
||||||
prevSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
starter = indirectStarter
|
||||||
r'<img src="images/page/default/previous')
|
imageSearch = '//*[@id="comic_ruutu"]/center/img'
|
||||||
latestSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
prevSearch = '//a[img[contains(@src, "previous")]]'
|
||||||
r'<img src="images/page/default/latest')
|
latestSearch = '//a[img[contains(@src, "latest")]]'
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1))
|
return '%02d' % int(getQueryParams(page_url)['nro'][0])
|
||||||
|
|
||||||
|
|
||||||
class GrrlPower(WordPressScraper):
|
class GrrlPower(WordPressScraper):
|
||||||
|
@ -130,13 +120,12 @@ class GuildedAge(WordPressScraper):
|
||||||
firstStripUrl = url + 'comic/chapter-1-cover/'
|
firstStripUrl = url + 'comic/chapter-1-cover/'
|
||||||
|
|
||||||
|
|
||||||
class GUComics(_BasicScraper):
|
class GUComics(ParserScraper):
|
||||||
url = 'http://www.gucomics.com/'
|
stripUrl = 'https://www.gucomics.com/%s'
|
||||||
stripUrl = url + '%s'
|
url = stripUrl % 'comic/'
|
||||||
firstStripUrl = stripUrl % '20000710'
|
firstStripUrl = stripUrl % '20000710'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
|
imageSearch = '//img[contains(@src, "/comics/2")]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
|
prevSearch = '//a[img[contains(@alt, "previous")]]'
|
||||||
tagre("img", "src", r'/images/nav/prev\.png'))
|
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ class GoComics(ParserScraper):
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return data.xpath('//img[contains(@src, "content-error-missing")]')
|
return self.match(data, '//img[contains(@src, "content-error-missing")]')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getmodules(cls): # noqa: CFQ001
|
def getmodules(cls): # noqa: CFQ001
|
||||||
|
@ -44,7 +44,6 @@ class GoComics(ParserScraper):
|
||||||
# START AUTOUPDATE
|
# START AUTOUPDATE
|
||||||
cls('1AndDone', '1-and-done'),
|
cls('1AndDone', '1-and-done'),
|
||||||
cls('9ChickweedLane', '9chickweedlane'),
|
cls('9ChickweedLane', '9chickweedlane'),
|
||||||
cls('9ChickweedLaneClassics', '9-chickweed-lane-classics'),
|
|
||||||
cls('9To5', '9to5'),
|
cls('9To5', '9to5'),
|
||||||
cls('Aaggghhh', 'Aaggghhh', 'es'),
|
cls('Aaggghhh', 'Aaggghhh', 'es'),
|
||||||
cls('AdamAtHome', 'adamathome'),
|
cls('AdamAtHome', 'adamathome'),
|
||||||
|
@ -62,6 +61,7 @@ class GoComics(ParserScraper):
|
||||||
cls('Annie', 'annie'),
|
cls('Annie', 'annie'),
|
||||||
cls('AProblemLikeJamal', 'a-problem-like-jamal'),
|
cls('AProblemLikeJamal', 'a-problem-like-jamal'),
|
||||||
cls('ArloAndJanis', 'arloandjanis'),
|
cls('ArloAndJanis', 'arloandjanis'),
|
||||||
|
cls('ArtByMoga', 'artbymoga'),
|
||||||
cls('AskShagg', 'askshagg'),
|
cls('AskShagg', 'askshagg'),
|
||||||
cls('AtTavicat', 'tavicat'),
|
cls('AtTavicat', 'tavicat'),
|
||||||
cls('AuntyAcid', 'aunty-acid'),
|
cls('AuntyAcid', 'aunty-acid'),
|
||||||
|
@ -69,7 +69,6 @@ class GoComics(ParserScraper):
|
||||||
cls('BackInTheDay', 'backintheday'),
|
cls('BackInTheDay', 'backintheday'),
|
||||||
cls('BackToBC', 'back-to-bc'),
|
cls('BackToBC', 'back-to-bc'),
|
||||||
cls('Bacon', 'bacon'),
|
cls('Bacon', 'bacon'),
|
||||||
cls('Badlands', 'badlands'),
|
|
||||||
cls('BadMachinery', 'bad-machinery'),
|
cls('BadMachinery', 'bad-machinery'),
|
||||||
cls('Baldo', 'baldo'),
|
cls('Baldo', 'baldo'),
|
||||||
cls('BaldoEnEspanol', 'baldoespanol', 'es'),
|
cls('BaldoEnEspanol', 'baldoespanol', 'es'),
|
||||||
|
@ -90,8 +89,8 @@ class GoComics(ParserScraper):
|
||||||
cls('Betty', 'betty'),
|
cls('Betty', 'betty'),
|
||||||
cls('BFGFSyndrome', 'bfgf-syndrome'),
|
cls('BFGFSyndrome', 'bfgf-syndrome'),
|
||||||
cls('BigNate', 'bignate'),
|
cls('BigNate', 'bignate'),
|
||||||
cls('BigNateFirstClass', 'big-nate-first-class'),
|
|
||||||
cls('BigTop', 'bigtop'),
|
cls('BigTop', 'bigtop'),
|
||||||
|
cls('BillBramhall', 'bill-bramhall'),
|
||||||
cls('BirdAndMoon', 'bird-and-moon'),
|
cls('BirdAndMoon', 'bird-and-moon'),
|
||||||
cls('Birdbrains', 'birdbrains'),
|
cls('Birdbrains', 'birdbrains'),
|
||||||
cls('BleekerTheRechargeableDog', 'bleeker'),
|
cls('BleekerTheRechargeableDog', 'bleeker'),
|
||||||
|
@ -99,14 +98,14 @@ class GoComics(ParserScraper):
|
||||||
cls('BloomCounty', 'bloomcounty'),
|
cls('BloomCounty', 'bloomcounty'),
|
||||||
cls('BloomCounty2019', 'bloom-county'),
|
cls('BloomCounty2019', 'bloom-county'),
|
||||||
cls('BobGorrell', 'bobgorrell'),
|
cls('BobGorrell', 'bobgorrell'),
|
||||||
|
cls('BobTheAngryFlower', 'bob-the-angry-flower'),
|
||||||
cls('BobTheSquirrel', 'bobthesquirrel'),
|
cls('BobTheSquirrel', 'bobthesquirrel'),
|
||||||
cls('BoNanas', 'bonanas'),
|
cls('BoNanas', 'bonanas'),
|
||||||
cls('Boomerangs', 'boomerangs'),
|
cls('Boomerangs', 'boomerangs'),
|
||||||
cls('Bottomliners', 'bottomliners'),
|
cls('BottomLiners', 'bottomliners'),
|
||||||
cls('BoundAndGagged', 'boundandgagged'),
|
cls('BoundAndGagged', 'boundandgagged'),
|
||||||
cls('Bozo', 'bozo'),
|
cls('Bozo', 'bozo'),
|
||||||
cls('BreakingCatNews', 'breaking-cat-news'),
|
cls('BreakingCatNews', 'breaking-cat-news'),
|
||||||
cls('BreakOfDay', 'break-of-day'),
|
|
||||||
cls('Brevity', 'brevity'),
|
cls('Brevity', 'brevity'),
|
||||||
cls('BrewsterRockit', 'brewsterrockit'),
|
cls('BrewsterRockit', 'brewsterrockit'),
|
||||||
cls('BrianMcFadden', 'brian-mcfadden'),
|
cls('BrianMcFadden', 'brian-mcfadden'),
|
||||||
|
@ -116,7 +115,6 @@ class GoComics(ParserScraper):
|
||||||
cls('Buni', 'buni'),
|
cls('Buni', 'buni'),
|
||||||
cls('CalvinAndHobbes', 'calvinandhobbes'),
|
cls('CalvinAndHobbes', 'calvinandhobbes'),
|
||||||
cls('CalvinAndHobbesEnEspanol', 'calvinandhobbesespanol', 'es'),
|
cls('CalvinAndHobbesEnEspanol', 'calvinandhobbesespanol', 'es'),
|
||||||
cls('Candorville', 'candorville'),
|
|
||||||
cls('CatanaComics', 'little-moments-of-love'),
|
cls('CatanaComics', 'little-moments-of-love'),
|
||||||
cls('CathyClassics', 'cathy'),
|
cls('CathyClassics', 'cathy'),
|
||||||
cls('CathyCommiserations', 'cathy-commiserations'),
|
cls('CathyCommiserations', 'cathy-commiserations'),
|
||||||
|
@ -139,17 +137,18 @@ class GoComics(ParserScraper):
|
||||||
cls('CowAndBoyClassics', 'cowandboy'),
|
cls('CowAndBoyClassics', 'cowandboy'),
|
||||||
cls('CowTown', 'cowtown'),
|
cls('CowTown', 'cowtown'),
|
||||||
cls('Crabgrass', 'crabgrass'),
|
cls('Crabgrass', 'crabgrass'),
|
||||||
|
# Crankshaft has a duplicate in ComicsKingdom/Crankshaft
|
||||||
cls('Crumb', 'crumb'),
|
cls('Crumb', 'crumb'),
|
||||||
cls('CulDeSac', 'culdesac'),
|
cls('CulDeSac', 'culdesac'),
|
||||||
|
cls('Curses', 'curses'),
|
||||||
cls('DaddysHome', 'daddyshome'),
|
cls('DaddysHome', 'daddyshome'),
|
||||||
cls('DanaSummers', 'danasummers'),
|
cls('DanaSummers', 'danasummers'),
|
||||||
cls('DarkSideOfTheHorse', 'darksideofthehorse'),
|
cls('DarkSideOfTheHorse', 'darksideofthehorse'),
|
||||||
|
cls('DayByDave', 'day-by-dave'),
|
||||||
cls('DeepDarkFears', 'deep-dark-fears'),
|
cls('DeepDarkFears', 'deep-dark-fears'),
|
||||||
cls('DeFlocked', 'deflocked'),
|
cls('DeFlocked', 'deflocked'),
|
||||||
cls('DiamondLil', 'diamondlil'),
|
cls('DiamondLil', 'diamondlil'),
|
||||||
cls('DickTracy', 'dicktracy'),
|
cls('DickTracy', 'dicktracy'),
|
||||||
cls('DilbertClassics', 'dilbert-classics'),
|
|
||||||
cls('DilbertEnEspanol', 'dilbert-en-espanol', 'es'),
|
|
||||||
cls('DinosaurComics', 'dinosaur-comics'),
|
cls('DinosaurComics', 'dinosaur-comics'),
|
||||||
cls('DogEatDoug', 'dogeatdoug'),
|
cls('DogEatDoug', 'dogeatdoug'),
|
||||||
cls('DogsOfCKennel', 'dogsofckennel'),
|
cls('DogsOfCKennel', 'dogsofckennel'),
|
||||||
|
@ -160,15 +159,14 @@ class GoComics(ParserScraper):
|
||||||
cls('Doonesbury', 'doonesbury'),
|
cls('Doonesbury', 'doonesbury'),
|
||||||
cls('Drabble', 'drabble'),
|
cls('Drabble', 'drabble'),
|
||||||
cls('DrewSheneman', 'drewsheneman'),
|
cls('DrewSheneman', 'drewsheneman'),
|
||||||
cls('DumbwichCastle', 'dumbwich-castle'),
|
|
||||||
cls('EdgeCity', 'edge-city'),
|
cls('EdgeCity', 'edge-city'),
|
||||||
cls('Eek', 'eek'),
|
cls('Eek', 'eek'),
|
||||||
cls('ElCafDePoncho', 'el-cafe-de-poncho', 'es'),
|
cls('ElCafDePoncho', 'el-cafe-de-poncho', 'es'),
|
||||||
cls('EmmyLou', 'emmy-lou'),
|
cls('EmmyLou', 'emmy-lou'),
|
||||||
cls('Endtown', 'endtown'),
|
cls('Endtown', 'endtown'),
|
||||||
|
cls('EricAllie', 'eric-allie'),
|
||||||
cls('EverydayPeopleCartoons', 'everyday-people-cartoons'),
|
cls('EverydayPeopleCartoons', 'everyday-people-cartoons'),
|
||||||
cls('Eyebeam', 'eyebeam'),
|
cls('Eyebeam', 'eyebeam'),
|
||||||
cls('EyebeamClassic', 'eyebeam-classic'),
|
|
||||||
cls('FalseKnees', 'false-knees'),
|
cls('FalseKnees', 'false-knees'),
|
||||||
cls('FamilyTree', 'familytree'),
|
cls('FamilyTree', 'familytree'),
|
||||||
cls('Farcus', 'farcus'),
|
cls('Farcus', 'farcus'),
|
||||||
|
@ -191,8 +189,8 @@ class GoComics(ParserScraper):
|
||||||
cls('FreeRange', 'freerange'),
|
cls('FreeRange', 'freerange'),
|
||||||
cls('FreshlySqueezed', 'freshlysqueezed'),
|
cls('FreshlySqueezed', 'freshlysqueezed'),
|
||||||
cls('FrogApplause', 'frogapplause'),
|
cls('FrogApplause', 'frogapplause'),
|
||||||
|
cls('FurBabies', 'furbabies'),
|
||||||
cls('Garfield', 'garfield'),
|
cls('Garfield', 'garfield'),
|
||||||
cls('GarfieldClassics', 'garfield-classics'),
|
|
||||||
cls('GarfieldEnEspanol', 'garfieldespanol', 'es'),
|
cls('GarfieldEnEspanol', 'garfieldespanol', 'es'),
|
||||||
cls('GaryMarkstein', 'garymarkstein'),
|
cls('GaryMarkstein', 'garymarkstein'),
|
||||||
cls('GaryVarvel', 'garyvarvel'),
|
cls('GaryVarvel', 'garyvarvel'),
|
||||||
|
@ -222,6 +220,7 @@ class GoComics(ParserScraper):
|
||||||
cls('HerbAndJamaal', 'herbandjamaal'),
|
cls('HerbAndJamaal', 'herbandjamaal'),
|
||||||
cls('Herman', 'herman'),
|
cls('Herman', 'herman'),
|
||||||
cls('HomeAndAway', 'homeandaway'),
|
cls('HomeAndAway', 'homeandaway'),
|
||||||
|
cls('HomeFree', 'homefree'),
|
||||||
cls('HotComicsForCoolPeople', 'hot-comics-for-cool-people'),
|
cls('HotComicsForCoolPeople', 'hot-comics-for-cool-people'),
|
||||||
cls('HutchOwen', 'hutch-owen'),
|
cls('HutchOwen', 'hutch-owen'),
|
||||||
cls('ImagineThis', 'imaginethis'),
|
cls('ImagineThis', 'imaginethis'),
|
||||||
|
@ -238,10 +237,12 @@ class GoComics(ParserScraper):
|
||||||
cls('JeffDanziger', 'jeffdanziger'),
|
cls('JeffDanziger', 'jeffdanziger'),
|
||||||
cls('JeffStahler', 'jeffstahler'),
|
cls('JeffStahler', 'jeffstahler'),
|
||||||
cls('JenSorensen', 'jen-sorensen'),
|
cls('JenSorensen', 'jen-sorensen'),
|
||||||
|
cls('JerryKingComics', 'jerry-king-comics'),
|
||||||
cls('JimBentonCartoons', 'jim-benton-cartoons'),
|
cls('JimBentonCartoons', 'jim-benton-cartoons'),
|
||||||
cls('JimMorin', 'jimmorin'),
|
cls('JimMorin', 'jimmorin'),
|
||||||
cls('JoeHeller', 'joe-heller'),
|
cls('JoeHeller', 'joe-heller'),
|
||||||
cls('JoelPett', 'joelpett'),
|
cls('JoelPett', 'joelpett'),
|
||||||
|
cls('JoeyWeatherford', 'joey-weatherford'),
|
||||||
cls('JohnDeering', 'johndeering'),
|
cls('JohnDeering', 'johndeering'),
|
||||||
cls('JumpStart', 'jumpstart'),
|
cls('JumpStart', 'jumpstart'),
|
||||||
cls('JunkDrawer', 'junk-drawer'),
|
cls('JunkDrawer', 'junk-drawer'),
|
||||||
|
@ -287,7 +288,6 @@ class GoComics(ParserScraper):
|
||||||
cls('Lunarbaboon', 'lunarbaboon'),
|
cls('Lunarbaboon', 'lunarbaboon'),
|
||||||
cls('M2Bulls', 'm2bulls'),
|
cls('M2Bulls', 'm2bulls'),
|
||||||
cls('Maintaining', 'maintaining'),
|
cls('Maintaining', 'maintaining'),
|
||||||
cls('MakingIt', 'making-it'),
|
|
||||||
cls('MannequinOnTheMoon', 'mannequin-on-the-moon'),
|
cls('MannequinOnTheMoon', 'mannequin-on-the-moon'),
|
||||||
cls('MariasDay', 'marias-day'),
|
cls('MariasDay', 'marias-day'),
|
||||||
cls('Marmaduke', 'marmaduke'),
|
cls('Marmaduke', 'marmaduke'),
|
||||||
|
@ -299,6 +299,7 @@ class GoComics(ParserScraper):
|
||||||
cls('MessycowComics', 'messy-cow'),
|
cls('MessycowComics', 'messy-cow'),
|
||||||
cls('MexikidStories', 'mexikid-stories'),
|
cls('MexikidStories', 'mexikid-stories'),
|
||||||
cls('MichaelRamirez', 'michaelramirez'),
|
cls('MichaelRamirez', 'michaelramirez'),
|
||||||
|
cls('MikeBeckom', 'mike-beckom'),
|
||||||
cls('MikeDuJour', 'mike-du-jour'),
|
cls('MikeDuJour', 'mike-du-jour'),
|
||||||
cls('MikeLester', 'mike-lester'),
|
cls('MikeLester', 'mike-lester'),
|
||||||
cls('MikeLuckovich', 'mikeluckovich'),
|
cls('MikeLuckovich', 'mikeluckovich'),
|
||||||
|
@ -307,9 +308,9 @@ class GoComics(ParserScraper):
|
||||||
cls('Momma', 'momma'),
|
cls('Momma', 'momma'),
|
||||||
cls('Monty', 'monty'),
|
cls('Monty', 'monty'),
|
||||||
cls('MontyDiaros', 'monty-diaros', 'es'),
|
cls('MontyDiaros', 'monty-diaros', 'es'),
|
||||||
|
# MotherGooseAndGrimm has a duplicate in ComicsKingdom/MotherGooseAndGrimm
|
||||||
cls('MotleyClassics', 'motley-classics'),
|
cls('MotleyClassics', 'motley-classics'),
|
||||||
cls('MrLowe', 'mr-lowe'),
|
cls('MrLowe', 'mr-lowe'),
|
||||||
cls('MtPleasant', 'mtpleasant'),
|
|
||||||
cls('MuttAndJeff', 'muttandjeff'),
|
cls('MuttAndJeff', 'muttandjeff'),
|
||||||
cls('MyDadIsDracula', 'my-dad-is-dracula'),
|
cls('MyDadIsDracula', 'my-dad-is-dracula'),
|
||||||
cls('MythTickle', 'mythtickle'),
|
cls('MythTickle', 'mythtickle'),
|
||||||
|
@ -341,10 +342,10 @@ class GoComics(ParserScraper):
|
||||||
cls('OverTheHedge', 'overthehedge'),
|
cls('OverTheHedge', 'overthehedge'),
|
||||||
cls('OzyAndMillie', 'ozy-and-millie'),
|
cls('OzyAndMillie', 'ozy-and-millie'),
|
||||||
cls('PatOliphant', 'patoliphant'),
|
cls('PatOliphant', 'patoliphant'),
|
||||||
cls('PCAndPixel', 'pcandpixel'),
|
|
||||||
cls('Peanuts', 'peanuts'),
|
cls('Peanuts', 'peanuts'),
|
||||||
cls('PeanutsBegins', 'peanuts-begins'),
|
cls('PeanutsBegins', 'peanuts-begins'),
|
||||||
cls('PearlsBeforeSwine', 'pearlsbeforeswine'),
|
cls('PearlsBeforeSwine', 'pearlsbeforeswine'),
|
||||||
|
cls('PedroXMolina', 'pedroxmolina'),
|
||||||
cls('Periquita', 'periquita', 'es'),
|
cls('Periquita', 'periquita', 'es'),
|
||||||
cls('PerlasParaLosCerdos', 'perlas-para-los-cerdos', 'es'),
|
cls('PerlasParaLosCerdos', 'perlas-para-los-cerdos', 'es'),
|
||||||
cls('PerryBibleFellowship', 'perry-bible-fellowship'),
|
cls('PerryBibleFellowship', 'perry-bible-fellowship'),
|
||||||
|
@ -383,7 +384,6 @@ class GoComics(ParserScraper):
|
||||||
cls('RoseIsRose', 'roseisrose'),
|
cls('RoseIsRose', 'roseisrose'),
|
||||||
cls('Rubes', 'rubes'),
|
cls('Rubes', 'rubes'),
|
||||||
cls('RudyPark', 'rudypark'),
|
cls('RudyPark', 'rudypark'),
|
||||||
cls('SaltNPepper', 'salt-n-pepper'),
|
|
||||||
cls('SarahsScribbles', 'sarahs-scribbles'),
|
cls('SarahsScribbles', 'sarahs-scribbles'),
|
||||||
cls('SaturdayMorningBreakfastCereal', 'saturday-morning-breakfast-cereal'),
|
cls('SaturdayMorningBreakfastCereal', 'saturday-morning-breakfast-cereal'),
|
||||||
cls('SavageChickens', 'savage-chickens'),
|
cls('SavageChickens', 'savage-chickens'),
|
||||||
|
@ -394,13 +394,11 @@ class GoComics(ParserScraper):
|
||||||
cls('ShermansLagoon', 'shermanslagoon'),
|
cls('ShermansLagoon', 'shermanslagoon'),
|
||||||
cls('ShirleyAndSonClassics', 'shirley-and-son-classics'),
|
cls('ShirleyAndSonClassics', 'shirley-and-son-classics'),
|
||||||
cls('Shoe', 'shoe'),
|
cls('Shoe', 'shoe'),
|
||||||
cls('SigneWilkinson', 'signewilkinson'),
|
|
||||||
cls('SketchsharkComics', 'sketchshark-comics'),
|
cls('SketchsharkComics', 'sketchshark-comics'),
|
||||||
cls('SkinHorse', 'skinhorse'),
|
cls('SkinHorse', 'skinhorse'),
|
||||||
cls('Skippy', 'skippy'),
|
cls('Skippy', 'skippy'),
|
||||||
cls('SmallPotatoes', 'small-potatoes'),
|
cls('SmallPotatoes', 'small-potatoes'),
|
||||||
cls('SnoopyEnEspanol', 'peanuts-espanol', 'es'),
|
cls('SnoopyEnEspanol', 'peanuts-espanol', 'es'),
|
||||||
cls('Snowflakes', 'snowflakes'),
|
|
||||||
cls('SnowSez', 'snow-sez'),
|
cls('SnowSez', 'snow-sez'),
|
||||||
cls('SpeedBump', 'speedbump'),
|
cls('SpeedBump', 'speedbump'),
|
||||||
cls('SpiritOfTheStaircase', 'spirit-of-the-staircase'),
|
cls('SpiritOfTheStaircase', 'spirit-of-the-staircase'),
|
||||||
|
@ -410,9 +408,7 @@ class GoComics(ParserScraper):
|
||||||
cls('SteveKelley', 'stevekelley'),
|
cls('SteveKelley', 'stevekelley'),
|
||||||
cls('StickyComics', 'sticky-comics'),
|
cls('StickyComics', 'sticky-comics'),
|
||||||
cls('StoneSoup', 'stonesoup'),
|
cls('StoneSoup', 'stonesoup'),
|
||||||
cls('StoneSoupClassics', 'stone-soup-classics'),
|
|
||||||
cls('StrangeBrew', 'strangebrew'),
|
cls('StrangeBrew', 'strangebrew'),
|
||||||
cls('StuartCarlson', 'stuartcarlson'),
|
|
||||||
cls('StudioJantze', 'studio-jantze'),
|
cls('StudioJantze', 'studio-jantze'),
|
||||||
cls('SunnyStreet', 'sunny-street'),
|
cls('SunnyStreet', 'sunny-street'),
|
||||||
cls('SunshineState', 'sunshine-state'),
|
cls('SunshineState', 'sunshine-state'),
|
||||||
|
@ -425,6 +421,7 @@ class GoComics(ParserScraper):
|
||||||
cls('TarzanEnEspanol', 'tarzan-en-espanol', 'es'),
|
cls('TarzanEnEspanol', 'tarzan-en-espanol', 'es'),
|
||||||
cls('TedRall', 'ted-rall'),
|
cls('TedRall', 'ted-rall'),
|
||||||
cls('TenCats', 'ten-cats'),
|
cls('TenCats', 'ten-cats'),
|
||||||
|
cls('Tex', 'tex'),
|
||||||
cls('TextsFromMittens', 'texts-from-mittens'),
|
cls('TextsFromMittens', 'texts-from-mittens'),
|
||||||
cls('Thatababy', 'thatababy'),
|
cls('Thatababy', 'thatababy'),
|
||||||
cls('ThatIsPriceless', 'that-is-priceless'),
|
cls('ThatIsPriceless', 'that-is-priceless'),
|
||||||
|
@ -451,6 +448,7 @@ class GoComics(ParserScraper):
|
||||||
cls('TheHumbleStumble', 'humble-stumble'),
|
cls('TheHumbleStumble', 'humble-stumble'),
|
||||||
cls('TheKChronicles', 'thekchronicles'),
|
cls('TheKChronicles', 'thekchronicles'),
|
||||||
cls('TheKnightLife', 'theknightlife'),
|
cls('TheKnightLife', 'theknightlife'),
|
||||||
|
cls('TheLockhorns', 'lockhorns'),
|
||||||
cls('TheMartianConfederacy', 'the-martian-confederacy'),
|
cls('TheMartianConfederacy', 'the-martian-confederacy'),
|
||||||
cls('TheMeaningOfLila', 'meaningoflila'),
|
cls('TheMeaningOfLila', 'meaningoflila'),
|
||||||
cls('TheMiddleAge', 'the-middle-age'),
|
cls('TheMiddleAge', 'the-middle-age'),
|
||||||
|
@ -473,6 +471,7 @@ class GoComics(ParserScraper):
|
||||||
cls('TruthFacts', 'truth-facts'),
|
cls('TruthFacts', 'truth-facts'),
|
||||||
cls('Tutelandia', 'tutelandia', 'es'),
|
cls('Tutelandia', 'tutelandia', 'es'),
|
||||||
cls('TwoPartyOpera', 'two-party-opera'),
|
cls('TwoPartyOpera', 'two-party-opera'),
|
||||||
|
cls('UFO', 'ufo'),
|
||||||
cls('UnderpantsAndOverbites', 'underpants-and-overbites'),
|
cls('UnderpantsAndOverbites', 'underpants-and-overbites'),
|
||||||
cls('UnderstandingChaos', 'understanding-chaos'),
|
cls('UnderstandingChaos', 'understanding-chaos'),
|
||||||
cls('UnstrangePhenomena', 'unstrange-phenomena'),
|
cls('UnstrangePhenomena', 'unstrange-phenomena'),
|
||||||
|
@ -487,6 +486,7 @@ class GoComics(ParserScraper):
|
||||||
cls('ViiviAndWagner', 'viivi-and-wagner'),
|
cls('ViiviAndWagner', 'viivi-and-wagner'),
|
||||||
cls('WallaceTheBrave', 'wallace-the-brave'),
|
cls('WallaceTheBrave', 'wallace-the-brave'),
|
||||||
cls('WaltHandelsman', 'walthandelsman'),
|
cls('WaltHandelsman', 'walthandelsman'),
|
||||||
|
cls('Wannabe', 'wannabe'),
|
||||||
cls('Warped', 'warped'),
|
cls('Warped', 'warped'),
|
||||||
cls('WatchYourHead', 'watchyourhead'),
|
cls('WatchYourHead', 'watchyourhead'),
|
||||||
cls('Wawawiwa', 'wawawiwa'),
|
cls('Wawawiwa', 'wawawiwa'),
|
||||||
|
@ -505,6 +505,7 @@ class GoComics(ParserScraper):
|
||||||
cls('WuMo', 'wumo'),
|
cls('WuMo', 'wumo'),
|
||||||
cls('WumoEnEspanol', 'wumoespanol', 'es'),
|
cls('WumoEnEspanol', 'wumoespanol', 'es'),
|
||||||
cls('Yaffle', 'yaffle'),
|
cls('Yaffle', 'yaffle'),
|
||||||
|
cls('YeahItsChill', 'yeah-its-chill'),
|
||||||
cls('YesImHotInThis', 'yesimhotinthis'),
|
cls('YesImHotInThis', 'yesimhotinthis'),
|
||||||
cls('ZackHill', 'zackhill'),
|
cls('ZackHill', 'zackhill'),
|
||||||
cls('ZenPencils', 'zen-pencils'),
|
cls('ZenPencils', 'zen-pencils'),
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ class KemonoCafe(ParserScraper):
|
||||||
# Fix unordered filenames
|
# Fix unordered filenames
|
||||||
if 'addictivescience' in pageUrl:
|
if 'addictivescience' in pageUrl:
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
num = int(page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', ''))
|
num = int(self.match(page, '//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', ''))
|
||||||
filename = '%04d_%s' % (num, filename)
|
filename = '%04d_%s' % (num, filename)
|
||||||
elif 'CaughtInOrbit' in filename:
|
elif 'CaughtInOrbit' in filename:
|
||||||
filename = filename.replace('CaughtInOrbit', 'CIO')
|
filename = filename.replace('CaughtInOrbit', 'CIO')
|
||||||
|
|
|
@ -5,24 +5,7 @@
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..scraper import ParserScraper, _ParserScraper
|
from ..scraper import ParserScraper, _ParserScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
|
from .common import ComicControlScraper, WordPressScraper
|
||||||
|
|
||||||
|
|
||||||
class Lackadaisy(ParserScraper):
|
|
||||||
url = 'https://www.lackadaisy.com/comic.php'
|
|
||||||
stripUrl = url + '?comicid=%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = '//div[@id="exhibit"]/img[contains(@src, "comic/")]'
|
|
||||||
prevSearch = '//div[@class="prev"]/a'
|
|
||||||
nextSearch = '//div[@class="next"]/a'
|
|
||||||
help = 'Index format: n'
|
|
||||||
starter = bounceStarter
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
# Use comic id for filename
|
|
||||||
num = pageUrl.rsplit('=', 1)[-1]
|
|
||||||
ext = imageUrl.rsplit('.', 1)[-1]
|
|
||||||
return 'lackadaisy_%s.%s' % (num, ext)
|
|
||||||
|
|
||||||
|
|
||||||
class Lancer(WordPressScraper):
|
class Lancer(WordPressScraper):
|
||||||
|
@ -55,7 +38,7 @@ class LazJonesAndTheMayfieldRegulatorsSideStories(LazJonesAndTheMayfieldRegulato
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
# Fix broken navigation links
|
# Fix broken navigation links
|
||||||
if url == self.url and data.xpath(self.prevSearch + '/@href')[0] == self.stripUrl % 'summer00':
|
if url == self.url and self.match(data, self.prevSearch + '/@href')[0] == self.stripUrl % 'summer00':
|
||||||
return self.stripUrl % 'summer21'
|
return self.stripUrl % 'summer21'
|
||||||
return super(LazJonesAndTheMayfieldRegulators, self).getPrevUrl(url, data)
|
return super(LazJonesAndTheMayfieldRegulators, self).getPrevUrl(url, data)
|
||||||
|
|
||||||
|
|
|
@ -4,22 +4,18 @@
|
||||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
import json
|
import json
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..xml import NS
|
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressWebcomic
|
from .common import ComicControlScraper, WordPressScraper, WordPressWebcomic
|
||||||
|
|
||||||
|
|
||||||
class MacHall(_BasicScraper):
|
class MacHall(ComicControlScraper):
|
||||||
url = 'http://www.machall.com/'
|
url = 'https://www.machall.com/'
|
||||||
stripUrl = url + 'view.php?date=%s'
|
stripUrl = url + 'comic/%s'
|
||||||
firstStripUrl = stripUrl % '2000-11-07'
|
firstStripUrl = stripUrl % 'moving-in'
|
||||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
|
||||||
prevSearch = compile(r'<a href="(.+?)"><img[^>]+?src=\'drop_shadow/previous.gif\'>')
|
|
||||||
help = 'Index format: yyyy-mm-dd'
|
|
||||||
|
|
||||||
|
|
||||||
class MadamAndEve(_BasicScraper):
|
class MadamAndEve(_BasicScraper):
|
||||||
|
@ -58,12 +54,12 @@ class MareInternum(WordPressScraper):
|
||||||
firstStripUrl = stripUrl % 'intro-page-1'
|
firstStripUrl = stripUrl % 'intro-page-1'
|
||||||
|
|
||||||
|
|
||||||
class Marilith(_BasicScraper):
|
class Marilith(ParserScraper):
|
||||||
url = 'http://www.marilith.com/'
|
url = 'https://web.archive.org/web/20170619193143/http://www.marilith.com/'
|
||||||
stripUrl = url + 'archive.php?date=%s'
|
stripUrl = url + 'archive.php?date=%s'
|
||||||
firstStripUrl = stripUrl % '20041215'
|
firstStripUrl = stripUrl % '20041215'
|
||||||
imageSearch = compile(r'<img src="(comics/.+?)" border')
|
imageSearch = '//img[contains(@src, "comics/")]'
|
||||||
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
|
prevSearch = '//a[img[@name="previous_day"]]'
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -80,22 +76,14 @@ class MarriedToTheSea(_ParserScraper):
|
||||||
return '%s-%s' % (date, filename)
|
return '%s-%s' % (date, filename)
|
||||||
|
|
||||||
|
|
||||||
class MarryMe(_ParserScraper):
|
class MarryMe(ParserScraper):
|
||||||
url = 'http://marryme.keenspot.com/'
|
stripUrl = 'http://marryme.keenspot.com/d/%s.html'
|
||||||
stripUrl = url + 'd/%s.html'
|
url = stripUrl % '20191001'
|
||||||
firstStripUrl = stripUrl % '20120730'
|
firstStripUrl = stripUrl % '20120730'
|
||||||
imageSearch = '//img[@class="ksc"]'
|
imageSearch = '//img[@class="ksc"]'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
endOfLife = True
|
endOfLife = True
|
||||||
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
class MaxOveracts(_ParserScraper):
|
|
||||||
url = 'http://occasionalcomics.com/'
|
|
||||||
stripUrl = url + '%s/'
|
|
||||||
css = True
|
|
||||||
imageSearch = '#comic img'
|
|
||||||
prevSearch = '.nav-previous > a'
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class Meek(WordPressScraper):
|
class Meek(WordPressScraper):
|
||||||
|
@ -149,20 +137,22 @@ class MisfileHellHigh(Misfile):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
class MistyTheMouse(WordPressScraper):
|
class MistyTheMouse(ParserScraper):
|
||||||
url = 'http://www.mistythemouse.com/'
|
url = 'http://www.mistythemouse.com/'
|
||||||
prevSearch = '//a[@rel="prev"]'
|
imageSearch = '//center/p/img'
|
||||||
firstStripUrl = 'http://www.mistythemouse.com/?p=12'
|
prevSearch = '//a[img[contains(@src, "Previous")]]'
|
||||||
|
firstStripUrl = url + 'The_Live_In.html'
|
||||||
|
|
||||||
|
|
||||||
class MonkeyUser(_ParserScraper):
|
class MonkeyUser(ParserScraper):
|
||||||
url = 'https://www.monkeyuser.com/'
|
url = 'https://www.monkeyuser.com/'
|
||||||
prevSearch = '//div[@title="previous"]/a'
|
|
||||||
imageSearch = '//div[d:class("content")]/p/img'
|
imageSearch = '//div[d:class("content")]/p/img'
|
||||||
|
prevSearch = '//a[text()="Prev"]'
|
||||||
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
# videos
|
# videos
|
||||||
return data.xpath('//div[d:class("video-container")]', namespaces=NS)
|
return self.match(data, '//div[d:class("video-container")]')
|
||||||
|
|
||||||
|
|
||||||
class MonsieurLeChien(ParserScraper):
|
class MonsieurLeChien(ParserScraper):
|
||||||
|
@ -195,43 +185,10 @@ class Moonlace(WordPressWebcomic):
|
||||||
return indirectStarter(self)
|
return indirectStarter(self)
|
||||||
|
|
||||||
|
|
||||||
class Moonsticks(_ParserScraper):
|
class Moonsticks(ParserScraper):
|
||||||
url = "http://moonsticks.org/"
|
url = "https://moonsticks.org/"
|
||||||
imageSearch = "//div[@class='entry']//img"
|
imageSearch = "//div[d:class('entry-content')]//img"
|
||||||
prevSearch = u"//a[text()='\u00AB Prev']"
|
prevSearch = ('//a[@rel="prev"]', "//a[text()='\u00AB Prev']")
|
||||||
|
|
||||||
|
|
||||||
class MrLovenstein(_BasicScraper):
|
|
||||||
url = 'http://www.mrlovenstein.com/'
|
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % '1'
|
|
||||||
imageSearch = (
|
|
||||||
# captures rollover comic
|
|
||||||
compile(tagre("div", "class", r'comic_image') + r'\s*.*\s*' +
|
|
||||||
tagre("div", "style", r'display: none;') + r'\s*.*\s' +
|
|
||||||
tagre("img", "src", r'(/images/comics/[^"]+)')),
|
|
||||||
# captures standard comic
|
|
||||||
compile(tagre("img", "src", r'(/images/comics/[^"]+)',
|
|
||||||
before="comic_main_image")),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
|
||||||
tagre("img", "src", "/images/nav_left.png"))
|
|
||||||
textSearch = compile(r'<meta name="description" content="(.+?)" />')
|
|
||||||
help = 'Index Format: n'
|
|
||||||
|
|
||||||
|
|
||||||
class MyCartoons(_BasicScraper):
|
|
||||||
url = 'http://mycartoons.de/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'page/%s'
|
|
||||||
imageSearch = (
|
|
||||||
compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
|
|
||||||
compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) +
|
|
||||||
"«")
|
|
||||||
help = 'Index format: number'
|
|
||||||
lang = 'de'
|
|
||||||
|
|
||||||
|
|
||||||
class MyLifeWithFel(ParserScraper):
|
class MyLifeWithFel(ParserScraper):
|
||||||
|
|
|
@ -11,6 +11,12 @@ from ..util import tagre
|
||||||
from .common import WordPressScraper, WordPressNavi
|
from .common import WordPressScraper, WordPressNavi
|
||||||
|
|
||||||
|
|
||||||
|
class OccasionalComicsDisorder(WordPressScraper):
|
||||||
|
url = 'https://occasionalcomics.com/'
|
||||||
|
stripUrl = url + 'comic/%s/'
|
||||||
|
firstStripUrl = stripUrl % 'latest-comic-2'
|
||||||
|
|
||||||
|
|
||||||
class OctopusPie(_ParserScraper):
|
class OctopusPie(_ParserScraper):
|
||||||
url = 'http://www.octopuspie.com/'
|
url = 'http://www.octopuspie.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
|
|
|
@ -604,7 +604,6 @@ class Removed(Scraper):
|
||||||
cls('WotNow'),
|
cls('WotNow'),
|
||||||
|
|
||||||
# Removed in 3.0
|
# Removed in 3.0
|
||||||
cls('CatenaManor/CatenaCafe'),
|
|
||||||
cls('ComicFury/AdventuresOftheGreatCaptainMaggieandCrew'),
|
cls('ComicFury/AdventuresOftheGreatCaptainMaggieandCrew'),
|
||||||
cls('ComicFury/AWAKENING'),
|
cls('ComicFury/AWAKENING'),
|
||||||
cls('ComicFury/Beebleville'),
|
cls('ComicFury/Beebleville'),
|
||||||
|
@ -833,8 +832,6 @@ class Removed(Scraper):
|
||||||
cls('ComicsKingdom/Redeye'),
|
cls('ComicsKingdom/Redeye'),
|
||||||
cls('ComicsKingdom/RedeyeSundays'),
|
cls('ComicsKingdom/RedeyeSundays'),
|
||||||
cls('CrapIDrewOnMyLunchBreak'),
|
cls('CrapIDrewOnMyLunchBreak'),
|
||||||
cls('FalseStart'),
|
|
||||||
cls('Ginpu'),
|
|
||||||
cls('GoComics/060'),
|
cls('GoComics/060'),
|
||||||
cls('GoComics/2CowsAndAChicken'),
|
cls('GoComics/2CowsAndAChicken'),
|
||||||
cls('GoComics/ABitSketch'),
|
cls('GoComics/ABitSketch'),
|
||||||
|
@ -995,11 +992,9 @@ class Removed(Scraper):
|
||||||
cls('GoComics/Wrobbertcartoons'),
|
cls('GoComics/Wrobbertcartoons'),
|
||||||
cls('GoComics/Zootopia'),
|
cls('GoComics/Zootopia'),
|
||||||
cls('JustAnotherEscape'),
|
cls('JustAnotherEscape'),
|
||||||
cls('KemonoCafe/PrincessBunny'),
|
|
||||||
cls('Laiyu', 'brk'),
|
cls('Laiyu', 'brk'),
|
||||||
cls('MangaDex/DrStone', 'legal'),
|
cls('MangaDex/DrStone', 'legal'),
|
||||||
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
||||||
cls('MangaDex/ImTheMaxLevelNewbie', 'legal'),
|
|
||||||
cls('MangaDex/SPYxFAMILY', 'legal'),
|
cls('MangaDex/SPYxFAMILY', 'legal'),
|
||||||
cls('Ryugou'),
|
cls('Ryugou'),
|
||||||
cls('SeelPeel'),
|
cls('SeelPeel'),
|
||||||
|
@ -1573,22 +1568,82 @@ class Removed(Scraper):
|
||||||
cls('SnafuComics/Tin'),
|
cls('SnafuComics/Tin'),
|
||||||
cls('SnafuComics/Titan'),
|
cls('SnafuComics/Titan'),
|
||||||
cls('StudioKhimera/Eorah', 'mov'),
|
cls('StudioKhimera/Eorah', 'mov'),
|
||||||
cls('StudioKhimera/Mousechevious'),
|
|
||||||
cls('StuffNoOneToldMe'),
|
cls('StuffNoOneToldMe'),
|
||||||
cls('TaleOfTenThousand'),
|
cls('TaleOfTenThousand'),
|
||||||
cls('TalesAndTactics'),
|
|
||||||
cls('TheCyantianChronicles/CookieCaper'),
|
cls('TheCyantianChronicles/CookieCaper'),
|
||||||
cls('TheCyantianChronicles/Pawprints'),
|
cls('TheCyantianChronicles/Pawprints'),
|
||||||
cls('VampireHunterBoyfriends'),
|
|
||||||
cls('VGCats/Adventure'),
|
cls('VGCats/Adventure'),
|
||||||
cls('VGCats/Super'),
|
cls('VGCats/Super'),
|
||||||
cls('VictimsOfTheSystem'),
|
cls('VictimsOfTheSystem'),
|
||||||
cls('WebDesignerCOTW'),
|
cls('WebDesignerCOTW'),
|
||||||
cls('WebToons/Adamsville'),
|
cls('WebToons/Adamsville'),
|
||||||
cls('WebToons/CrapIDrewOnMyLunchBreak'),
|
cls('WebToons/CrapIDrewOnMyLunchBreak'),
|
||||||
|
cls('WintersLight'),
|
||||||
|
|
||||||
|
# Removed in 3.1
|
||||||
|
cls('AbbysAgency', 'brk'),
|
||||||
|
cls('AcademyVale'),
|
||||||
|
cls('AhoyEarth', 'block'),
|
||||||
|
cls('Anaria', 'del'),
|
||||||
|
cls('Angels2200', 'del'),
|
||||||
|
cls('BlackRose', 'brk'),
|
||||||
|
cls('CatenaManor/CatenaCafe'),
|
||||||
|
cls('ComicsKingdom/AmazingSpiderman'),
|
||||||
|
cls('ComicsKingdom/AmazingSpidermanSpanish'),
|
||||||
|
cls('ComicsKingdom/BigBenBoltSundays'),
|
||||||
|
cls('ComicsKingdom/BonersArkSundays'),
|
||||||
|
cls('ComicsKingdom/BrianDuffy'),
|
||||||
|
cls('ComicsKingdom/Crankshaft'),
|
||||||
|
cls('ComicsKingdom/FlashGordonSundays'),
|
||||||
|
cls('ComicsKingdom/FunkyWinkerbean'),
|
||||||
|
cls('ComicsKingdom/FunkyWinkerbeanSunday'),
|
||||||
|
cls('ComicsKingdom/FunkyWinkerbeanSundays'),
|
||||||
|
cls('ComicsKingdom/FunkyWinkerbeanVintage'),
|
||||||
|
cls('ComicsKingdom/HeartOfJulietJonesSundays'),
|
||||||
|
cls('ComicsKingdom/KatzenjammerKidsSundays'),
|
||||||
|
cls('ComicsKingdom/Lockhorns'),
|
||||||
|
cls('ComicsKingdom/MandrakeTheMagicianSundays'),
|
||||||
|
cls('ComicsKingdom/MarkTrailVintage'),
|
||||||
|
cls('ComicsKingdom/MikePeters'),
|
||||||
|
cls('ComicsKingdom/MotherGooseAndGrimm'),
|
||||||
|
cls('ComicsKingdom/PhantomSundays'),
|
||||||
|
cls('ComicsKingdom/PrinceValiantSundays'),
|
||||||
|
cls('ComicsKingdom/Retail'),
|
||||||
|
cls('ComicsKingdom/TigerSundays'),
|
||||||
|
cls('ComicsKingdom/TigerVintage'),
|
||||||
|
cls('ComicsKingdom/TigerVintageSundays'),
|
||||||
|
cls('Everblue', 'block'),
|
||||||
|
cls('FalseStart'),
|
||||||
|
cls('Ginpu'),
|
||||||
|
cls('GoComics/9ChickweedLaneClassics'),
|
||||||
|
cls('GoComics/Badlands'),
|
||||||
|
cls('GoComics/BigNateFirstClass'),
|
||||||
|
cls('GoComics/BreakOfDay'),
|
||||||
|
cls('GoComics/Candorville'),
|
||||||
|
cls('GoComics/DilbertClassics'),
|
||||||
|
cls('GoComics/DilbertEnEspanol'),
|
||||||
|
cls('GoComics/DumbwichCastle'),
|
||||||
|
cls('GoComics/EyebeamClassic'),
|
||||||
|
cls('GoComics/GarfieldClassics'),
|
||||||
|
cls('GoComics/MakingIt'),
|
||||||
|
cls('GoComics/MtPleasant'),
|
||||||
|
cls('GoComics/PCAndPixel'),
|
||||||
|
cls('GoComics/SaltNPepper'),
|
||||||
|
cls('GoComics/SigneWilkinson'),
|
||||||
|
cls('GoComics/Snowflakes'),
|
||||||
|
cls('GoComics/StoneSoupClassics'),
|
||||||
|
cls('GoComics/StuartCarlson'),
|
||||||
|
cls('KemonoCafe/PrincessBunny'),
|
||||||
|
cls('Lackadaisy', 'block'),
|
||||||
|
cls('MangaDex/ImTheMaxLevelNewbie', 'legal'),
|
||||||
|
cls('MrLovenstein', 'jsh'),
|
||||||
|
cls('MyCartoons'),
|
||||||
|
cls('Shivae/BlackRose', 'brk'),
|
||||||
|
cls('StudioKhimera/Mousechevious'),
|
||||||
|
cls('TalesAndTactics'),
|
||||||
|
cls('VampireHunterBoyfriends'),
|
||||||
cls('WebToons/CrystalVirus'),
|
cls('WebToons/CrystalVirus'),
|
||||||
cls('WebToons/OVERPOWERED'),
|
cls('WebToons/OVERPOWERED'),
|
||||||
cls('WintersLight'),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1667,10 +1722,8 @@ class Renamed(Scraper):
|
||||||
# Renamed in 3.0
|
# Renamed in 3.0
|
||||||
cls('AHClub', 'RickGriffinStudios/AHClub'),
|
cls('AHClub', 'RickGriffinStudios/AHClub'),
|
||||||
cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'),
|
cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'),
|
||||||
cls('ComicsKingdom/FunkyWinkerbeanSundays', 'ComicsKingdom/FunkyWinkerbeanSunday'),
|
|
||||||
cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'),
|
cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'),
|
||||||
cls('ComicsKingdom/TheLittleKing', 'ComicsKingdom/LittleKing'),
|
cls('ComicsKingdom/TheLittleKing', 'ComicsKingdom/LittleKing'),
|
||||||
cls('ComicsKingdom/TigerSundays', 'ComicsKingdom/TigerVintageSundays'),
|
|
||||||
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
|
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
|
||||||
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
|
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
|
||||||
cls('GoComics/DarrinBell', 'ComicsKingdom/DarrinBell'),
|
cls('GoComics/DarrinBell', 'ComicsKingdom/DarrinBell'),
|
||||||
|
@ -1681,7 +1734,6 @@ class Renamed(Scraper):
|
||||||
cls('GoComics/Widdershins', 'Widdershins'),
|
cls('GoComics/Widdershins', 'Widdershins'),
|
||||||
cls('Guardia', 'ComicFury/Guardia'),
|
cls('Guardia', 'ComicFury/Guardia'),
|
||||||
cls('RadioactivePanda', 'Tapas/RadioactivePanda'),
|
cls('RadioactivePanda', 'Tapas/RadioactivePanda'),
|
||||||
cls('Shivae/BlackRose', 'BlackRose'),
|
|
||||||
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
|
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
|
||||||
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
|
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
|
||||||
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
|
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
|
||||||
|
@ -1694,6 +1746,9 @@ class Renamed(Scraper):
|
||||||
cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'),
|
cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'),
|
||||||
|
|
||||||
# Renamed in 3.1
|
# Renamed in 3.1
|
||||||
|
cls('ComicsKingdom/SlylockFoxAndComicsForKids', 'ComicsKingdom/SlylockFox'),
|
||||||
|
cls('ComicsKingdom/SlylockFoxAndComicsForKidsSpanish', 'ComicsKingdom/SlylockFoxSpanish'),
|
||||||
cls('Exiern', 'ComicFury/Exiern'),
|
cls('Exiern', 'ComicFury/Exiern'),
|
||||||
|
cls('MaxOveracts', 'OccasionalComicsDisorder'),
|
||||||
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
|
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
|
@ -34,16 +34,11 @@ class ParadigmShift(_BasicScraper):
|
||||||
help = 'Index format: custom'
|
help = 'Index format: custom'
|
||||||
|
|
||||||
|
|
||||||
class ParallelUniversum(_BasicScraper):
|
class ParallelUniversum(WordPressScraper):
|
||||||
url = 'http://www.paralleluniversum.net/'
|
url = 'https://www.paralleluniversum.net/'
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '001-der-comic-ist-tot'
|
firstStripUrl = stripUrl % '001-der-comic-ist-tot'
|
||||||
imageSearch = compile(tagre("img", "src",
|
prevSearch = '//a[@rel="prev"]'
|
||||||
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) +
|
|
||||||
tagre("span", "class", "prev"))
|
|
||||||
help = 'Index format: number-stripname'
|
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
|
|
||||||
|
@ -95,14 +90,12 @@ class PebbleVersion(_ParserScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
class PennyAndAggie(_BasicScraper):
|
class PennyAndAggie(ComicControlScraper):
|
||||||
url = 'http://pennyandaggie.com/'
|
url = 'https://pixietrixcomix.com/penny-and-aggie'
|
||||||
rurl = escape(url)
|
stripUrl = url + '/%s'
|
||||||
stripUrl = url + 'index.php?p=%s'
|
firstStripUrl = stripUrl % '2004-09-06'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
endOfLife = True
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
|
help = 'Index format: yyyy-mm-dd'
|
||||||
tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
||||||
|
|
||||||
class PennyArcade(_ParserScraper):
|
class PennyArcade(_ParserScraper):
|
||||||
|
@ -117,19 +110,17 @@ class PennyArcade(_ParserScraper):
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
||||||
class PeppermintSaga(WordPressNavi):
|
class PeppermintSaga(WordPressScraper):
|
||||||
url = 'http://www.pepsaga.com/'
|
url = 'http://www.pepsaga.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + 'comics/%s/'
|
||||||
firstStripUrl = stripUrl % '3'
|
firstStripUrl = stripUrl % 'the-sword-of-truth-vol1'
|
||||||
help = 'Index format: number'
|
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
class PeppermintSagaBGR(WordPressNavi):
|
class PeppermintSagaBGR(WordPressScraper):
|
||||||
url = 'http://bgr.pepsaga.com/'
|
url = 'http://bgr.pepsaga.com/'
|
||||||
stripUrl = url + '?p=%s'
|
stripUrl = url + '?comic=%s'
|
||||||
firstStripUrl = stripUrl % '4'
|
firstStripUrl = stripUrl % '04172011'
|
||||||
help = 'Index format: number'
|
|
||||||
adult = True
|
adult = True
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,14 +141,16 @@ class PeterAndWhitney(_ParserScraper):
|
||||||
prevSearch = '//a[./img[contains(@src, "nav_previous")]]'
|
prevSearch = '//a[./img[contains(@src, "nav_previous")]]'
|
||||||
|
|
||||||
|
|
||||||
class PHDComics(_ParserScraper):
|
class PHDComics(ParserScraper):
|
||||||
BROKEN_COMMENT_END = compile(r'--!>')
|
BROKEN_COMMENT_END = compile(r'--!>')
|
||||||
|
|
||||||
baseUrl = 'http://phdcomics.com/'
|
baseUrl = 'http://phdcomics.com/'
|
||||||
url = baseUrl + 'comics.php'
|
url = baseUrl + 'comics.php'
|
||||||
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//img[@id="comic2"]'
|
imageSearch = ('//img[@id="comic2"]',
|
||||||
|
r'//img[d:class("img-responsive") and re:test(@name, "comic\d+")]')
|
||||||
|
multipleImagesPerStrip = True
|
||||||
prevSearch = '//a[img[contains(@src, "prev_button")]]'
|
prevSearch = '//a[img[contains(@src, "prev_button")]]'
|
||||||
nextSearch = '//a[img[contains(@src, "next_button")]]'
|
nextSearch = '//a[img[contains(@src, "next_button")]]'
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
@ -173,7 +166,7 @@ class PHDComics(_ParserScraper):
|
||||||
# video
|
# video
|
||||||
self.stripUrl % '1880',
|
self.stripUrl % '1880',
|
||||||
self.stripUrl % '1669',
|
self.stripUrl % '1669',
|
||||||
)
|
) or self.match(data, '//img[@id="comic" and contains(@src, "phd083123s")]')
|
||||||
|
|
||||||
|
|
||||||
class Picklewhistle(ComicControlScraper):
|
class Picklewhistle(ComicControlScraper):
|
||||||
|
@ -333,11 +326,12 @@ class PS238(_ParserScraper):
|
||||||
|
|
||||||
class PvPOnline(ParserScraper):
|
class PvPOnline(ParserScraper):
|
||||||
baseUrl = 'https://www.toonhoundstudios.com/'
|
baseUrl = 'https://www.toonhoundstudios.com/'
|
||||||
url = baseUrl + 'pvp/'
|
stripUrl = baseUrl + 'comic/%s/?sid=372'
|
||||||
stripUrl = baseUrl + 'comic/%s/'
|
url = stripUrl % 'pvp-2022-09-16'
|
||||||
firstStripUrl = stripUrl % '19980504'
|
firstStripUrl = stripUrl % '19980504'
|
||||||
imageSearch = '//div[@id="spliced-comic"]//img/@data-src-img'
|
imageSearch = '//div[@id="spliced-comic"]//img/@data-src-img'
|
||||||
prevSearch = '//a[d:class("prev")]'
|
prevSearch = '//a[d:class("prev")]'
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, image_url, page_url):
|
||||||
return 'pvp' + imageUrl.rsplit('/', 1)[-1]
|
return 'pvp' + image_url.rsplit('/', 1)[-1]
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile
|
from re import compile
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
@ -121,7 +121,7 @@ class Requiem(WordPressScraper):
|
||||||
firstStripUrl = stripUrl % '2004-06-07-3'
|
firstStripUrl = stripUrl % '2004-06-07-3'
|
||||||
|
|
||||||
|
|
||||||
class Replay(_ParserScraper):
|
class Replay(ParserScraper):
|
||||||
url = 'http://replaycomic.com/'
|
url = 'http://replaycomic.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % 'red-desert'
|
firstStripUrl = stripUrl % 'red-desert'
|
||||||
|
@ -132,11 +132,11 @@ class Replay(_ParserScraper):
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Retrieve archive page to identify chapters
|
# Retrieve archive page to identify chapters
|
||||||
archivePage = self.getPage(self.url + 'archive')
|
archivePage = self.getPage(self.url + 'archive')
|
||||||
archive = archivePage.xpath('//div[@class="comic-archive-chapter-wrap"]')
|
archive = self.match(archivePage, '//div[d:class("comic-archive-chapter-wrap")]')
|
||||||
self.chapter = len(archive) - 1
|
self.chapter = len(archive) - 1
|
||||||
self.startOfChapter = []
|
self.startOfChapter = []
|
||||||
for archiveChapter in archive:
|
for archiveChapter in archive:
|
||||||
self.startOfChapter.append(archiveChapter.xpath('.//a')[0].get('href'))
|
self.startOfChapter.append(self.match(archiveChapter, './/a')[0].get('href'))
|
||||||
return bounceStarter(self)
|
return bounceStarter(self)
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
|
|
|
@ -196,7 +196,7 @@ class Sharksplode(WordPressScraper):
|
||||||
class Sheldon(ParserScraper):
|
class Sheldon(ParserScraper):
|
||||||
url = 'https://www.sheldoncomics.com/'
|
url = 'https://www.sheldoncomics.com/'
|
||||||
firstStripUrl = url + 'comic/well-who-is-this/'
|
firstStripUrl = url + 'comic/well-who-is-this/'
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img/@data-src-img'
|
||||||
prevSearch = '//a[img[d:class("left")]]'
|
prevSearch = '//a[img[d:class("left")]]'
|
||||||
|
|
||||||
|
|
||||||
|
@ -435,7 +435,7 @@ class SpaceFurries(ParserScraper):
|
||||||
def extract_image_urls(self, url, data):
|
def extract_image_urls(self, url, data):
|
||||||
# Website requires JS, so build the list of image URLs manually
|
# Website requires JS, so build the list of image URLs manually
|
||||||
imageurls = []
|
imageurls = []
|
||||||
current = int(data.xpath('//input[@name="pagnum"]')[0].get('value'))
|
current = int(self.match(data, '//input[@name="pagnum"]')[0].get('value'))
|
||||||
for page in reversed(range(1, current + 1)):
|
for page in reversed(range(1, current + 1)):
|
||||||
imageurls.append(self.url + 'comics/' + str(page) + '.jpg')
|
imageurls.append(self.url + 'comics/' + str(page) + '.jpg')
|
||||||
return imageurls
|
return imageurls
|
||||||
|
@ -636,16 +636,16 @@ class StrongFemaleProtagonist(_ParserScraper):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class StupidFox(_ParserScraper):
|
class StupidFox(ParserScraper):
|
||||||
url = 'http://stupidfox.net/'
|
url = 'http://stupidfox.net/'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % 'hello'
|
firstStripUrl = stripUrl % 'hello'
|
||||||
imageSearch = '//div[@class="comicmid"]//img'
|
imageSearch = '//div[d:class("comicmid")]//img'
|
||||||
prevSearch = '//a[@accesskey="p"]'
|
prevSearch = '//a[@accesskey="p"]'
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
title = page.xpath(self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-')
|
title = self.match(page, self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-')
|
||||||
return title + '.' + imageUrl.rsplit('.', 1)[-1]
|
return title + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2021 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from .common import WordPressSpliced
|
from .common import WordPressSpliced
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,22 +12,20 @@ class _CommonMulti(WordPressSpliced):
|
||||||
self.endOfLife = eol
|
self.endOfLife = eol
|
||||||
|
|
||||||
|
|
||||||
class AbbysAgency(WordPressSpliced):
|
|
||||||
url = 'https://abbysagency.us/'
|
|
||||||
stripUrl = url + 'blog/comic/%s/'
|
|
||||||
firstStripUrl = stripUrl % 'a'
|
|
||||||
|
|
||||||
|
|
||||||
class AlienDice(WordPressSpliced):
|
class AlienDice(WordPressSpliced):
|
||||||
url = 'https://aliendice.com/'
|
url = 'https://aliendice.com/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % '05162001'
|
firstStripUrl = stripUrl % '05162001'
|
||||||
|
|
||||||
|
def shouldSkipUrl(self, url, data):
|
||||||
|
"""Skip pages without images."""
|
||||||
|
return not self.match(data, self.imageSearch)
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
# Fix broken navigation
|
# Fix broken navigation
|
||||||
if url == self.stripUrl % 'day-29-part-2-page-3-4':
|
if url == self.stripUrl % 'day-29-part-2-page-3-4':
|
||||||
return self.stripUrl % 'day-29-part-2-page-3-2'
|
return self.stripUrl % 'day-29-part-2-page-3-2'
|
||||||
return super(AlienDice, self).getPrevUrl(url, data)
|
return super().getPrevUrl(url, data)
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
# Fix inconsistent filename
|
# Fix inconsistent filename
|
||||||
|
@ -47,12 +45,6 @@ class AlienDiceLegacy(WordPressSpliced):
|
||||||
return super().isfirststrip(url.rsplit('?', 1)[0])
|
return super().isfirststrip(url.rsplit('?', 1)[0])
|
||||||
|
|
||||||
|
|
||||||
class BlackRose(WordPressSpliced):
|
|
||||||
url = 'https://www.blackrose.monster/'
|
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = stripUrl % '2004-11-01'
|
|
||||||
|
|
||||||
|
|
||||||
class TheCyantianChronicles(_CommonMulti):
|
class TheCyantianChronicles(_CommonMulti):
|
||||||
baseUrl = 'https://cyantian.net/'
|
baseUrl = 'https://cyantian.net/'
|
||||||
|
|
||||||
|
@ -81,9 +73,9 @@ class TheCyantianChronicles(_CommonMulti):
|
||||||
|
|
||||||
|
|
||||||
class Shivae(WordPressSpliced):
|
class Shivae(WordPressSpliced):
|
||||||
url = 'https://shivae.com/'
|
url = 'https://shivae.net/'
|
||||||
stripUrl = url + 'comic/%s/'
|
stripUrl = url + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % '09202001'
|
firstStripUrl = stripUrl % '2002-02-27'
|
||||||
|
|
||||||
|
|
||||||
class ShivaeComics(_CommonMulti):
|
class ShivaeComics(_CommonMulti):
|
||||||
|
|
|
@ -4,10 +4,7 @@
|
||||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape, MULTILINE
|
from re import compile, escape, MULTILINE
|
||||||
try:
|
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
except ImportError:
|
|
||||||
from cached_property import cached_property
|
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
from ..helpers import indirectStarter, joinPathPartsNamer
|
from ..helpers import indirectStarter, joinPathPartsNamer
|
||||||
|
@ -275,7 +272,7 @@ class ToonHole(ParserScraper):
|
||||||
prevSearch = '//a[@rel="prev"]'
|
prevSearch = '//a[@rel="prev"]'
|
||||||
latestSearch = '//a[@rel="bookmark"]'
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||||
|
|
||||||
|
|
||||||
class TrippingOverYou(_BasicScraper):
|
class TrippingOverYou(_BasicScraper):
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..output import out
|
from ..output import out
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
from ..xml import NS
|
|
||||||
|
|
||||||
|
|
||||||
class Tapas(ParserScraper):
|
class Tapas(ParserScraper):
|
||||||
|
@ -21,7 +20,7 @@ class Tapas(ParserScraper):
|
||||||
def starter(self):
|
def starter(self):
|
||||||
# Retrieve comic metadata from info page
|
# Retrieve comic metadata from info page
|
||||||
info = self.getPage(self.url)
|
info = self.getPage(self.url)
|
||||||
series = info.xpath('//@data-series-id')[0]
|
series = self.match(info, '//@data-series-id')[0]
|
||||||
# Retrieve comic metadata from API
|
# Retrieve comic metadata from API
|
||||||
data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
|
data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
|
||||||
data.raise_for_status()
|
data.raise_for_status()
|
||||||
|
@ -43,7 +42,7 @@ class Tapas(ParserScraper):
|
||||||
return self._cached_image_urls
|
return self._cached_image_urls
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):
|
if self.match(data, '//button[d:class("js-have-to-sign")]'):
|
||||||
out.warn(f'Nothing to download on "{url}", because a login is required.')
|
out.warn(f'Nothing to download on "{url}", because a login is required.')
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -107,7 +107,7 @@ class Unsounded(ParserScraper):
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
def extract_css_bg(self, page) -> str | None:
|
def extract_css_bg(self, page) -> str | None:
|
||||||
comicdivs = page.xpath('//div[@id="comic"]')
|
comicdivs = self.match(page, '//div[@id="comic"]')
|
||||||
if comicdivs:
|
if comicdivs:
|
||||||
style = comicdivs[0].attrib.get('style')
|
style = comicdivs[0].attrib.get('style')
|
||||||
if style:
|
if style:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
|
|
||||||
from ..scraper import ParserScraper, _ParserScraper
|
from ..scraper import ParserScraper, _ParserScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
|
@ -27,7 +27,7 @@ class VGCats(_ParserScraper):
|
||||||
url = 'https://www.vgcats.com/comics/'
|
url = 'https://www.vgcats.com/comics/'
|
||||||
stripUrl = url + '?strip_id=%s'
|
stripUrl = url + '?strip_id=%s'
|
||||||
firstStripUrl = stripUrl % '0'
|
firstStripUrl = stripUrl % '0'
|
||||||
imageSearch = '//td/img[contains(@src, "images/")]'
|
imageSearch = '//td/font/img[contains(@src, "images/")]'
|
||||||
prevSearch = '//a[img[contains(@src, "back.")]]'
|
prevSearch = '//a[img[contains(@src, "back.")]]'
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
@ -44,15 +44,15 @@ class Vibe(ParserScraper):
|
||||||
help = 'Index format: VIBEnnn (padded)'
|
help = 'Index format: VIBEnnn (padded)'
|
||||||
|
|
||||||
|
|
||||||
class VickiFox(_ParserScraper):
|
class VickiFox(ParserScraper):
|
||||||
url = 'http://www.vickifox.com/comic/strip'
|
url = 'http://www.vickifox.com/comic/strip'
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
firstStripUrl = stripUrl % '001'
|
firstStripUrl = stripUrl % '001'
|
||||||
imageSearch = '//img[contains(@src, "comic/")]'
|
imageSearch = '//img[contains(@src, "comic/")]'
|
||||||
prevSearch = '//button[@id="btnPrev"]/@value'
|
prevSearch = '//button[@id="btnPrev"]/@value'
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def link_modifier(self, fromurl, tourl):
|
||||||
return self.stripUrl % self.getPage(url).xpath(self.prevSearch)[0]
|
return self.stripUrl % tourl
|
||||||
|
|
||||||
|
|
||||||
class ViiviJaWagner(_ParserScraper):
|
class ViiviJaWagner(_ParserScraper):
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||||
|
@ -17,7 +17,7 @@ class WapsiSquare(WordPressNaviIn):
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return data.xpath('//iframe') # videos
|
return self.match(data, '//iframe') # videos
|
||||||
|
|
||||||
|
|
||||||
class WastedTalent(_ParserScraper):
|
class WastedTalent(_ParserScraper):
|
||||||
|
|
|
@ -24,9 +24,9 @@ class WebToons(ParserScraper):
|
||||||
self.session.cookies.set(cookie, 'false', domain='webtoons.com')
|
self.session.cookies.set(cookie, 'false', domain='webtoons.com')
|
||||||
# Find current episode number
|
# Find current episode number
|
||||||
listPage = self.getPage(self.listUrl)
|
listPage = self.getPage(self.listUrl)
|
||||||
currentEpisode = listPage.xpath('//div[@class="detail_lst"]/ul/li')[0].attrib['data-episode-no']
|
currentEpisode = self.match(listPage, '//div[d:class("detail_lst")]/ul/li')[0].attrib['data-episode-no']
|
||||||
# Check for completed tag
|
# Check for completed tag
|
||||||
self.endOfLife = (listPage.xpath('//div[@id="_asideDetail"]//span[@class="txt_ico_completed2"]') != [])
|
self.endOfLife = not self.match(listPage, '//div[@id="_asideDetail"]//span[d:class("txt_ico_completed2")]')
|
||||||
return self.stripUrl % currentEpisode
|
return self.stripUrl % currentEpisode
|
||||||
|
|
||||||
def extract_image_urls(self, url, data):
|
def extract_image_urls(self, url, data):
|
||||||
|
@ -52,6 +52,7 @@ class WebToons(ParserScraper):
|
||||||
cls('1111Animals', 'comedy/1111-animals', 437),
|
cls('1111Animals', 'comedy/1111-animals', 437),
|
||||||
cls('2015SpaceSeries', 'sf/2015-space-series', 391),
|
cls('2015SpaceSeries', 'sf/2015-space-series', 391),
|
||||||
cls('3SecondStrip', 'comedy/3-second-strip', 380),
|
cls('3SecondStrip', 'comedy/3-second-strip', 380),
|
||||||
|
cls('99ReinforcedStick', 'comedy/99-reinforced-wooden-stick', 4286),
|
||||||
cls('ABittersweetLife', 'slice-of-life/a-bittersweet-life', 294),
|
cls('ABittersweetLife', 'slice-of-life/a-bittersweet-life', 294),
|
||||||
cls('AboutDeath', 'drama/about-death', 82),
|
cls('AboutDeath', 'drama/about-death', 82),
|
||||||
cls('ABudgiesLife', 'slice-of-life/its-a-budgies-life', 985),
|
cls('ABudgiesLife', 'slice-of-life/its-a-budgies-life', 985),
|
||||||
|
@ -64,6 +65,7 @@ class WebToons(ParserScraper):
|
||||||
cls('AGoodDayToBeADog', 'romance/a-good-day-tobe-a-dog', 1390),
|
cls('AGoodDayToBeADog', 'romance/a-good-day-tobe-a-dog', 1390),
|
||||||
cls('Aisopos', 'drama/aisopos', 76),
|
cls('Aisopos', 'drama/aisopos', 76),
|
||||||
cls('AliceElise', 'fantasy/alice-elise', 1481),
|
cls('AliceElise', 'fantasy/alice-elise', 1481),
|
||||||
|
cls('AlloyComics', 'canvas/alloy-comics', 747447),
|
||||||
cls('AllThatWeHopeToBe', 'slice-of-life/all-that-we-hope-to-be', 470),
|
cls('AllThatWeHopeToBe', 'slice-of-life/all-that-we-hope-to-be', 470),
|
||||||
cls('AllThatYouAre', 'drama/all-that-you-are', 403),
|
cls('AllThatYouAre', 'drama/all-that-you-are', 403),
|
||||||
cls('AlwaysHuman', 'romance/always-human', 557),
|
cls('AlwaysHuman', 'romance/always-human', 557),
|
||||||
|
@ -128,6 +130,7 @@ class WebToons(ParserScraper):
|
||||||
cls('CursedPrincessClub', 'comedy/cursed-princess-club', 1537),
|
cls('CursedPrincessClub', 'comedy/cursed-princess-club', 1537),
|
||||||
cls('Cyberbunk', 'sf/cyberbunk', 466),
|
cls('Cyberbunk', 'sf/cyberbunk', 466),
|
||||||
cls('Cyberforce', 'super-hero/cyberforce', 531),
|
cls('Cyberforce', 'super-hero/cyberforce', 531),
|
||||||
|
cls('CydoniaShattering', 'fantasy/cydonia-shattering', 2881),
|
||||||
cls('CykoKO', 'super-hero/cyko-ko', 560),
|
cls('CykoKO', 'super-hero/cyko-ko', 560),
|
||||||
cls('Darbi', 'action/darbi', 1098),
|
cls('Darbi', 'action/darbi', 1098),
|
||||||
cls('Darchon', 'challenge/darchon', 532053),
|
cls('Darchon', 'challenge/darchon', 532053),
|
||||||
|
@ -153,6 +156,8 @@ class WebToons(ParserScraper):
|
||||||
cls('DrawnToYou', 'challenge/drawn-to-you', 172022),
|
cls('DrawnToYou', 'challenge/drawn-to-you', 172022),
|
||||||
cls('DrFrost', 'drama/dr-frost', 371),
|
cls('DrFrost', 'drama/dr-frost', 371),
|
||||||
cls('DuelIdentity', 'challenge/duel-identity', 532064),
|
cls('DuelIdentity', 'challenge/duel-identity', 532064),
|
||||||
|
cls('DungeonCleaningLife', 'action/the-dungeon-cleaning-life-of-a-once-genius-hunter', 4677),
|
||||||
|
cls('DungeonsAndDoodlesTalesFromTheTables', 'canvas/dungeons-doodles-tales-from-the-tables', 682646),
|
||||||
cls('DungeonMinis', 'challenge/dungeonminis', 64132),
|
cls('DungeonMinis', 'challenge/dungeonminis', 64132),
|
||||||
cls('Dustinteractive', 'comedy/dustinteractive', 907),
|
cls('Dustinteractive', 'comedy/dustinteractive', 907),
|
||||||
cls('DutyAfterSchool', 'sf/duty-after-school', 370),
|
cls('DutyAfterSchool', 'sf/duty-after-school', 370),
|
||||||
|
@ -170,6 +175,7 @@ class WebToons(ParserScraper):
|
||||||
cls('FAMILYMAN', 'drama/family-man', 85),
|
cls('FAMILYMAN', 'drama/family-man', 85),
|
||||||
cls('FantasySketchTheGame', 'sf/fantasy-sketch', 1020),
|
cls('FantasySketchTheGame', 'sf/fantasy-sketch', 1020),
|
||||||
cls('Faust', 'supernatural/faust', 522),
|
cls('Faust', 'supernatural/faust', 522),
|
||||||
|
cls('FinalRaidBoss', 'fantasy/the-final-raid-boss', 3921),
|
||||||
cls('FINALITY', 'mystery/finality', 1457),
|
cls('FINALITY', 'mystery/finality', 1457),
|
||||||
cls('Firebrand', 'supernatural/firebrand', 877),
|
cls('Firebrand', 'supernatural/firebrand', 877),
|
||||||
cls('FirstDefense', 'challenge/first-defense', 532072),
|
cls('FirstDefense', 'challenge/first-defense', 532072),
|
||||||
|
@ -204,11 +210,13 @@ class WebToons(ParserScraper):
|
||||||
cls('HeliosFemina', 'fantasy/helios-femina', 638),
|
cls('HeliosFemina', 'fantasy/helios-femina', 638),
|
||||||
cls('HelloWorld', 'slice-of-life/hello-world', 827),
|
cls('HelloWorld', 'slice-of-life/hello-world', 827),
|
||||||
cls('Hellper', 'fantasy/hellper', 185),
|
cls('Hellper', 'fantasy/hellper', 185),
|
||||||
|
cls('Hench', 'canvas/hench/', 857225),
|
||||||
cls('HeroineChic', 'super-hero/heroine-chic', 561),
|
cls('HeroineChic', 'super-hero/heroine-chic', 561),
|
||||||
cls('HIVE', 'thriller/hive', 65),
|
cls('HIVE', 'thriller/hive', 65),
|
||||||
cls('Hooky', 'fantasy/hooky', 425),
|
cls('Hooky', 'fantasy/hooky', 425),
|
||||||
cls('HoovesOfDeath', 'fantasy/hooves-of-death', 1535),
|
cls('HoovesOfDeath', 'fantasy/hooves-of-death', 1535),
|
||||||
cls('HouseOfStars', 'fantasy/house-of-stars', 1620),
|
cls('HouseOfStars', 'fantasy/house-of-stars', 1620),
|
||||||
|
cls('HowToBeAMindReaver', 'canvas/how-to-be-a-mind-reaver', 301213),
|
||||||
cls('HowToBecomeADragon', 'fantasy/how-to-become-a-dragon', 1973),
|
cls('HowToBecomeADragon', 'fantasy/how-to-become-a-dragon', 1973),
|
||||||
cls('HowToLove', 'slice-of-life/how-to-love', 472),
|
cls('HowToLove', 'slice-of-life/how-to-love', 472),
|
||||||
cls('IDontWantThisKindOfHero', 'super-hero/i-dont-want-this-kind-of-hero', 98),
|
cls('IDontWantThisKindOfHero', 'super-hero/i-dont-want-this-kind-of-hero', 98),
|
||||||
|
@ -235,6 +243,7 @@ class WebToons(ParserScraper):
|
||||||
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
|
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
|
||||||
cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703),
|
cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703),
|
||||||
cls('KnightRun', 'sf/knight-run', 67),
|
cls('KnightRun', 'sf/knight-run', 67),
|
||||||
|
cls('KnightUnderMyHeart', 'action/knight-under-my-heart', 4215),
|
||||||
cls('Kubera', 'fantasy/kubera', 83),
|
cls('Kubera', 'fantasy/kubera', 83),
|
||||||
cls('LalinsCurse', 'supernatural/lalins-curse', 1601),
|
cls('LalinsCurse', 'supernatural/lalins-curse', 1601),
|
||||||
cls('Lars', 'slice-of-life/lars', 358),
|
cls('Lars', 'slice-of-life/lars', 358),
|
||||||
|
@ -261,6 +270,7 @@ class WebToons(ParserScraper):
|
||||||
cls('LUMINE', 'fantasy/lumine', 1022),
|
cls('LUMINE', 'fantasy/lumine', 1022),
|
||||||
cls('Lunarbaboon', 'slice-of-life/lunarbaboon', 523),
|
cls('Lunarbaboon', 'slice-of-life/lunarbaboon', 523),
|
||||||
cls('MageAndDemonQueen', 'comedy/mage-and-demon-queen', 1438),
|
cls('MageAndDemonQueen', 'comedy/mage-and-demon-queen', 1438),
|
||||||
|
cls('MageAndMimic', 'comedy/mage-and-mimic', 5973),
|
||||||
cls('Magical12thGraders', 'super-hero/magical-12th-graders', 90),
|
cls('Magical12thGraders', 'super-hero/magical-12th-graders', 90),
|
||||||
cls('Magician', 'fantasy/magician', 70),
|
cls('Magician', 'fantasy/magician', 70),
|
||||||
cls('MagicSodaPop', 'fantasy/magic-soda-pop', 1947),
|
cls('MagicSodaPop', 'fantasy/magic-soda-pop', 1947),
|
||||||
|
@ -292,6 +302,8 @@ class WebToons(ParserScraper):
|
||||||
cls('MyGiantNerdBoyfriend', 'slice-of-life/my-giant-nerd-boyfriend', 958),
|
cls('MyGiantNerdBoyfriend', 'slice-of-life/my-giant-nerd-boyfriend', 958),
|
||||||
cls('MyKittyAndOldDog', 'slice-of-life/my-kitty-and-old-dog', 184),
|
cls('MyKittyAndOldDog', 'slice-of-life/my-kitty-and-old-dog', 184),
|
||||||
cls('MyNameIsBenny', 'slice-of-life/my-name-is-benny', 1279),
|
cls('MyNameIsBenny', 'slice-of-life/my-name-is-benny', 1279),
|
||||||
|
cls('MySClassHunter', 'action/my-s-class-hunters', 3963),
|
||||||
|
cls('MythicItemObtained', 'fantasy/mythic-item-obtained', 4582),
|
||||||
cls('MyWallflowerKiss', 'challenge/my-wallflower-kiss', 151869),
|
cls('MyWallflowerKiss', 'challenge/my-wallflower-kiss', 151869),
|
||||||
cls('NanoList', 'sf/nano-list', 700),
|
cls('NanoList', 'sf/nano-list', 700),
|
||||||
cls('NationalDogDay2016', 'slice-of-life/national-dog-day', 747),
|
cls('NationalDogDay2016', 'slice-of-life/national-dog-day', 747),
|
||||||
|
@ -439,6 +451,7 @@ class WebToons(ParserScraper):
|
||||||
cls('UpAndOut', 'slice-of-life/up-and-out', 488),
|
cls('UpAndOut', 'slice-of-life/up-and-out', 488),
|
||||||
cls('UrbanAnimal', 'super-hero/urban-animal', 1483),
|
cls('UrbanAnimal', 'super-hero/urban-animal', 1483),
|
||||||
cls('Uriah', 'horror/uriah', 1607),
|
cls('Uriah', 'horror/uriah', 1607),
|
||||||
|
cls('VampireFamily', 'comedy/vampire-family', 6402),
|
||||||
cls('VarsityNoir', 'mystery/varsity-noir', 1613),
|
cls('VarsityNoir', 'mystery/varsity-noir', 1613),
|
||||||
cls('VersionDayAndNight', 'drama/version-day-and-night', 1796),
|
cls('VersionDayAndNight', 'drama/version-day-and-night', 1796),
|
||||||
cls('WafflesAndPancakes', 'slice-of-life/waffles-and-pancakes', 1310),
|
cls('WafflesAndPancakes', 'slice-of-life/waffles-and-pancakes', 1310),
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2022 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..scraper import ParserScraper
|
from ..scraper import ParserScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
@ -15,21 +15,21 @@ class Wrongside(ParserScraper):
|
||||||
|
|
||||||
def starter(self):
|
def starter(self):
|
||||||
archivePage = self.getPage(self.url)
|
archivePage = self.getPage(self.url)
|
||||||
chapterUrls = archivePage.xpath('//ul[@class="albThumbs"]//a/@href')
|
chapterUrls = self.match(archivePage, '//ul[d:class("albThumbs")]//a/@href')
|
||||||
self.archive = []
|
self.archive = []
|
||||||
for chapterUrl in chapterUrls:
|
for chapterUrl in chapterUrls:
|
||||||
chapterPage = self.getPage(chapterUrl)
|
chapterPage = self.getPage(chapterUrl)
|
||||||
self.archive.append(chapterPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
|
self.archive.append(self.match(chapterPage, '(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
|
||||||
return self.archive[0]
|
return self.archive[0]
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
if data.xpath(self.prevSearch) == [] and len(self.archive) > 0:
|
if self.match(data, self.prevSearch) == [] and len(self.archive) > 0:
|
||||||
return self.archive.pop()
|
return self.archive.pop()
|
||||||
return super(Wrongside, self).getPrevUrl(url, data)
|
return super(Wrongside, self).getPrevUrl(url, data)
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
|
title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0]
|
||||||
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
@ -71,5 +71,5 @@ class WrongsideSideStories(ParserScraper):
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
def namer(self, imageUrl, pageUrl):
|
||||||
page = self.getPage(pageUrl)
|
page = self.getPage(pageUrl)
|
||||||
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
|
title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0]
|
||||||
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||||
|
|
|
@ -23,7 +23,7 @@ class Zapiro(ParserScraper):
|
||||||
imageSearch = '//div[@id="cartoon"]/img'
|
imageSearch = '//div[@id="cartoon"]/img'
|
||||||
prevSearch = '//a[d:class("left")]'
|
prevSearch = '//a[d:class("left")]'
|
||||||
nextSearch = '//a[d:class("right")]'
|
nextSearch = '//a[d:class("right")]'
|
||||||
namer = joinPathPartsNamer((-1,), ())
|
namer = joinPathPartsNamer(pageparts=(-1,))
|
||||||
|
|
||||||
|
|
||||||
class ZenPencils(WordPressNavi):
|
class ZenPencils(WordPressNavi):
|
||||||
|
@ -60,7 +60,7 @@ class Zwarwald(BasicScraper):
|
||||||
tagre("img", "src",
|
tagre("img", "src",
|
||||||
r'http://zwarwald\.de/images/prev\.jpg',
|
r'http://zwarwald\.de/images/prev\.jpg',
|
||||||
quote="'"))
|
quote="'"))
|
||||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
|
|
@ -119,45 +119,45 @@ class Scraper:
|
||||||
if val:
|
if val:
|
||||||
self._indexes = tuple(sorted(val))
|
self._indexes = tuple(sorted(val))
|
||||||
|
|
||||||
def __init__(self, name):
|
def __init__(self, name: str) -> None:
|
||||||
"""Initialize internal variables."""
|
"""Initialize internal variables."""
|
||||||
self.name = name
|
self.name = name
|
||||||
self.urls = set()
|
self.urls: set[str] = set()
|
||||||
self._indexes = ()
|
self._indexes = ()
|
||||||
self.skippedUrls = set()
|
self.skippedUrls: set[str] = set()
|
||||||
self.hitFirstStripUrl = False
|
self.hitFirstStripUrl = False
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self) -> int:
|
||||||
"""Get hash value from name and index list."""
|
"""Get hash value from name and index list."""
|
||||||
return hash((self.name, self.indexes))
|
return hash((self.name, self.indexes))
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url: str, data) -> bool:
|
||||||
"""Determine if search for images in given URL should be skipped."""
|
"""Determine if search for images in given URL should be skipped."""
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def getComicStrip(self, url, data):
|
def getComicStrip(self, url, data) -> ComicStrip:
|
||||||
"""Get comic strip downloader for given URL and data."""
|
"""Get comic strip downloader for given URL and data."""
|
||||||
imageUrls = self.extract_image_urls(url, data)
|
urls = self.extract_image_urls(url, data)
|
||||||
# map modifier function on image URLs
|
# map modifier function on image URLs
|
||||||
imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
|
urls = [self.imageUrlModifier(x, data) for x in urls]
|
||||||
# remove duplicate URLs
|
# remove duplicate URLs
|
||||||
imageUrls = uniq(imageUrls)
|
urls = uniq(urls)
|
||||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
if len(urls) > 1 and not self.multipleImagesPerStrip:
|
||||||
out.warn(
|
out.warn(
|
||||||
u"Found %d images instead of 1 at %s with expressions %s" %
|
u"Found %d images instead of 1 at %s with expressions %s" %
|
||||||
(len(imageUrls), url, prettyMatcherList(self.imageSearch)))
|
(len(urls), url, prettyMatcherList(self.imageSearch)))
|
||||||
image = imageUrls[0]
|
image = urls[0]
|
||||||
out.warn(u"Choosing image %s" % image)
|
out.warn("Choosing image %s" % image)
|
||||||
imageUrls = (image,)
|
urls = (image,)
|
||||||
elif not imageUrls:
|
elif not urls:
|
||||||
out.warn(u"Found no images at %s with expressions %s" % (url,
|
out.warn("Found no images at %s with expressions %s" % (url,
|
||||||
prettyMatcherList(self.imageSearch)))
|
prettyMatcherList(self.imageSearch)))
|
||||||
if self.textSearch:
|
if self.textSearch:
|
||||||
text = self.fetchText(url, data, self.textSearch,
|
text = self.fetchText(url, data, self.textSearch,
|
||||||
optional=self.textOptional)
|
optional=self.textOptional)
|
||||||
else:
|
else:
|
||||||
text = None
|
text = None
|
||||||
return ComicStrip(self, url, imageUrls, text=text)
|
return ComicStrip(self, url, urls, text=text)
|
||||||
|
|
||||||
def getStrips(self, maxstrips=None):
|
def getStrips(self, maxstrips=None):
|
||||||
"""Get comic strips."""
|
"""Get comic strips."""
|
||||||
|
@ -217,7 +217,7 @@ class Scraper:
|
||||||
break
|
break
|
||||||
url = prevUrl
|
url = prevUrl
|
||||||
|
|
||||||
def isfirststrip(self, url):
|
def isfirststrip(self, url: str) -> bool:
|
||||||
"""Check if the specified URL is the first strip of a comic. This is
|
"""Check if the specified URL is the first strip of a comic. This is
|
||||||
specially for comics taken from archive.org, since the base URL of
|
specially for comics taken from archive.org, since the base URL of
|
||||||
archive.org changes whenever pages are taken from a different
|
archive.org changes whenever pages are taken from a different
|
||||||
|
@ -228,7 +228,7 @@ class Scraper:
|
||||||
currenturl = ARCHIVE_ORG_URL.sub('', url)
|
currenturl = ARCHIVE_ORG_URL.sub('', url)
|
||||||
return firsturl == currenturl
|
return firsturl == currenturl
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url: str, data) -> str | None:
|
||||||
"""Find previous URL."""
|
"""Find previous URL."""
|
||||||
prevUrl = None
|
prevUrl = None
|
||||||
if self.prevSearch:
|
if self.prevSearch:
|
||||||
|
@ -243,40 +243,40 @@ class Scraper:
|
||||||
getHandler().comicPageLink(self, url, prevUrl)
|
getHandler().comicPageLink(self, url, prevUrl)
|
||||||
return prevUrl
|
return prevUrl
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index: str) -> str:
|
||||||
"""Get comic strip URL from index."""
|
"""Get comic strip URL from index."""
|
||||||
return self.stripUrl % index
|
return self.stripUrl % index
|
||||||
|
|
||||||
def starter(self):
|
def starter(self) -> str:
|
||||||
"""Get starter URL from where to scrape comic strips."""
|
"""Get starter URL from where to scrape comic strips."""
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url: str, page_url: str) -> str | None:
|
||||||
"""Return filename for given image and page URL."""
|
"""Return filename for given image and page URL."""
|
||||||
return
|
return
|
||||||
|
|
||||||
def link_modifier(self, fromurl, tourl):
|
def link_modifier(self, fromurl: str, tourl: str) -> str:
|
||||||
"""Optional modification of parsed link (previous/back/latest) URLs.
|
"""Optional modification of parsed link (previous/back/latest) URLs.
|
||||||
Useful if there are domain redirects. The default implementation does
|
Useful if there are domain redirects. The default implementation does
|
||||||
not modify the URL.
|
not modify the URL.
|
||||||
"""
|
"""
|
||||||
return tourl
|
return tourl
|
||||||
|
|
||||||
def imageUrlModifier(self, image_url, data):
|
def imageUrlModifier(self, image_url: str, data) -> str:
|
||||||
"""Optional modification of parsed image URLs. Useful if the URL
|
"""Optional modification of parsed image URLs. Useful if the URL
|
||||||
needs to be fixed before usage. The default implementation does
|
needs to be fixed before usage. The default implementation does
|
||||||
not modify the URL. The given data is the URL page data.
|
not modify the URL. The given data is the URL page data.
|
||||||
"""
|
"""
|
||||||
return image_url
|
return image_url
|
||||||
|
|
||||||
def vote(self):
|
def vote(self) -> None:
|
||||||
"""Cast a public vote for this comic."""
|
"""Cast a public vote for this comic."""
|
||||||
uid = get_system_uid()
|
uid = get_system_uid()
|
||||||
data = {"name": self.name.replace('/', '_'), "uid": uid}
|
data = {"name": self.name.replace('/', '_'), "uid": uid}
|
||||||
response = self.session.post(configuration.VoteUrl, data=data)
|
response = self.session.post(configuration.VoteUrl, data=data)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
def get_download_dir(self, basepath):
|
def get_download_dir(self, basepath: str) -> str:
|
||||||
"""Try to find the corect download directory, ignoring case
|
"""Try to find the corect download directory, ignoring case
|
||||||
differences."""
|
differences."""
|
||||||
path = basepath
|
path = basepath
|
||||||
|
@ -294,16 +294,16 @@ class Scraper:
|
||||||
path = os.path.join(path, part)
|
path = os.path.join(path, part)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
def getCompleteFile(self, basepath):
|
def getCompleteFile(self, basepath: str) -> str:
|
||||||
"""Get filename indicating all comics are downloaded."""
|
"""Get filename indicating all comics are downloaded."""
|
||||||
dirname = self.get_download_dir(basepath)
|
dirname = self.get_download_dir(basepath)
|
||||||
return os.path.join(dirname, "complete.txt")
|
return os.path.join(dirname, "complete.txt")
|
||||||
|
|
||||||
def isComplete(self, basepath):
|
def isComplete(self, basepath: str) -> bool:
|
||||||
"""Check if all comics are downloaded."""
|
"""Check if all comics are downloaded."""
|
||||||
return os.path.isfile(self.getCompleteFile(basepath))
|
return os.path.isfile(self.getCompleteFile(basepath))
|
||||||
|
|
||||||
def setComplete(self, basepath):
|
def setComplete(self, basepath: str) -> None:
|
||||||
"""Set complete flag for this comic, ie. all comics are downloaded."""
|
"""Set complete flag for this comic, ie. all comics are downloaded."""
|
||||||
if self.endOfLife:
|
if self.endOfLife:
|
||||||
filename = self.getCompleteFile(basepath)
|
filename = self.getCompleteFile(basepath)
|
||||||
|
@ -521,15 +521,10 @@ class ParserScraper(Scraper):
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
def _matchPattern(self, data, patterns):
|
def _matchPattern(self, data, patterns):
|
||||||
if self.css:
|
|
||||||
searchFun = data.cssselect
|
|
||||||
else:
|
|
||||||
def searchFun(s):
|
|
||||||
return data.xpath(s, namespaces=NS)
|
|
||||||
patterns = makeSequence(patterns)
|
patterns = makeSequence(patterns)
|
||||||
for search in patterns:
|
for search in patterns:
|
||||||
matched = False
|
matched = False
|
||||||
for match in searchFun(search):
|
for match in self.match(data, search):
|
||||||
matched = True
|
matched = True
|
||||||
yield match, search
|
yield match, search
|
||||||
|
|
||||||
|
@ -537,6 +532,13 @@ class ParserScraper(Scraper):
|
||||||
# do not search other links if one pattern matched
|
# do not search other links if one pattern matched
|
||||||
break
|
break
|
||||||
|
|
||||||
|
def match(self, data, pattern):
|
||||||
|
"""Match a pattern (XPath/CSS) against a page."""
|
||||||
|
if self.css:
|
||||||
|
return data.cssselect(pattern)
|
||||||
|
else:
|
||||||
|
return data.xpath(pattern, namespaces=NS)
|
||||||
|
|
||||||
def getDisabledReasons(self):
|
def getDisabledReasons(self):
|
||||||
res = {}
|
res = {}
|
||||||
if self.css and cssselect is None:
|
if self.css and cssselect is None:
|
||||||
|
|
|
@ -17,7 +17,6 @@ classifiers = [
|
||||||
"Programming Language :: Python",
|
"Programming Language :: Python",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Programming Language :: Python :: 3 :: Only",
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
"Programming Language :: Python :: 3.7",
|
|
||||||
"Programming Language :: Python :: 3.8",
|
"Programming Language :: Python :: 3.8",
|
||||||
"Programming Language :: Python :: 3.9",
|
"Programming Language :: Python :: 3.9",
|
||||||
"Programming Language :: Python :: 3.10",
|
"Programming Language :: Python :: 3.10",
|
||||||
|
@ -27,15 +26,13 @@ classifiers = [
|
||||||
"Topic :: Multimedia :: Graphics",
|
"Topic :: Multimedia :: Graphics",
|
||||||
]
|
]
|
||||||
keywords = ["comic", "webcomic", "downloader", "archiver", "crawler"]
|
keywords = ["comic", "webcomic", "downloader", "archiver", "crawler"]
|
||||||
requires-python = ">=3.7"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"colorama",
|
"colorama",
|
||||||
"imagesize",
|
"imagesize",
|
||||||
"lxml>=4.0.0",
|
"lxml>=4.0.0",
|
||||||
"platformdirs",
|
"platformdirs",
|
||||||
"requests>=2.0",
|
"requests>=2.0",
|
||||||
"cached_property;python_version<'3.8'",
|
|
||||||
"importlib_metadata;python_version<'3.8'",
|
|
||||||
"importlib_resources>=5.0.0;python_version<'3.9'",
|
"importlib_resources>=5.0.0;python_version<'3.9'",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
@ -101,7 +98,7 @@ ignore = [
|
||||||
]
|
]
|
||||||
noqa-require-code = true
|
noqa-require-code = true
|
||||||
no-accept-encodings = true
|
no-accept-encodings = true
|
||||||
min-version = "3.7"
|
min-version = "3.8"
|
||||||
extend-exclude = [
|
extend-exclude = [
|
||||||
'.venv',
|
'.venv',
|
||||||
'build',
|
'build',
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019 Thomas W. Littauer
|
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
||||||
"""
|
"""
|
||||||
Script to get a list of comicskingdom.com comics and save the info in a JSON
|
Script to get a list of comicskingdom.com comics and save the info in a JSON
|
||||||
file for further processing.
|
file for further processing.
|
||||||
|
@ -19,39 +19,17 @@ class ComicsKingdomUpdater(ComicListUpdater):
|
||||||
"ComicGenesis/%s",
|
"ComicGenesis/%s",
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_startpage(self, page):
|
def handle_listing(self, page):
|
||||||
"""Parse list of comics from the bottom of the start page."""
|
for link in page.xpath('//ul[d:class("index")]//a', namespaces=NS):
|
||||||
for li in page.xpath('//div[d:class("comics-list")]//li', namespaces=NS):
|
name = link.text_content().removeprefix('The ')
|
||||||
link = li.xpath('./a')[0]
|
|
||||||
url = link.attrib['href']
|
url = link.attrib['href']
|
||||||
name = link.text.removeprefix('The ')
|
lang = 'es' if ' (Spanish)' in name else None
|
||||||
|
|
||||||
self.add_comic(name, (url, None))
|
self.add_comic(name, (url, lang))
|
||||||
|
|
||||||
def handle_listing(self, page, lang: str = None, add: str = ''):
|
|
||||||
|
|
||||||
hasnew = True
|
|
||||||
while hasnew:
|
|
||||||
hasnew = False
|
|
||||||
for comicdiv in page.xpath('//div[d:class("tile")]', namespaces=NS):
|
|
||||||
nametag = comicdiv.xpath('./a/comic-name')
|
|
||||||
if len(nametag) == 0:
|
|
||||||
continue
|
|
||||||
name = nametag[0].text.removeprefix('The ') + add
|
|
||||||
url = comicdiv.xpath('./a')[0].attrib['href']
|
|
||||||
|
|
||||||
if self.add_comic(name, (url, lang)):
|
|
||||||
hasnew = True
|
|
||||||
|
|
||||||
nextlink = page.xpath('//a[./img[contains(@src, "page-right")]]')
|
|
||||||
page = self.get_url(nextlink[0].attrib['href'])
|
|
||||||
|
|
||||||
def collect_results(self):
|
def collect_results(self):
|
||||||
"""Parse all search result pages."""
|
"""Parse all search result pages."""
|
||||||
page = self.get_url('https://www.comicskingdom.com/')
|
self.handle_listing(self.get_url('https://comicskingdom.com/features'))
|
||||||
self.handle_startpage(page)
|
|
||||||
self.handle_listing(page)
|
|
||||||
self.handle_listing(self.get_url('https://www.comicskingdom.com/spanish'), 'es', 'Spanish')
|
|
||||||
|
|
||||||
def get_entry(self, name: str, data: tuple[str, str]):
|
def get_entry(self, name: str, data: tuple[str, str]):
|
||||||
opt = f", lang='{data[1]}'" if data[1] else ''
|
opt = f", lang='{data[1]}'" if data[1] else ''
|
||||||
|
|
|
@ -1,28 +1,30 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2017-2020 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2017 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
import re
|
||||||
|
from importlib import metadata
|
||||||
|
|
||||||
# Idea from
|
# Idea from
|
||||||
# https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Setuptools-Entry-Point,
|
# https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Setuptools-Entry-Point,
|
||||||
# but with importlib
|
# but with importlib
|
||||||
def Entrypoint(group, name, **kwargs):
|
def entrypoint(group, name, **kwargs):
|
||||||
import re
|
|
||||||
try:
|
|
||||||
from importlib.metadata import entry_points
|
|
||||||
except ImportError:
|
|
||||||
from importlib_metadata import entry_points
|
|
||||||
|
|
||||||
# get the entry point
|
# get the entry point
|
||||||
eps = entry_points()[group]
|
eps = metadata.entry_points()
|
||||||
ep = next(ep for ep in eps if ep.name == name)
|
if 'select' in dir(eps):
|
||||||
module, attr = re.split(r'\s*:\s*', ep.value, 1)
|
# modern
|
||||||
|
ep = eps.select(group=group)[name]
|
||||||
|
else:
|
||||||
|
# legacy (pre-3.10)
|
||||||
|
ep = next(ep for ep in eps[group] if ep.name == name)
|
||||||
|
module, attr = re.split(r'\s*:\s*', ep.value, maxsplit=1)
|
||||||
|
|
||||||
# script name must not be a valid module name to avoid name clashes on import
|
# script name must not be a valid module name to avoid name clashes on import
|
||||||
script_path = os.path.join(workpath, name + '-script.py')
|
script_path = os.path.join(workpath, name + '-script.py')
|
||||||
print("creating script for entry point", group, name)
|
print("creating script for entry point", group, name)
|
||||||
with open(script_path, 'w') as fh:
|
with open(script_path, mode='w', encoding='utf-8') as fh:
|
||||||
print("import sys", file=fh)
|
print("import sys", file=fh)
|
||||||
print("import", module, file=fh)
|
print("import", module, file=fh)
|
||||||
print("sys.exit(%s.%s())" % (module, attr), file=fh)
|
print(f"sys.exit({module}.{attr}())", file=fh)
|
||||||
|
|
||||||
return Analysis(
|
return Analysis(
|
||||||
[script_path] + kwargs.get('scripts', []),
|
[script_path] + kwargs.get('scripts', []),
|
||||||
|
@ -30,7 +32,7 @@ def Entrypoint(group, name, **kwargs):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
a = Entrypoint('console_scripts', 'dosage')
|
a = entrypoint('console_scripts', 'dosage')
|
||||||
|
|
||||||
a.binaries = [x for x in a.binaries if not x[1].lower().startswith(r'c:\windows')]
|
a.binaries = [x for x in a.binaries if not x[1].lower().startswith(r'c:\windows')]
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
"""
|
"""
|
||||||
Script to get a list of gocomics and save the info in a JSON file for further
|
Script to get a list of gocomics and save the info in a JSON file for further
|
||||||
processing.
|
processing.
|
||||||
|
@ -20,6 +20,8 @@ class GoComicsUpdater(ComicListUpdater):
|
||||||
excluded_comics = (
|
excluded_comics = (
|
||||||
# too short
|
# too short
|
||||||
'LukeyMcGarrysTLDR',
|
'LukeyMcGarrysTLDR',
|
||||||
|
# Has its own module
|
||||||
|
'Widdershins',
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_gocomics(self, url, outercss='a.gc-blended-link', lang=None):
|
def handle_gocomics(self, url, outercss='a.gc-blended-link', lang=None):
|
||||||
|
|
|
@ -61,6 +61,9 @@ def create_symlinks(d):
|
||||||
else:
|
else:
|
||||||
order.extend(data["pages"][work]["images"].values())
|
order.extend(data["pages"][work]["images"].values())
|
||||||
if "prev" in data["pages"][work]:
|
if "prev" in data["pages"][work]:
|
||||||
|
if data["pages"][work]["prev"] == work:
|
||||||
|
work = None
|
||||||
|
else:
|
||||||
work = data["pages"][work]["prev"]
|
work = data["pages"][work]["prev"]
|
||||||
else:
|
else:
|
||||||
work = None
|
work = None
|
||||||
|
|
|
@ -3,12 +3,15 @@
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||||
import re
|
import re
|
||||||
|
from operator import attrgetter
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from dosagelib.scraper import scrapers
|
from dosagelib.scraper import scrapers
|
||||||
from dosagelib.plugins import old
|
from dosagelib.plugins import old
|
||||||
|
|
||||||
|
|
||||||
class TestComicNames(object):
|
class TestComicNames:
|
||||||
|
|
||||||
def test_names(self):
|
def test_names(self):
|
||||||
for scraperobj in scrapers.all():
|
for scraperobj in scrapers.all():
|
||||||
|
@ -20,10 +23,10 @@ class TestComicNames(object):
|
||||||
comicname = name
|
comicname = name
|
||||||
assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
|
assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
|
||||||
|
|
||||||
def test_renamed(self):
|
@pytest.mark.parametrize(('scraperobj'),
|
||||||
for scraperobj in scrapers.all(include_removed=True):
|
[obj for obj in scrapers.all(include_removed=True)
|
||||||
if not isinstance(scraperobj, old.Renamed):
|
if isinstance(obj, old.Renamed)], ids=attrgetter('name'))
|
||||||
continue
|
def test_renamed(self, scraperobj):
|
||||||
assert len(scraperobj.getDisabledReasons()) > 0
|
assert len(scraperobj.getDisabledReasons()) > 0
|
||||||
# Renamed scraper should only point to an non-disabled scraper
|
# Renamed scraper should only point to an non-disabled scraper
|
||||||
newscraper = scrapers.find(scraperobj.newname)
|
newscraper = scrapers.find(scraperobj.newname)
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2019 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||||
from dosagelib.helpers import joinPathPartsNamer, queryNamer
|
from dosagelib.helpers import joinPathPartsNamer, queryNamer
|
||||||
|
|
||||||
|
|
||||||
class TestNamer(object):
|
class TestNamer:
|
||||||
"""
|
"""
|
||||||
Tests for comic namer.
|
Tests for comic namer.
|
||||||
"""
|
"""
|
||||||
|
@ -16,6 +16,8 @@ class TestNamer(object):
|
||||||
def test_joinPathPartsNamer(self):
|
def test_joinPathPartsNamer(self):
|
||||||
imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png'
|
imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png'
|
||||||
pageurl = 'https://HOST/2019/03/11/12450/'
|
pageurl = 'https://HOST/2019/03/11/12450/'
|
||||||
assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self,
|
||||||
assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
||||||
assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450'
|
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self,
|
||||||
|
imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
||||||
|
assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450'
|
||||||
|
|
3
tox.ini
3
tox.ini
|
@ -1,10 +1,9 @@
|
||||||
[tox]
|
[tox]
|
||||||
envlist = py37, py38, py39, py310, py311, py312, flake8
|
envlist = py38, py39, py310, py311, py312, flake8
|
||||||
isolated_build = True
|
isolated_build = True
|
||||||
|
|
||||||
[gh-actions]
|
[gh-actions]
|
||||||
python =
|
python =
|
||||||
3.7: py37
|
|
||||||
3.8: py38
|
3.8: py38
|
||||||
3.9: py39
|
3.9: py39
|
||||||
3.10: py310
|
3.10: py310
|
||||||
|
|
Loading…
Reference in a new issue