Compare commits
45 commits
6c00cdc111
...
bf9e7d2760
Author | SHA1 | Date | |
---|---|---|---|
bf9e7d2760 | |||
|
df1e72e90a | ||
|
8984e9a2b5 | ||
|
b32e67fce8 | ||
|
e7858373f6 | ||
|
6024b2a01b | ||
|
8afe7d1cdc | ||
|
2b7ca3f30c | ||
|
5391b8518f | ||
|
284efdc212 | ||
|
76d5180b49 | ||
|
6e138a0228 | ||
|
50a656bb6f | ||
|
f87526738c | ||
|
f4f45945d0 | ||
|
7d7166af6e | ||
|
dce299903b | ||
|
0bfcd32385 | ||
|
f63b899bb4 | ||
|
38f4dd0ed1 | ||
|
23125c74d4 | ||
|
2e912bcd2c | ||
|
cfe5738151 | ||
|
32b0dfef35 | ||
|
48eb4ef204 | ||
|
05b9be4cd9 | ||
|
da60636b8a | ||
|
3722fbe7e4 | ||
|
15423eab21 | ||
|
7b9ca867fb | ||
|
ee22169cc5 | ||
|
e2b3beac2e | ||
|
f76061e138 | ||
|
b495c51bcb | ||
|
89b38d450f | ||
|
3c203dae72 | ||
|
ea2bad5500 | ||
|
aa50afdbf7 | ||
|
6f6b4d6603 | ||
|
b4bcb65249 | ||
|
b3da06b270 | ||
|
17f7c53e53 | ||
|
7517b2fef8 | ||
|
74ffa7533f | ||
|
321d7d0a5a |
44 changed files with 517 additions and 568 deletions
6
.github/workflows/ci.yaml
vendored
6
.github/workflows/ci.yaml
vendored
|
@ -13,7 +13,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -32,7 +32,7 @@ jobs:
|
|||
if: ${{ matrix.python-version != env.DEFAULT_PYTHON }}
|
||||
|
||||
- name: Test with tox (and upload coverage)
|
||||
uses: paambaati/codeclimate-action@v5.0.0
|
||||
uses: paambaati/codeclimate-action@v8.0.0
|
||||
if: ${{ matrix.python-version == env.DEFAULT_PYTHON }}
|
||||
env:
|
||||
CC_TEST_REPORTER_ID: 2a411f596959fc32f5d73f3ba7cef8cc4d5733299d742dbfc97fd6c190b9010c
|
||||
|
@ -42,6 +42,6 @@ jobs:
|
|||
${{ github.workspace }}/.tox/reports/*/coverage.xml:coverage.py
|
||||
prefix: ${{ github.workspace }}/.tox/py39/lib/python3.9/site-packages
|
||||
|
||||
- uses: codecov/codecov-action@v3
|
||||
- uses: codecov/codecov-action@v4
|
||||
with:
|
||||
directory: '.tox/reports'
|
||||
|
|
35
.github/workflows/pages.yml
vendored
35
.github/workflows/pages.yml
vendored
|
@ -5,12 +5,19 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- master
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: "pages"
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -28,10 +35,24 @@ jobs:
|
|||
pip install wheel
|
||||
pip install git+https://github.com/spanezz/staticsite.git@v2.3
|
||||
ssite build --output public
|
||||
cd public
|
||||
rm -rf Jenkinsfile dosagelib scripts tests
|
||||
|
||||
- name: Deploy
|
||||
uses: peaceiris/actions-gh-pages@v3
|
||||
- name: Setup Pages
|
||||
id: pages
|
||||
uses: actions/configure-pages@v5
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
cname: dosage.rocks
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
exclude_assets: 'Jenkinsfile,dosagelib,scripts,setup.*,tests,*.ini'
|
||||
path: public
|
||||
|
||||
deploy:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v4
|
||||
|
|
2
COPYING
2
COPYING
|
@ -1,6 +1,6 @@
|
|||
Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
Copyright (C) 2015-2024 Tobias Gruetzmacher
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
|
|
3
Jenkinsfile
vendored
3
Jenkinsfile
vendored
|
@ -4,7 +4,6 @@ def pys = [
|
|||
[name: 'Python 3.10', docker: '3.10-bookworm', tox:'py310', main: false],
|
||||
[name: 'Python 3.9', docker: '3.9-bookworm', tox:'py39', main: false],
|
||||
[name: 'Python 3.8', docker: '3.8-bookworm', tox:'py38', main: false],
|
||||
[name: 'Python 3.7', docker: '3.7-bookworm', tox:'py37', main: false],
|
||||
]
|
||||
|
||||
properties([
|
||||
|
@ -75,7 +74,7 @@ pys.each { py ->
|
|||
parallel(tasks)
|
||||
parallel modern: {
|
||||
stage('Modern Windows binary') {
|
||||
windowsBuild('3.11', 'dosage.exe')
|
||||
windowsBuild('3.12', 'dosage.exe')
|
||||
}
|
||||
},
|
||||
legacy: {
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# Dosage
|
||||
|
||||
[![Tests](https://github.com/webcomics/dosage/actions/workflows/test.yml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/test.yml)
|
||||
[![CI](https://github.com/webcomics/dosage/actions/workflows/ci.yaml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/ci.yaml)
|
||||
[![Code Climate](https://codeclimate.com/github/webcomics/dosage/badges/gpa.svg)](https://codeclimate.com/github/webcomics/dosage)
|
||||
[![codecov](https://codecov.io/gh/webcomics/dosage/branch/master/graph/badge.svg)](https://codecov.io/gh/webcomics/dosage)
|
||||
![Maintenance](https://img.shields.io/maintenance/yes/2023.svg)
|
||||
![Maintenance](https://img.shields.io/maintenance/yes/2024.svg)
|
||||
![License](https://img.shields.io/github/license/webcomics/dosage)
|
||||
|
||||
Dosage is designed to keep a local copy of specific webcomics and other
|
||||
|
@ -72,7 +72,7 @@ are old enough to view them.
|
|||
### Dependencies
|
||||
|
||||
Since dosage is written in [Python](http://www.python.org/), a Python
|
||||
installation is required: Dosage needs at least Python 3.7. Dosage requires
|
||||
installation is required: Dosage needs at least Python 3.8. Dosage requires
|
||||
some Python modules from PyPI, so installation with `pip` is recommended.
|
||||
|
||||
### Using the Windows binary
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
"""
|
||||
Automated comic downloader. Dosage traverses comic websites in
|
||||
order to download each strip of the comic. The intended use is for
|
||||
|
@ -14,14 +14,11 @@ The primary interface is the 'dosage' commandline script.
|
|||
Comic modules for each comic are located in L{dosagelib.plugins}.
|
||||
"""
|
||||
|
||||
try:
|
||||
from importlib.metadata import version, PackageNotFoundError
|
||||
except ImportError:
|
||||
from importlib_metadata import version, PackageNotFoundError
|
||||
from importlib.metadata import version, PackageNotFoundError
|
||||
|
||||
from .output import out
|
||||
|
||||
AppName = u'dosage'
|
||||
AppName = 'dosage'
|
||||
try:
|
||||
__version__ = version(AppName) # PEP 396
|
||||
except PackageNotFoundError:
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import glob
|
||||
import codecs
|
||||
import contextlib
|
||||
from datetime import datetime
|
||||
from typing import Iterator
|
||||
|
||||
from .output import out
|
||||
from .util import unquote, getFilename, urlopen, strsize
|
||||
|
@ -14,27 +17,27 @@ from .events import getHandler
|
|||
|
||||
|
||||
# Maximum content size for images
|
||||
MaxImageBytes = 1024 * 1024 * 20 # 20 MB
|
||||
MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB
|
||||
# RFC 1123 format, as preferred by RFC 2616
|
||||
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
|
||||
|
||||
|
||||
class ComicStrip(object):
|
||||
class ComicStrip:
|
||||
"""A list of comic image URLs."""
|
||||
|
||||
def __init__(self, scraper, strip_url, image_urls, text=None):
|
||||
def __init__(self, scraper, strip_url: str, image_urls: str, text=None) -> None:
|
||||
"""Store the image URL list."""
|
||||
self.scraper = scraper
|
||||
self.strip_url = strip_url
|
||||
self.image_urls = image_urls
|
||||
self.text = text
|
||||
|
||||
def getImages(self):
|
||||
def getImages(self) -> Iterator[ComicImage]:
|
||||
"""Get a list of image downloaders."""
|
||||
for image_url in self.image_urls:
|
||||
yield self.getDownloader(image_url)
|
||||
|
||||
def getDownloader(self, url):
|
||||
def getDownloader(self, url: str) -> ComicImage:
|
||||
"""Get an image downloader."""
|
||||
filename = self.scraper.namer(url, self.strip_url)
|
||||
if filename is None:
|
||||
|
@ -43,7 +46,7 @@ class ComicStrip(object):
|
|||
text=self.text)
|
||||
|
||||
|
||||
class ComicImage(object):
|
||||
class ComicImage:
|
||||
"""A comic image downloader."""
|
||||
|
||||
ChunkBytes = 1024 * 100 # 100KB
|
||||
|
@ -64,7 +67,7 @@ class ComicImage(object):
|
|||
headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR)
|
||||
self.urlobj = urlopen(self.url, self.scraper.session,
|
||||
referrer=self.referrer,
|
||||
max_content_bytes=MaxImageBytes, stream=True,
|
||||
max_content_bytes=MAX_IMAGE_BYTES, stream=True,
|
||||
headers=headers)
|
||||
if self.urlobj.status_code == 304: # Not modified
|
||||
return
|
||||
|
|
|
@ -1,39 +1,49 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
from .util import getQueryParams
|
||||
from .scraper import Scraper
|
||||
|
||||
|
||||
def queryNamer(param, use_page_url=False):
|
||||
class Namer(Protocol):
|
||||
"""A protocol for generic callbacks to name web comic images."""
|
||||
def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||
...
|
||||
|
||||
|
||||
def queryNamer(param, use_page_url=False) -> Namer:
|
||||
"""Get name from URL query part."""
|
||||
def _namer(self, image_url, page_url):
|
||||
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||
"""Get URL query part."""
|
||||
url = page_url if use_page_url else image_url
|
||||
return getQueryParams(url)[param][0]
|
||||
return _namer
|
||||
|
||||
|
||||
def regexNamer(regex, use_page_url=False):
|
||||
def regexNamer(regex, use_page_url=False) -> Namer:
|
||||
"""Get name from regular expression."""
|
||||
def _namer(self, image_url, page_url):
|
||||
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||
"""Get first regular expression group."""
|
||||
url = page_url if use_page_url else image_url
|
||||
mo = regex.search(url)
|
||||
if mo:
|
||||
return mo.group(1)
|
||||
return mo.group(1) if mo else None
|
||||
return _namer
|
||||
|
||||
|
||||
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
|
||||
def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer:
|
||||
"""Get name by mashing path parts together with underscores."""
|
||||
def _namer(self, imageurl, pageurl):
|
||||
def _namer(self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||
# Split and drop host name
|
||||
pageurlsplit = pageurl.split('/')[3:]
|
||||
imageurlsplit = imageurl.split('/')[3:]
|
||||
joinparts = ([pageurlsplit[i] for i in pageurlparts] +
|
||||
[imageurlsplit[i] for i in imageurlparts])
|
||||
pagesplit = page_url.split('/')[3:]
|
||||
imagesplit = image_url.split('/')[3:]
|
||||
joinparts = ([pagesplit[i] for i in pageparts] +
|
||||
[imagesplit[i] for i in imageparts])
|
||||
return joinchar.join(joinparts)
|
||||
return _namer
|
||||
|
||||
|
|
|
@ -1,18 +1,18 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape, sub, MULTILINE
|
||||
|
||||
from ..util import tagre
|
||||
from ..scraper import BasicScraper, ParserScraper, _BasicScraper, _ParserScraper
|
||||
from ..helpers import regexNamer, bounceStarter, indirectStarter
|
||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||
from ..helpers import joinPathPartsNamer, bounceStarter, indirectStarter
|
||||
from .common import WordPressScraper, WordPressNavi, WordPressWebcomic
|
||||
|
||||
|
||||
class AbstruseGoose(_ParserScraper):
|
||||
url = 'https://abstrusegoose.com/'
|
||||
class AbstruseGoose(ParserScraper):
|
||||
url = 'https://web.archive.org/web/20230930172141/https://abstrusegoose.com/'
|
||||
starter = bounceStarter
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
@ -41,24 +41,16 @@ class AbsurdNotions(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class AcademyVale(_BasicScraper):
|
||||
url = 'http://www.imagerie.com/vale/'
|
||||
stripUrl = url + 'avarch.cgi?%s'
|
||||
firstStripUrl = stripUrl % '001'
|
||||
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
|
||||
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") +
|
||||
tagre('img', 'src', r'AVNavBack\.gif'))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Achewood(_ParserScraper):
|
||||
url = 'https://www.achewood.com/'
|
||||
stripUrl = url + 'index.php?date=%s'
|
||||
firstStripUrl = stripUrl % '10012001'
|
||||
imageSearch = '//p[@id="comic_body"]//img'
|
||||
prevSearch = '//span[d:class("left")]/a[d:class("dateNav")]'
|
||||
help = 'Index format: mmddyyyy'
|
||||
namer = regexNamer(compile(r'date=(\d+)'))
|
||||
class Achewood(ParserScraper):
|
||||
baseUrl = 'https://achewood.com/'
|
||||
stripUrl = baseUrl + '%s/title.html'
|
||||
url = stripUrl % '2016/12/25'
|
||||
firstStripUrl = stripUrl % '2001/10/01'
|
||||
imageSearch = '//img[d:class("comicImage")]'
|
||||
prevSearch = '//a[d:class("comic_prev")]'
|
||||
namer = joinPathPartsNamer(pageparts=range(0, 2))
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class AdventuresOfFifne(_ParserScraper):
|
||||
|
@ -117,12 +109,8 @@ class AhoiPolloi(_ParserScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class AhoyEarth(WordPressNavi):
|
||||
url = 'http://www.ahoyearth.com/'
|
||||
|
||||
|
||||
class AirForceBlues(WordPressScraper):
|
||||
url = 'http://farvatoons.com/'
|
||||
url = 'https://web.archive.org/web/20210102113825/http://farvatoons.com/'
|
||||
firstStripUrl = url + 'comic/in-texas-there-are-texans/'
|
||||
|
||||
|
||||
|
@ -235,14 +223,11 @@ class AltermetaOld(_ParserScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class AmazingSuperPowers(_BasicScraper):
|
||||
url = 'http://www.amazingsuperpowers.com/'
|
||||
rurl = escape(url)
|
||||
class AmazingSuperPowers(WordPressNavi):
|
||||
url = 'https://www.amazingsuperpowers.com/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2007/09/heredity'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||
help = 'Index format: yyyy/mm/name'
|
||||
imageSearch = '//div[d:class("comicpane")]/img'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
|
@ -271,19 +256,7 @@ class Amya(WordPressScraper):
|
|||
url = 'http://www.amyachronicles.com/'
|
||||
|
||||
|
||||
class Anaria(_ParserScraper):
|
||||
url = 'https://www.leahbriere.com/anaria-the-witchs-dream/'
|
||||
firstStripUrl = url
|
||||
imageSearch = '//div[contains(@class, "gallery")]//a'
|
||||
multipleImagesPerStrip = True
|
||||
endOfLife = True
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
filename = imageUrl.rsplit('/', 1)[-1]
|
||||
return filename.replace('00.jpg', 'new00.jpg').replace('new', '1')
|
||||
|
||||
|
||||
class Angband(_ParserScraper):
|
||||
class Angband(ParserScraper):
|
||||
url = 'http://angband.calamarain.net/'
|
||||
stripUrl = url + '%s'
|
||||
imageSearch = '//img'
|
||||
|
@ -292,7 +265,7 @@ class Angband(_ParserScraper):
|
|||
|
||||
def starter(self):
|
||||
page = self.getPage(self.url)
|
||||
self.pages = page.xpath('//p/a[not(contains(@href, "cast"))]/@href')
|
||||
self.pages = self.match(page, '//p/a[not(contains(@href, "cast"))]/@href')
|
||||
self.firstStripUrl = self.pages[0]
|
||||
return self.pages[-1]
|
||||
|
||||
|
@ -300,14 +273,6 @@ class Angband(_ParserScraper):
|
|||
return self.pages[self.pages.index(url) - 1]
|
||||
|
||||
|
||||
class Angels2200(_BasicScraper):
|
||||
url = 'http://www.janahoffmann.com/angels/'
|
||||
stripUrl = url + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'"))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous")
|
||||
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
|
||||
|
||||
|
||||
class Annyseed(_ParserScraper):
|
||||
baseUrl = ('https://web.archive.org/web/20190511031451/'
|
||||
'http://www.mirrorwoodcomics.com/')
|
||||
|
@ -330,7 +295,7 @@ class Annyseed(_ParserScraper):
|
|||
return tourl
|
||||
|
||||
|
||||
class AntiheroForHire(_ParserScraper):
|
||||
class AntiheroForHire(ParserScraper):
|
||||
stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s'
|
||||
firstStripUrl = stripUrl % '2016/6/8/entrance-vigil'
|
||||
url = firstStripUrl
|
||||
|
@ -341,7 +306,7 @@ class AntiheroForHire(_ParserScraper):
|
|||
def starter(self):
|
||||
# Build list of chapters for navigation
|
||||
page = self.getPage(self.url)
|
||||
self.chapters = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]/@href')
|
||||
self.chapters = self.match(page, '//ul[d:class("archive-group-list")]//a[d:class("archive-item-link")]/@href')
|
||||
return self.chapters[0]
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
|
@ -377,7 +342,7 @@ class ArtificialIncident(WordPressWebcomic):
|
|||
firstStripUrl = stripUrl % 'issue-one-life-changing'
|
||||
|
||||
|
||||
class AstronomyPOTD(_ParserScraper):
|
||||
class AstronomyPOTD(ParserScraper):
|
||||
baseUrl = 'http://apod.nasa.gov/apod/'
|
||||
url = baseUrl + 'astropix.html'
|
||||
starter = bounceStarter
|
||||
|
@ -391,7 +356,7 @@ class AstronomyPOTD(_ParserScraper):
|
|||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return data.xpath('//iframe') # videos
|
||||
return self.match(data, '//iframe') # videos
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
||||
|
|
|
@ -34,11 +34,11 @@ class CaptainSNES(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/nnn-stripname'
|
||||
|
||||
|
||||
class CarryOn(_ParserScraper):
|
||||
class CarryOn(ParserScraper):
|
||||
url = 'http://www.hirezfox.com/km/co/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
firstStripUrl = stripUrl % '20040701'
|
||||
imageSearch = '//div[@class="strip"]/img'
|
||||
imageSearch = '//div[d:class("strip")]/img'
|
||||
prevSearch = '//a[text()="Previous Day"]'
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
|
@ -122,13 +122,13 @@ class CatAndGirl(_ParserScraper):
|
|||
prevSearch = '//a[d:class("pager--prev")]'
|
||||
|
||||
|
||||
class CatenaManor(_ParserScraper):
|
||||
class CatenaManor(ParserScraper):
|
||||
baseUrl = ('https://web.archive.org/web/20141027141116/'
|
||||
'http://catenamanor.com/')
|
||||
url = baseUrl + 'archives'
|
||||
stripUrl = baseUrl + '%s/'
|
||||
firstStripUrl = stripUrl % '2003/07'
|
||||
imageSearch = '//img[@class="comicthumbnail"]'
|
||||
imageSearch = '//img[d:class("comicthumbnail")]'
|
||||
multipleImagesPerStrip = True
|
||||
endOfLife = True
|
||||
strips: List[str] = []
|
||||
|
@ -136,7 +136,7 @@ class CatenaManor(_ParserScraper):
|
|||
def starter(self):
|
||||
# Retrieve archive links and select valid range
|
||||
archivePage = self.getPage(self.url)
|
||||
archiveStrips = archivePage.xpath('//div[@id="archivepage"]//a')
|
||||
archiveStrips = self.match(archivePage, '//div[@id="archivepage"]//a')
|
||||
valid = False
|
||||
for link in archiveStrips:
|
||||
if self.stripUrl % '2012/01' in link.get('href'):
|
||||
|
@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper):
|
|||
class CSectionComics(WordPressScraper):
|
||||
url = 'https://www.csectioncomics.com/'
|
||||
firstStripUrl = url + 'comics/one-day-in-country'
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
|
||||
|
@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper):
|
|||
prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]'
|
||||
nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]'
|
||||
starter = bounceStarter
|
||||
namer = joinPathPartsNamer((), range(-4, 0))
|
||||
namer = joinPathPartsNamer(imageparts=range(-4, 0))
|
||||
|
||||
|
||||
class CynWolf(_ParserScraper):
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
import os
|
||||
|
||||
from ..scraper import ParserScraper
|
||||
|
@ -79,7 +79,7 @@ class ComicFury(ParserScraper):
|
|||
num = parts[-1]
|
||||
if self.multipleImagesPerStrip:
|
||||
page = self.getPage(pageUrl)
|
||||
images = page.xpath('//img[@class="comicsegmentimage"]/@src')
|
||||
images = self.match(page, '//img[d:class("comicsegmentimage")]/@src')
|
||||
if len(images) > 1:
|
||||
imageIndex = images.index(imageUrl) + 1
|
||||
return "%s_%s-%d%s" % (self.prefix, num, imageIndex, ext)
|
||||
|
@ -88,8 +88,8 @@ class ComicFury(ParserScraper):
|
|||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
# Videos on Underverse
|
||||
return (data.xpath('//div[@id="comicimagewrap"]//video') and
|
||||
not data.xpath('//div[@id="comicimagewrap"]//img'))
|
||||
return (self.match(data, '//div[@id="comicimagewrap"]//video') and
|
||||
not self.match(data, '//div[@id="comicimagewrap"]//img'))
|
||||
|
||||
@classmethod
|
||||
def getmodules(cls): # noqa: CFQ001
|
||||
|
|
|
@ -1,41 +1,35 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
||||
try:
|
||||
from importlib_resources import as_file, files
|
||||
except ImportError:
|
||||
from importlib.resources import as_file, files
|
||||
|
||||
from ..helpers import bounceStarter, joinPathPartsNamer
|
||||
from ..helpers import indirectStarter
|
||||
from ..scraper import ParserScraper
|
||||
|
||||
|
||||
class ComicsKingdom(ParserScraper):
|
||||
imageSearch = '//img[@id="theComicImage"]'
|
||||
prevSearch = '//a[./img[contains(@alt, "Previous")]]'
|
||||
nextSearch = '//a[./img[contains(@alt, "Next")]]'
|
||||
starter = bounceStarter
|
||||
namer = joinPathPartsNamer((-2, -1), ())
|
||||
partDiv = '//div[d:class("comic-reader-item")]'
|
||||
imageSearch = '//meta[@property="og:image"]/@content'
|
||||
prevSearch = partDiv + '[2]/@data-link'
|
||||
starter = indirectStarter
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
def __init__(self, name, path, lang=None):
|
||||
super().__init__('ComicsKingdom/' + name)
|
||||
self.url = 'https://comicskingdom.com/' + path
|
||||
self.stripUrl = self.url + '/%s'
|
||||
self.latestSearch = f'//a[re:test(@href, "/{path}/[0-9-]+$")]'
|
||||
if lang:
|
||||
self.lang = lang
|
||||
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
return tourl.replace('//wp.', '//', 1)
|
||||
|
||||
@classmethod
|
||||
def getmodules(cls): # noqa: CFQ001
|
||||
return (
|
||||
# Some comics are not listed on the "all" page (too old?)
|
||||
cls('Retail', 'retail'),
|
||||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/comicskingdom.py
|
||||
# START AUTOUPDATE
|
||||
cls('AmazingSpiderman', 'amazing-spider-man'),
|
||||
cls('AmazingSpidermanSpanish', 'hombre-arana', lang='es'),
|
||||
cls('Alice', 'alice'),
|
||||
cls('Apartment3G', 'apartment-3-g_1'),
|
||||
cls('ArcticCircle', 'arctic-circle'),
|
||||
cls('ATodaVelocidadSpanish', 'a-toda-velocidad', lang='es'),
|
||||
|
@ -43,22 +37,25 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('BarneyGoogleAndSnuffySmithSpanish', 'tapon', lang='es'),
|
||||
cls('BeetleBailey', 'beetle-bailey-1'),
|
||||
cls('BeetleBaileySpanish', 'beto-el-recluta', lang='es'),
|
||||
cls('BeetleMoses', 'beetle-moses'),
|
||||
cls('BetweenFriends', 'between-friends'),
|
||||
cls('BewareOfToddler', 'beware-of-toddler'),
|
||||
cls('BigBenBolt', 'big-ben-bolt'),
|
||||
cls('BigBenBoltSundays', 'big-ben-bolt-sundays'),
|
||||
cls('Bizarro', 'bizarro'),
|
||||
cls('Blondie', 'blondie'),
|
||||
cls('BlondieSpanish', 'pepita', lang='es'),
|
||||
cls('BobMankoffPresentsShowMeTheFunny', 'show-me-the-funny'),
|
||||
cls('BobMankoffPresentsShowMeTheFunnyAnimalEdition', 'show-me-the-funny-pets'),
|
||||
cls('BonersArk', 'boners-ark'),
|
||||
cls('BonersArkSundays', 'boners-ark-sundays'),
|
||||
cls('BrianDuffy', 'brian-duffy'),
|
||||
cls('BreakOfDay', 'break-of-day'),
|
||||
cls('BrickBradford', 'brick-bradford'),
|
||||
cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'),
|
||||
cls('BringingUpFather', 'bringing-up-father'),
|
||||
cls('BringingUpFatherSpanish', 'educando-a-papa', lang='es'),
|
||||
cls('BuzSawyer', 'buz-sawyer'),
|
||||
cls('Candorville', 'candorville'),
|
||||
cls('CarpeDiem', 'carpe-diem'),
|
||||
cls('Crankshaft', 'crankshaft'),
|
||||
cls('Comiclicious', 'comiclicious'),
|
||||
cls('Crock', 'crock'),
|
||||
cls('CrockSpanish', 'crock-spanish', lang='es'),
|
||||
cls('Curtis', 'curtis'),
|
||||
|
@ -67,6 +64,7 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('DavidMHitch', 'david-m-hitch'),
|
||||
cls('DennisTheMenace', 'dennis-the-menace'),
|
||||
cls('DennisTheMenaceSpanish', 'daniel-el-travieso', lang='es'),
|
||||
cls('Dumplings', 'dumplings'),
|
||||
cls('Dustin', 'dustin'),
|
||||
cls('EdGamble', 'ed-gamble'),
|
||||
# EdgeCity has a duplicate in GoComics/EdgeCity
|
||||
|
@ -74,18 +72,15 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('FamilyCircusSpanish', 'circulo-familiar', lang='es'),
|
||||
cls('FlashForward', 'flash-forward'),
|
||||
cls('FlashGordon', 'flash-gordon'),
|
||||
cls('FlashGordonSundays', 'flash-gordon-sundays'),
|
||||
cls('FunkyWinkerbean', 'funky-winkerbean'),
|
||||
cls('FunkyWinkerbeanSunday', 'funky-winkerbean-sundays'),
|
||||
cls('FunkyWinkerbeanVintage', 'funky-winkerbean-1'),
|
||||
cls('FunnyOnlineAnimals', 'Funny-Online-Animals'),
|
||||
cls('GearheadGertie', 'Gearhead-Gertie'),
|
||||
cls('FunnyOnlineAnimals', 'funny-online-animals'),
|
||||
cls('GearheadGertie', 'gearhead-gertie'),
|
||||
cls('GodsHands', 'gods-hands'),
|
||||
cls('HagarTheHorrible', 'hagar-the-horrible'),
|
||||
cls('HagarTheHorribleSpanish', 'olafo', lang='es'),
|
||||
cls('HeartOfJulietJones', 'heart-of-juliet-jones'),
|
||||
cls('HeartOfJulietJonesSundays', 'heart-of-juliet-jones-sundays'),
|
||||
cls('HiAndLois', 'hi-and-lois'),
|
||||
cls('IntelligentLife', 'Intelligent'),
|
||||
cls('InsanityStreak', 'insanity-streak'),
|
||||
cls('IntelligentLife', 'intelligent'),
|
||||
cls('JimmyMargulies', 'jimmy-margulies'),
|
||||
cls('JohnBranch', 'john-branch'),
|
||||
cls('JohnnyHazard', 'johnny-hazard'),
|
||||
|
@ -93,7 +88,6 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('JungleJimSundays', 'jungle-jim-sundays'),
|
||||
cls('KatzenjammerKids', 'katzenjammer-kids'),
|
||||
cls('KatzenjammerKidsSpanish', 'maldades-de-dos-pilluelos', lang='es'),
|
||||
cls('KatzenjammerKidsSundays', 'katzenjammer-kids-sundays'),
|
||||
cls('KevinAndKell', 'kevin-and-kell'),
|
||||
cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'),
|
||||
cls('KirkWalters', 'kirk-walters'),
|
||||
|
@ -101,44 +95,42 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('LaloYLolaSpanish', 'lalo-y-lola', lang='es'),
|
||||
cls('LeeJudge', 'lee-judge'),
|
||||
cls('LegalizationNation', 'legalization-nation'),
|
||||
cls('LegendOfBill', 'Legend-of-Bill'),
|
||||
cls('LegendOfBill', 'legend-of-bill'),
|
||||
cls('LittleIodineSundays', 'little-iodine-sundays'),
|
||||
cls('LittleKing', 'the-little-king'),
|
||||
cls('Lockhorns', 'lockhorns'),
|
||||
cls('Macanudo', 'Macanudo'),
|
||||
cls('Macanudo', 'macanudo'),
|
||||
cls('MacanudoSpanish', 'macanudo-spanish', lang='es'),
|
||||
cls('MallardFillmore', 'mallard-fillmore'),
|
||||
cls('MandrakeTheMagician', 'mandrake-the-magician-1'),
|
||||
cls('MandrakeTheMagician', 'mandrake-the-magician'),
|
||||
cls('MandrakeTheMagicianSpanish', 'mandrake-the-magician-spanish', lang='es'),
|
||||
cls('MandrakeTheMagicianSundays', 'mandrake-the-magician-sundays'),
|
||||
cls('MaraLlaveKeeperOfTime', 'mara-llave-keeper-of-time'),
|
||||
cls('MarkTrail', 'mark-trail'),
|
||||
cls('MarkTrailSpanish', 'mark-trail-spanish', lang='es'),
|
||||
cls('MarkTrailVintage', 'Mark-Trail-Vintage'),
|
||||
cls('Marvin', 'marvin'),
|
||||
cls('MarvinSpanish', 'marvin-spanish', lang='es'),
|
||||
cls('MaryWorth', 'mary-worth'),
|
||||
cls('MaryWorthSpanish', 'maria-de-oro', lang='es'),
|
||||
cls('MikePeters', 'mike-peters'),
|
||||
cls('Mazetoons', 'mazetoons'),
|
||||
cls('MikeShelton', 'mike-shelton'),
|
||||
cls('MikeSmith', 'mike-smith'),
|
||||
cls('MooseAndMolly', 'moose-and-molly'),
|
||||
cls('MooseAndMollySpanish', 'quintin', lang='es'),
|
||||
cls('MotherGooseAndGrimm', 'mother-goose-grimm'),
|
||||
cls('MrAbernathySpanish', 'don-abundio', lang='es'),
|
||||
cls('Mutts', 'mutts'),
|
||||
cls('MuttsSpanish', 'motas', lang='es'),
|
||||
cls('NeverBeenDeader', 'never-been-deader'),
|
||||
cls('OfficeHours', 'office-hours'),
|
||||
cls('OliveAndPopeye', 'olive-popeye'),
|
||||
cls('OnTheFastrack', 'on-the-fastrack'),
|
||||
cls('PajamaDiaries', 'pajama-diaries'),
|
||||
cls('PardonMyPlanet', 'pardon-my-planet'),
|
||||
cls('Phantom', 'phantom'),
|
||||
cls('PhantomSpanish', 'el-fantasma', lang='es'),
|
||||
cls('PhantomSundays', 'phantom-sundays'),
|
||||
cls('PlanetSyndicate', 'the_planet_syndicate'),
|
||||
cls('Popeye', 'popeye'),
|
||||
cls('PopeyesCartoonClub', 'popeyes-cartoon-club'),
|
||||
cls('PopeyeSpanish', 'popeye-spanish', lang='es'),
|
||||
cls('PrinceValiant', 'prince-valiant'),
|
||||
cls('PrinceValiantSundays', 'prince-valiant-sundays'),
|
||||
cls('PrincipeValienteSpanish', 'principe-valiente', lang='es'),
|
||||
cls('ProsAndCons', 'pros-cons'),
|
||||
cls('Quincy', 'quincy'),
|
||||
|
@ -148,7 +140,9 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('RexMorganMDSpanish', 'rex-morgan-md-spanish', lang='es'),
|
||||
cls('RhymesWithOrange', 'rhymes-with-orange'),
|
||||
cls('RipKirby', 'rip-kirby'),
|
||||
# Rosebuds has a duplicate in GoComics/Rosebuds
|
||||
cls('SafeHavens', 'safe-havens'),
|
||||
cls('SagaOfBrannBjornson', 'the-saga-of-brann-bjornson'),
|
||||
cls('Sales', 'sales'),
|
||||
cls('SallyForth', 'sally-forth'),
|
||||
cls('SamAndSilo', 'sam-and-silo'),
|
||||
|
@ -156,17 +150,18 @@ class ComicsKingdom(ParserScraper):
|
|||
cls('SecretAgentX9', 'secret-agent-x-9'),
|
||||
# Shoe has a duplicate in GoComics/Shoe
|
||||
cls('SixChix', 'six-chix'),
|
||||
cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'),
|
||||
cls('SlylockFoxAndComicsForKidsSpanish', 'solo-para-ninos', lang='es'),
|
||||
cls('SlylockFox', 'slylock-fox-and-comics-for-kids'),
|
||||
cls('SlylockFoxSpanish', 'solo-para-ninos', lang='es'),
|
||||
cls('SuburbanFairyTales', 'suburban-fairy-tales'),
|
||||
cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'),
|
||||
cls('TheyllDoItEveryTimeSpanish', 'nunca-falta-alguien-asi', lang='es'),
|
||||
cls('ThimbleTheater', 'thimble-theater'),
|
||||
cls('Tiger', 'tiger'),
|
||||
cls('TigerSpanish', 'tigrillo', lang='es'),
|
||||
cls('TigerVintage', 'tiger-1'),
|
||||
cls('TigerVintageSundays', 'tiger-sundays'),
|
||||
cls('TinasGroove', 'tina-s-groove'),
|
||||
cls('ToddTheDinosaur', 'todd-the-dinosaur'),
|
||||
cls('WillyBlack', 'willy-black'),
|
||||
cls('WillyBlacksSpanish', 'willy-black-spanish', lang='es'),
|
||||
cls('ZippyThePinhead', 'zippy-the-pinhead'),
|
||||
cls('Zits', 'zits'),
|
||||
cls('ZitsSpanish', 'jeremias', lang='es'),
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||
|
@ -328,19 +328,14 @@ class DreamKeepersPrelude(_ParserScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
class DresdenCodak(_ParserScraper):
|
||||
class DresdenCodak(ParserScraper):
|
||||
url = 'http://dresdencodak.com/'
|
||||
startUrl = url + 'cat/comic/'
|
||||
firstStripUrl = url + '2007/02/08/pom/'
|
||||
imageSearch = '//section[d:class("entry-content")]//img[d:class("aligncenter")]'
|
||||
prevSearch = '//a[img[contains(@src, "prev")]]'
|
||||
latestSearch = '//a[d:class("tc-grid-bg-link")]'
|
||||
starter = indirectStarter
|
||||
|
||||
# Blog and comic are mixed...
|
||||
def shouldSkipUrl(self, url, data):
|
||||
return not data.xpath(self.imageSearch)
|
||||
|
||||
|
||||
class DrFun(_ParserScraper):
|
||||
baseUrl = ('https://web.archive.org/web/20180726145737/'
|
||||
|
@ -355,14 +350,12 @@ class DrFun(_ParserScraper):
|
|||
help = 'Index format: nnnnn'
|
||||
|
||||
|
||||
class Drive(_BasicScraper):
|
||||
class Drive(ParserScraper):
|
||||
url = 'http://www.drivecomic.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'archive/%s.html'
|
||||
firstStripUrl = stripUrl % '090815'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.drivecomic\.com/strips/main/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl) + "Previous")
|
||||
help = 'Index format: yymmdd'
|
||||
firstStripUrl = url + 'comic/act-1-pg-001/'
|
||||
imageSearch = ('//div[@id="unspliced-comic"]//img/@data-src-img',
|
||||
'//div[@id="unspliced-comic"]//picture//img')
|
||||
prevSearch = '//a[d:class("previous-comic")]'
|
||||
|
||||
|
||||
class DrMcNinja(_ParserScraper):
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from ..scraper import ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
|
||||
|
@ -27,7 +27,7 @@ class Derideal(ParserScraper):
|
|||
|
||||
def starter(self):
|
||||
indexPage = self.getPage(self.url)
|
||||
self.chapters = indexPage.xpath('//a[contains(text(), "Read this episode")]/@href')
|
||||
self.chapters = self.match(indexPage, '//a[contains(text(), "Read this episode")]/@href')
|
||||
self.currentChapter = len(self.chapters)
|
||||
return indirectStarter(self)
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ class Erfworld(ParserScraper):
|
|||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return not data.xpath(self.imageSearch)
|
||||
return not self.match(data, self.imageSearch)
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
# Fix inconsistent filenames
|
||||
|
@ -167,15 +167,6 @@ class Erstwhile(WordPressNavi):
|
|||
endOfLife = True
|
||||
|
||||
|
||||
class Everblue(ComicControlScraper):
|
||||
url = 'http://www.everblue-comic.com/comic/'
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
|
||||
|
||||
|
||||
class EverybodyLovesEricRaymond(_ParserScraper):
|
||||
url = 'http://geekz.co.uk/lovesraymond/'
|
||||
firstStripUrl = url + 'archive/slashdotted'
|
||||
|
@ -190,9 +181,10 @@ class EvilDiva(WordPressScraper):
|
|||
endOfLife = True
|
||||
|
||||
|
||||
class EvilInc(_ParserScraper):
|
||||
class EvilInc(ParserScraper):
|
||||
url = 'https://www.evil-inc.com/'
|
||||
imageSearch = '//div[@id="unspliced-comic"]/img/@data-src'
|
||||
imageSearch = ('//div[@id="unspliced-comic"]/img',
|
||||
'//div[@id="unspliced-comic"]/picture//img')
|
||||
prevSearch = '//a[./i[d:class("fa-chevron-left")]]'
|
||||
firstStripUrl = url + 'comic/monday-3/'
|
||||
|
||||
|
@ -263,7 +255,7 @@ class ExtraFabulousComics(WordPressScraper):
|
|||
return '_'.join((pagepart, imagename))
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
return data.xpath('//div[@id="comic"]//iframe')
|
||||
return self.match(data, '//div[@id="comic"]//iframe')
|
||||
|
||||
|
||||
class ExtraLife(_BasicScraper):
|
||||
|
|
|
@ -140,7 +140,7 @@ class FoxDad(ParserScraper):
|
|||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
page = self.getPage(pageUrl)
|
||||
post = page.xpath('//li[@class="timestamp"]/a/@href')[0]
|
||||
post = self.match(page, '//li[d:class("timestamp")]/a/@href')[0]
|
||||
post = post.replace('https://foxdad.com/post/', '')
|
||||
if '-consider-support' in post:
|
||||
post = post.split('-consider-support')[0]
|
||||
|
@ -171,7 +171,7 @@ class Fragile(_ParserScraper):
|
|||
endOfLife = True
|
||||
|
||||
|
||||
class FredoAndPidjin(_ParserScraper):
|
||||
class FredoAndPidjin(ParserScraper):
|
||||
url = 'https://www.pidjin.net/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
|
||||
|
@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper):
|
|||
prevSearch = '//span[d:class("prev")]/a'
|
||||
latestSearch = '//section[d:class("latest")]//a'
|
||||
starter = indirectStarter
|
||||
namer = joinPathPartsNamer((0, 1, 2))
|
||||
namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))
|
||||
|
||||
|
||||
class Freefall(_ParserScraper):
|
||||
|
@ -216,7 +216,7 @@ class FriendsYouAreStuckWith(WordPressScraper):
|
|||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
page = self.getPage(pageUrl)
|
||||
strip = page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '')
|
||||
strip = self.match(page, '//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '')
|
||||
return strip + '_' + imageUrl.rstrip('/').rsplit('/', 1)[-1]
|
||||
|
||||
|
||||
|
|
|
@ -3,11 +3,11 @@
|
|||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
from ..util import tagre, getQueryParams
|
||||
from .common import ComicControlScraper, WordPressScraper, WordPressNavi
|
||||
|
||||
|
||||
|
@ -27,13 +27,9 @@ class Garanos(WordPressScraper):
|
|||
endOfLife = True
|
||||
|
||||
|
||||
class GastroPhobia(_ParserScraper):
|
||||
url = 'http://www.gastrophobia.com/'
|
||||
stripUrl = url + 'index.php?date=%s'
|
||||
firstStripUrl = stripUrl % '2008-07-30'
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = '//div[@id="prev"]/a'
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
class GastroPhobia(ComicControlScraper):
|
||||
url = 'https://gastrophobia.com/'
|
||||
firstStripUrl = url + 'comix/the-mane-event'
|
||||
|
||||
|
||||
class Geeks(_ParserScraper):
|
||||
|
@ -51,7 +47,7 @@ class GeeksNextDoor(_ParserScraper):
|
|||
url = 'http://www.geeksnextcomic.com/'
|
||||
stripUrl = url + '%s.html'
|
||||
firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04'
|
||||
imageSearch = '//p/img'
|
||||
imageSearch = ('//p/img', '//p/span/img')
|
||||
prevSearch = (
|
||||
'//a[img[contains(@src, "/nav_prev")]]',
|
||||
'//a[contains(text(), "< prev")]', # start page is different
|
||||
|
@ -59,16 +55,12 @@ class GeeksNextDoor(_ParserScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class GirlGenius(_BasicScraper):
|
||||
baseUrl = 'http://www.girlgeniusonline.com/'
|
||||
rurl = escape(baseUrl)
|
||||
url = baseUrl + 'comic.php'
|
||||
class GirlGenius(ParserScraper):
|
||||
url = 'https://www.girlgeniusonline.com/comic.php'
|
||||
stripUrl = url + '?date=%s'
|
||||
firstStripUrl = stripUrl % '20021104'
|
||||
imageSearch = compile(
|
||||
tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'"))
|
||||
prevSearch = compile(tagre("a", "id", "topprev", quote="\"",
|
||||
before=r"(%s[^\"']+)" % rurl))
|
||||
imageSearch = '//img[@alt="Comic"]'
|
||||
prevSearch = '//a[@id="topprev"]'
|
||||
multipleImagesPerStrip = True
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
@ -99,20 +91,18 @@ class GoGetARoomie(ComicControlScraper):
|
|||
url = 'http://www.gogetaroomie.com'
|
||||
|
||||
|
||||
class GoneWithTheBlastwave(_BasicScraper):
|
||||
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
|
||||
starter = indirectStarter
|
||||
stripUrl = url[:-1] + '%s'
|
||||
class GoneWithTheBlastwave(ParserScraper):
|
||||
stripUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
|
||||
prevSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
||||
r'<img src="images/page/default/previous')
|
||||
latestSearch = compile(r'href="(index.php\?p=comic&nro=\d+)">' +
|
||||
r'<img src="images/page/default/latest')
|
||||
url = firstStripUrl
|
||||
starter = indirectStarter
|
||||
imageSearch = '//*[@id="comic_ruutu"]/center/img'
|
||||
prevSearch = '//a[img[contains(@src, "previous")]]'
|
||||
latestSearch = '//a[img[contains(@src, "latest")]]'
|
||||
help = 'Index format: n'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1))
|
||||
return '%02d' % int(getQueryParams(page_url)['nro'][0])
|
||||
|
||||
|
||||
class GrrlPower(WordPressScraper):
|
||||
|
@ -130,13 +120,12 @@ class GuildedAge(WordPressScraper):
|
|||
firstStripUrl = url + 'comic/chapter-1-cover/'
|
||||
|
||||
|
||||
class GUComics(_BasicScraper):
|
||||
url = 'http://www.gucomics.com/'
|
||||
stripUrl = url + '%s'
|
||||
class GUComics(ParserScraper):
|
||||
stripUrl = 'https://www.gucomics.com/%s'
|
||||
url = stripUrl % 'comic/'
|
||||
firstStripUrl = stripUrl % '20000710'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
|
||||
tagre("img", "src", r'/images/nav/prev\.png'))
|
||||
imageSearch = '//img[contains(@src, "/comics/2")]'
|
||||
prevSearch = '//a[img[contains(@alt, "previous")]]'
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
from ..scraper import ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
|
||||
|
@ -31,7 +31,7 @@ class GoComics(ParserScraper):
|
|||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return data.xpath('//img[contains(@src, "content-error-missing")]')
|
||||
return self.match(data, '//img[contains(@src, "content-error-missing")]')
|
||||
|
||||
@classmethod
|
||||
def getmodules(cls): # noqa: CFQ001
|
||||
|
@ -44,7 +44,6 @@ class GoComics(ParserScraper):
|
|||
# START AUTOUPDATE
|
||||
cls('1AndDone', '1-and-done'),
|
||||
cls('9ChickweedLane', '9chickweedlane'),
|
||||
cls('9ChickweedLaneClassics', '9-chickweed-lane-classics'),
|
||||
cls('9To5', '9to5'),
|
||||
cls('Aaggghhh', 'Aaggghhh', 'es'),
|
||||
cls('AdamAtHome', 'adamathome'),
|
||||
|
@ -62,6 +61,7 @@ class GoComics(ParserScraper):
|
|||
cls('Annie', 'annie'),
|
||||
cls('AProblemLikeJamal', 'a-problem-like-jamal'),
|
||||
cls('ArloAndJanis', 'arloandjanis'),
|
||||
cls('ArtByMoga', 'artbymoga'),
|
||||
cls('AskShagg', 'askshagg'),
|
||||
cls('AtTavicat', 'tavicat'),
|
||||
cls('AuntyAcid', 'aunty-acid'),
|
||||
|
@ -69,7 +69,6 @@ class GoComics(ParserScraper):
|
|||
cls('BackInTheDay', 'backintheday'),
|
||||
cls('BackToBC', 'back-to-bc'),
|
||||
cls('Bacon', 'bacon'),
|
||||
cls('Badlands', 'badlands'),
|
||||
cls('BadMachinery', 'bad-machinery'),
|
||||
cls('Baldo', 'baldo'),
|
||||
cls('BaldoEnEspanol', 'baldoespanol', 'es'),
|
||||
|
@ -90,8 +89,8 @@ class GoComics(ParserScraper):
|
|||
cls('Betty', 'betty'),
|
||||
cls('BFGFSyndrome', 'bfgf-syndrome'),
|
||||
cls('BigNate', 'bignate'),
|
||||
cls('BigNateFirstClass', 'big-nate-first-class'),
|
||||
cls('BigTop', 'bigtop'),
|
||||
cls('BillBramhall', 'bill-bramhall'),
|
||||
cls('BirdAndMoon', 'bird-and-moon'),
|
||||
cls('Birdbrains', 'birdbrains'),
|
||||
cls('BleekerTheRechargeableDog', 'bleeker'),
|
||||
|
@ -99,14 +98,14 @@ class GoComics(ParserScraper):
|
|||
cls('BloomCounty', 'bloomcounty'),
|
||||
cls('BloomCounty2019', 'bloom-county'),
|
||||
cls('BobGorrell', 'bobgorrell'),
|
||||
cls('BobTheAngryFlower', 'bob-the-angry-flower'),
|
||||
cls('BobTheSquirrel', 'bobthesquirrel'),
|
||||
cls('BoNanas', 'bonanas'),
|
||||
cls('Boomerangs', 'boomerangs'),
|
||||
cls('Bottomliners', 'bottomliners'),
|
||||
cls('BottomLiners', 'bottomliners'),
|
||||
cls('BoundAndGagged', 'boundandgagged'),
|
||||
cls('Bozo', 'bozo'),
|
||||
cls('BreakingCatNews', 'breaking-cat-news'),
|
||||
cls('BreakOfDay', 'break-of-day'),
|
||||
cls('Brevity', 'brevity'),
|
||||
cls('BrewsterRockit', 'brewsterrockit'),
|
||||
cls('BrianMcFadden', 'brian-mcfadden'),
|
||||
|
@ -116,7 +115,6 @@ class GoComics(ParserScraper):
|
|||
cls('Buni', 'buni'),
|
||||
cls('CalvinAndHobbes', 'calvinandhobbes'),
|
||||
cls('CalvinAndHobbesEnEspanol', 'calvinandhobbesespanol', 'es'),
|
||||
cls('Candorville', 'candorville'),
|
||||
cls('CatanaComics', 'little-moments-of-love'),
|
||||
cls('CathyClassics', 'cathy'),
|
||||
cls('CathyCommiserations', 'cathy-commiserations'),
|
||||
|
@ -139,17 +137,18 @@ class GoComics(ParserScraper):
|
|||
cls('CowAndBoyClassics', 'cowandboy'),
|
||||
cls('CowTown', 'cowtown'),
|
||||
cls('Crabgrass', 'crabgrass'),
|
||||
# Crankshaft has a duplicate in ComicsKingdom/Crankshaft
|
||||
cls('Crumb', 'crumb'),
|
||||
cls('CulDeSac', 'culdesac'),
|
||||
cls('Curses', 'curses'),
|
||||
cls('DaddysHome', 'daddyshome'),
|
||||
cls('DanaSummers', 'danasummers'),
|
||||
cls('DarkSideOfTheHorse', 'darksideofthehorse'),
|
||||
cls('DayByDave', 'day-by-dave'),
|
||||
cls('DeepDarkFears', 'deep-dark-fears'),
|
||||
cls('DeFlocked', 'deflocked'),
|
||||
cls('DiamondLil', 'diamondlil'),
|
||||
cls('DickTracy', 'dicktracy'),
|
||||
cls('DilbertClassics', 'dilbert-classics'),
|
||||
cls('DilbertEnEspanol', 'dilbert-en-espanol', 'es'),
|
||||
cls('DinosaurComics', 'dinosaur-comics'),
|
||||
cls('DogEatDoug', 'dogeatdoug'),
|
||||
cls('DogsOfCKennel', 'dogsofckennel'),
|
||||
|
@ -160,15 +159,14 @@ class GoComics(ParserScraper):
|
|||
cls('Doonesbury', 'doonesbury'),
|
||||
cls('Drabble', 'drabble'),
|
||||
cls('DrewSheneman', 'drewsheneman'),
|
||||
cls('DumbwichCastle', 'dumbwich-castle'),
|
||||
cls('EdgeCity', 'edge-city'),
|
||||
cls('Eek', 'eek'),
|
||||
cls('ElCafDePoncho', 'el-cafe-de-poncho', 'es'),
|
||||
cls('EmmyLou', 'emmy-lou'),
|
||||
cls('Endtown', 'endtown'),
|
||||
cls('EricAllie', 'eric-allie'),
|
||||
cls('EverydayPeopleCartoons', 'everyday-people-cartoons'),
|
||||
cls('Eyebeam', 'eyebeam'),
|
||||
cls('EyebeamClassic', 'eyebeam-classic'),
|
||||
cls('FalseKnees', 'false-knees'),
|
||||
cls('FamilyTree', 'familytree'),
|
||||
cls('Farcus', 'farcus'),
|
||||
|
@ -191,8 +189,8 @@ class GoComics(ParserScraper):
|
|||
cls('FreeRange', 'freerange'),
|
||||
cls('FreshlySqueezed', 'freshlysqueezed'),
|
||||
cls('FrogApplause', 'frogapplause'),
|
||||
cls('FurBabies', 'furbabies'),
|
||||
cls('Garfield', 'garfield'),
|
||||
cls('GarfieldClassics', 'garfield-classics'),
|
||||
cls('GarfieldEnEspanol', 'garfieldespanol', 'es'),
|
||||
cls('GaryMarkstein', 'garymarkstein'),
|
||||
cls('GaryVarvel', 'garyvarvel'),
|
||||
|
@ -222,6 +220,7 @@ class GoComics(ParserScraper):
|
|||
cls('HerbAndJamaal', 'herbandjamaal'),
|
||||
cls('Herman', 'herman'),
|
||||
cls('HomeAndAway', 'homeandaway'),
|
||||
cls('HomeFree', 'homefree'),
|
||||
cls('HotComicsForCoolPeople', 'hot-comics-for-cool-people'),
|
||||
cls('HutchOwen', 'hutch-owen'),
|
||||
cls('ImagineThis', 'imaginethis'),
|
||||
|
@ -238,10 +237,12 @@ class GoComics(ParserScraper):
|
|||
cls('JeffDanziger', 'jeffdanziger'),
|
||||
cls('JeffStahler', 'jeffstahler'),
|
||||
cls('JenSorensen', 'jen-sorensen'),
|
||||
cls('JerryKingComics', 'jerry-king-comics'),
|
||||
cls('JimBentonCartoons', 'jim-benton-cartoons'),
|
||||
cls('JimMorin', 'jimmorin'),
|
||||
cls('JoeHeller', 'joe-heller'),
|
||||
cls('JoelPett', 'joelpett'),
|
||||
cls('JoeyWeatherford', 'joey-weatherford'),
|
||||
cls('JohnDeering', 'johndeering'),
|
||||
cls('JumpStart', 'jumpstart'),
|
||||
cls('JunkDrawer', 'junk-drawer'),
|
||||
|
@ -287,7 +288,6 @@ class GoComics(ParserScraper):
|
|||
cls('Lunarbaboon', 'lunarbaboon'),
|
||||
cls('M2Bulls', 'm2bulls'),
|
||||
cls('Maintaining', 'maintaining'),
|
||||
cls('MakingIt', 'making-it'),
|
||||
cls('MannequinOnTheMoon', 'mannequin-on-the-moon'),
|
||||
cls('MariasDay', 'marias-day'),
|
||||
cls('Marmaduke', 'marmaduke'),
|
||||
|
@ -299,6 +299,7 @@ class GoComics(ParserScraper):
|
|||
cls('MessycowComics', 'messy-cow'),
|
||||
cls('MexikidStories', 'mexikid-stories'),
|
||||
cls('MichaelRamirez', 'michaelramirez'),
|
||||
cls('MikeBeckom', 'mike-beckom'),
|
||||
cls('MikeDuJour', 'mike-du-jour'),
|
||||
cls('MikeLester', 'mike-lester'),
|
||||
cls('MikeLuckovich', 'mikeluckovich'),
|
||||
|
@ -307,9 +308,9 @@ class GoComics(ParserScraper):
|
|||
cls('Momma', 'momma'),
|
||||
cls('Monty', 'monty'),
|
||||
cls('MontyDiaros', 'monty-diaros', 'es'),
|
||||
# MotherGooseAndGrimm has a duplicate in ComicsKingdom/MotherGooseAndGrimm
|
||||
cls('MotleyClassics', 'motley-classics'),
|
||||
cls('MrLowe', 'mr-lowe'),
|
||||
cls('MtPleasant', 'mtpleasant'),
|
||||
cls('MuttAndJeff', 'muttandjeff'),
|
||||
cls('MyDadIsDracula', 'my-dad-is-dracula'),
|
||||
cls('MythTickle', 'mythtickle'),
|
||||
|
@ -341,10 +342,10 @@ class GoComics(ParserScraper):
|
|||
cls('OverTheHedge', 'overthehedge'),
|
||||
cls('OzyAndMillie', 'ozy-and-millie'),
|
||||
cls('PatOliphant', 'patoliphant'),
|
||||
cls('PCAndPixel', 'pcandpixel'),
|
||||
cls('Peanuts', 'peanuts'),
|
||||
cls('PeanutsBegins', 'peanuts-begins'),
|
||||
cls('PearlsBeforeSwine', 'pearlsbeforeswine'),
|
||||
cls('PedroXMolina', 'pedroxmolina'),
|
||||
cls('Periquita', 'periquita', 'es'),
|
||||
cls('PerlasParaLosCerdos', 'perlas-para-los-cerdos', 'es'),
|
||||
cls('PerryBibleFellowship', 'perry-bible-fellowship'),
|
||||
|
@ -383,7 +384,6 @@ class GoComics(ParserScraper):
|
|||
cls('RoseIsRose', 'roseisrose'),
|
||||
cls('Rubes', 'rubes'),
|
||||
cls('RudyPark', 'rudypark'),
|
||||
cls('SaltNPepper', 'salt-n-pepper'),
|
||||
cls('SarahsScribbles', 'sarahs-scribbles'),
|
||||
cls('SaturdayMorningBreakfastCereal', 'saturday-morning-breakfast-cereal'),
|
||||
cls('SavageChickens', 'savage-chickens'),
|
||||
|
@ -394,13 +394,11 @@ class GoComics(ParserScraper):
|
|||
cls('ShermansLagoon', 'shermanslagoon'),
|
||||
cls('ShirleyAndSonClassics', 'shirley-and-son-classics'),
|
||||
cls('Shoe', 'shoe'),
|
||||
cls('SigneWilkinson', 'signewilkinson'),
|
||||
cls('SketchsharkComics', 'sketchshark-comics'),
|
||||
cls('SkinHorse', 'skinhorse'),
|
||||
cls('Skippy', 'skippy'),
|
||||
cls('SmallPotatoes', 'small-potatoes'),
|
||||
cls('SnoopyEnEspanol', 'peanuts-espanol', 'es'),
|
||||
cls('Snowflakes', 'snowflakes'),
|
||||
cls('SnowSez', 'snow-sez'),
|
||||
cls('SpeedBump', 'speedbump'),
|
||||
cls('SpiritOfTheStaircase', 'spirit-of-the-staircase'),
|
||||
|
@ -410,9 +408,7 @@ class GoComics(ParserScraper):
|
|||
cls('SteveKelley', 'stevekelley'),
|
||||
cls('StickyComics', 'sticky-comics'),
|
||||
cls('StoneSoup', 'stonesoup'),
|
||||
cls('StoneSoupClassics', 'stone-soup-classics'),
|
||||
cls('StrangeBrew', 'strangebrew'),
|
||||
cls('StuartCarlson', 'stuartcarlson'),
|
||||
cls('StudioJantze', 'studio-jantze'),
|
||||
cls('SunnyStreet', 'sunny-street'),
|
||||
cls('SunshineState', 'sunshine-state'),
|
||||
|
@ -425,6 +421,7 @@ class GoComics(ParserScraper):
|
|||
cls('TarzanEnEspanol', 'tarzan-en-espanol', 'es'),
|
||||
cls('TedRall', 'ted-rall'),
|
||||
cls('TenCats', 'ten-cats'),
|
||||
cls('Tex', 'tex'),
|
||||
cls('TextsFromMittens', 'texts-from-mittens'),
|
||||
cls('Thatababy', 'thatababy'),
|
||||
cls('ThatIsPriceless', 'that-is-priceless'),
|
||||
|
@ -451,6 +448,7 @@ class GoComics(ParserScraper):
|
|||
cls('TheHumbleStumble', 'humble-stumble'),
|
||||
cls('TheKChronicles', 'thekchronicles'),
|
||||
cls('TheKnightLife', 'theknightlife'),
|
||||
cls('TheLockhorns', 'lockhorns'),
|
||||
cls('TheMartianConfederacy', 'the-martian-confederacy'),
|
||||
cls('TheMeaningOfLila', 'meaningoflila'),
|
||||
cls('TheMiddleAge', 'the-middle-age'),
|
||||
|
@ -473,6 +471,7 @@ class GoComics(ParserScraper):
|
|||
cls('TruthFacts', 'truth-facts'),
|
||||
cls('Tutelandia', 'tutelandia', 'es'),
|
||||
cls('TwoPartyOpera', 'two-party-opera'),
|
||||
cls('UFO', 'ufo'),
|
||||
cls('UnderpantsAndOverbites', 'underpants-and-overbites'),
|
||||
cls('UnderstandingChaos', 'understanding-chaos'),
|
||||
cls('UnstrangePhenomena', 'unstrange-phenomena'),
|
||||
|
@ -487,6 +486,7 @@ class GoComics(ParserScraper):
|
|||
cls('ViiviAndWagner', 'viivi-and-wagner'),
|
||||
cls('WallaceTheBrave', 'wallace-the-brave'),
|
||||
cls('WaltHandelsman', 'walthandelsman'),
|
||||
cls('Wannabe', 'wannabe'),
|
||||
cls('Warped', 'warped'),
|
||||
cls('WatchYourHead', 'watchyourhead'),
|
||||
cls('Wawawiwa', 'wawawiwa'),
|
||||
|
@ -505,6 +505,7 @@ class GoComics(ParserScraper):
|
|||
cls('WuMo', 'wumo'),
|
||||
cls('WumoEnEspanol', 'wumoespanol', 'es'),
|
||||
cls('Yaffle', 'yaffle'),
|
||||
cls('YeahItsChill', 'yeah-its-chill'),
|
||||
cls('YesImHotInThis', 'yesimhotinthis'),
|
||||
cls('ZackHill', 'zackhill'),
|
||||
cls('ZenPencils', 'zen-pencils'),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from ..scraper import ParserScraper
|
||||
|
||||
|
||||
|
@ -44,7 +44,7 @@ class KemonoCafe(ParserScraper):
|
|||
# Fix unordered filenames
|
||||
if 'addictivescience' in pageUrl:
|
||||
page = self.getPage(pageUrl)
|
||||
num = int(page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', ''))
|
||||
num = int(self.match(page, '//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', ''))
|
||||
filename = '%04d_%s' % (num, filename)
|
||||
elif 'CaughtInOrbit' in filename:
|
||||
filename = filename.replace('CaughtInOrbit', 'CIO')
|
||||
|
|
|
@ -5,24 +5,7 @@
|
|||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from ..scraper import ParserScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
|
||||
|
||||
|
||||
class Lackadaisy(ParserScraper):
|
||||
url = 'https://www.lackadaisy.com/comic.php'
|
||||
stripUrl = url + '?comicid=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = '//div[@id="exhibit"]/img[contains(@src, "comic/")]'
|
||||
prevSearch = '//div[@class="prev"]/a'
|
||||
nextSearch = '//div[@class="next"]/a'
|
||||
help = 'Index format: n'
|
||||
starter = bounceStarter
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
# Use comic id for filename
|
||||
num = pageUrl.rsplit('=', 1)[-1]
|
||||
ext = imageUrl.rsplit('.', 1)[-1]
|
||||
return 'lackadaisy_%s.%s' % (num, ext)
|
||||
from .common import ComicControlScraper, WordPressScraper
|
||||
|
||||
|
||||
class Lancer(WordPressScraper):
|
||||
|
@ -55,7 +38,7 @@ class LazJonesAndTheMayfieldRegulatorsSideStories(LazJonesAndTheMayfieldRegulato
|
|||
|
||||
def getPrevUrl(self, url, data):
|
||||
# Fix broken navigation links
|
||||
if url == self.url and data.xpath(self.prevSearch + '/@href')[0] == self.stripUrl % 'summer00':
|
||||
if url == self.url and self.match(data, self.prevSearch + '/@href')[0] == self.stripUrl % 'summer00':
|
||||
return self.stripUrl % 'summer21'
|
||||
return super(LazJonesAndTheMayfieldRegulators, self).getPrevUrl(url, data)
|
||||
|
||||
|
|
|
@ -4,22 +4,18 @@
|
|||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
import json
|
||||
from re import compile, escape, IGNORECASE
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..helpers import indirectStarter
|
||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from ..xml import NS
|
||||
from .common import ComicControlScraper, WordPressScraper, WordPressWebcomic
|
||||
|
||||
|
||||
class MacHall(_BasicScraper):
|
||||
url = 'http://www.machall.com/'
|
||||
stripUrl = url + 'view.php?date=%s'
|
||||
firstStripUrl = stripUrl % '2000-11-07'
|
||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><img[^>]+?src=\'drop_shadow/previous.gif\'>')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
class MacHall(ComicControlScraper):
|
||||
url = 'https://www.machall.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % 'moving-in'
|
||||
|
||||
|
||||
class MadamAndEve(_BasicScraper):
|
||||
|
@ -58,12 +54,12 @@ class MareInternum(WordPressScraper):
|
|||
firstStripUrl = stripUrl % 'intro-page-1'
|
||||
|
||||
|
||||
class Marilith(_BasicScraper):
|
||||
url = 'http://www.marilith.com/'
|
||||
class Marilith(ParserScraper):
|
||||
url = 'https://web.archive.org/web/20170619193143/http://www.marilith.com/'
|
||||
stripUrl = url + 'archive.php?date=%s'
|
||||
firstStripUrl = stripUrl % '20041215'
|
||||
imageSearch = compile(r'<img src="(comics/.+?)" border')
|
||||
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
|
||||
imageSearch = '//img[contains(@src, "comics/")]'
|
||||
prevSearch = '//a[img[@name="previous_day"]]'
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
@ -80,22 +76,14 @@ class MarriedToTheSea(_ParserScraper):
|
|||
return '%s-%s' % (date, filename)
|
||||
|
||||
|
||||
class MarryMe(_ParserScraper):
|
||||
url = 'http://marryme.keenspot.com/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
class MarryMe(ParserScraper):
|
||||
stripUrl = 'http://marryme.keenspot.com/d/%s.html'
|
||||
url = stripUrl % '20191001'
|
||||
firstStripUrl = stripUrl % '20120730'
|
||||
imageSearch = '//img[@class="ksc"]'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
endOfLife = True
|
||||
|
||||
|
||||
class MaxOveracts(_ParserScraper):
|
||||
url = 'http://occasionalcomics.com/'
|
||||
stripUrl = url + '%s/'
|
||||
css = True
|
||||
imageSearch = '#comic img'
|
||||
prevSearch = '.nav-previous > a'
|
||||
help = 'Index format: nnn'
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Meek(WordPressScraper):
|
||||
|
@ -149,20 +137,22 @@ class MisfileHellHigh(Misfile):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class MistyTheMouse(WordPressScraper):
|
||||
class MistyTheMouse(ParserScraper):
|
||||
url = 'http://www.mistythemouse.com/'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
firstStripUrl = 'http://www.mistythemouse.com/?p=12'
|
||||
imageSearch = '//center/p/img'
|
||||
prevSearch = '//a[img[contains(@src, "Previous")]]'
|
||||
firstStripUrl = url + 'The_Live_In.html'
|
||||
|
||||
|
||||
class MonkeyUser(_ParserScraper):
|
||||
class MonkeyUser(ParserScraper):
|
||||
url = 'https://www.monkeyuser.com/'
|
||||
prevSearch = '//div[@title="previous"]/a'
|
||||
imageSearch = '//div[d:class("content")]/p/img'
|
||||
prevSearch = '//a[text()="Prev"]'
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
# videos
|
||||
return data.xpath('//div[d:class("video-container")]', namespaces=NS)
|
||||
return self.match(data, '//div[d:class("video-container")]')
|
||||
|
||||
|
||||
class MonsieurLeChien(ParserScraper):
|
||||
|
@ -195,43 +185,10 @@ class Moonlace(WordPressWebcomic):
|
|||
return indirectStarter(self)
|
||||
|
||||
|
||||
class Moonsticks(_ParserScraper):
|
||||
url = "http://moonsticks.org/"
|
||||
imageSearch = "//div[@class='entry']//img"
|
||||
prevSearch = u"//a[text()='\u00AB Prev']"
|
||||
|
||||
|
||||
class MrLovenstein(_BasicScraper):
|
||||
url = 'http://www.mrlovenstein.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = (
|
||||
# captures rollover comic
|
||||
compile(tagre("div", "class", r'comic_image') + r'\s*.*\s*' +
|
||||
tagre("div", "style", r'display: none;') + r'\s*.*\s' +
|
||||
tagre("img", "src", r'(/images/comics/[^"]+)')),
|
||||
# captures standard comic
|
||||
compile(tagre("img", "src", r'(/images/comics/[^"]+)',
|
||||
before="comic_main_image")),
|
||||
)
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') +
|
||||
tagre("img", "src", "/images/nav_left.png"))
|
||||
textSearch = compile(r'<meta name="description" content="(.+?)" />')
|
||||
help = 'Index Format: n'
|
||||
|
||||
|
||||
class MyCartoons(_BasicScraper):
|
||||
url = 'http://mycartoons.de/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'page/%s'
|
||||
imageSearch = (
|
||||
compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
|
||||
compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
|
||||
)
|
||||
prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) +
|
||||
"«")
|
||||
help = 'Index format: number'
|
||||
lang = 'de'
|
||||
class Moonsticks(ParserScraper):
|
||||
url = "https://moonsticks.org/"
|
||||
imageSearch = "//div[d:class('entry-content')]//img"
|
||||
prevSearch = ('//a[@rel="prev"]', "//a[text()='\u00AB Prev']")
|
||||
|
||||
|
||||
class MyLifeWithFel(ParserScraper):
|
||||
|
|
|
@ -11,6 +11,12 @@ from ..util import tagre
|
|||
from .common import WordPressScraper, WordPressNavi
|
||||
|
||||
|
||||
class OccasionalComicsDisorder(WordPressScraper):
|
||||
url = 'https://occasionalcomics.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % 'latest-comic-2'
|
||||
|
||||
|
||||
class OctopusPie(_ParserScraper):
|
||||
url = 'http://www.octopuspie.com/'
|
||||
rurl = escape(url)
|
||||
|
|
|
@ -604,7 +604,6 @@ class Removed(Scraper):
|
|||
cls('WotNow'),
|
||||
|
||||
# Removed in 3.0
|
||||
cls('CatenaManor/CatenaCafe'),
|
||||
cls('ComicFury/AdventuresOftheGreatCaptainMaggieandCrew'),
|
||||
cls('ComicFury/AWAKENING'),
|
||||
cls('ComicFury/Beebleville'),
|
||||
|
@ -833,8 +832,6 @@ class Removed(Scraper):
|
|||
cls('ComicsKingdom/Redeye'),
|
||||
cls('ComicsKingdom/RedeyeSundays'),
|
||||
cls('CrapIDrewOnMyLunchBreak'),
|
||||
cls('FalseStart'),
|
||||
cls('Ginpu'),
|
||||
cls('GoComics/060'),
|
||||
cls('GoComics/2CowsAndAChicken'),
|
||||
cls('GoComics/ABitSketch'),
|
||||
|
@ -995,11 +992,9 @@ class Removed(Scraper):
|
|||
cls('GoComics/Wrobbertcartoons'),
|
||||
cls('GoComics/Zootopia'),
|
||||
cls('JustAnotherEscape'),
|
||||
cls('KemonoCafe/PrincessBunny'),
|
||||
cls('Laiyu', 'brk'),
|
||||
cls('MangaDex/DrStone', 'legal'),
|
||||
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
||||
cls('MangaDex/ImTheMaxLevelNewbie', 'legal'),
|
||||
cls('MangaDex/SPYxFAMILY', 'legal'),
|
||||
cls('Ryugou'),
|
||||
cls('SeelPeel'),
|
||||
|
@ -1573,22 +1568,82 @@ class Removed(Scraper):
|
|||
cls('SnafuComics/Tin'),
|
||||
cls('SnafuComics/Titan'),
|
||||
cls('StudioKhimera/Eorah', 'mov'),
|
||||
cls('StudioKhimera/Mousechevious'),
|
||||
cls('StuffNoOneToldMe'),
|
||||
cls('TaleOfTenThousand'),
|
||||
cls('TalesAndTactics'),
|
||||
cls('TheCyantianChronicles/CookieCaper'),
|
||||
cls('TheCyantianChronicles/Pawprints'),
|
||||
cls('VampireHunterBoyfriends'),
|
||||
cls('VGCats/Adventure'),
|
||||
cls('VGCats/Super'),
|
||||
cls('VictimsOfTheSystem'),
|
||||
cls('WebDesignerCOTW'),
|
||||
cls('WebToons/Adamsville'),
|
||||
cls('WebToons/CrapIDrewOnMyLunchBreak'),
|
||||
cls('WintersLight'),
|
||||
|
||||
# Removed in 3.1
|
||||
cls('AbbysAgency', 'brk'),
|
||||
cls('AcademyVale'),
|
||||
cls('AhoyEarth', 'block'),
|
||||
cls('Anaria', 'del'),
|
||||
cls('Angels2200', 'del'),
|
||||
cls('BlackRose', 'brk'),
|
||||
cls('CatenaManor/CatenaCafe'),
|
||||
cls('ComicsKingdom/AmazingSpiderman'),
|
||||
cls('ComicsKingdom/AmazingSpidermanSpanish'),
|
||||
cls('ComicsKingdom/BigBenBoltSundays'),
|
||||
cls('ComicsKingdom/BonersArkSundays'),
|
||||
cls('ComicsKingdom/BrianDuffy'),
|
||||
cls('ComicsKingdom/Crankshaft'),
|
||||
cls('ComicsKingdom/FlashGordonSundays'),
|
||||
cls('ComicsKingdom/FunkyWinkerbean'),
|
||||
cls('ComicsKingdom/FunkyWinkerbeanSunday'),
|
||||
cls('ComicsKingdom/FunkyWinkerbeanSundays'),
|
||||
cls('ComicsKingdom/FunkyWinkerbeanVintage'),
|
||||
cls('ComicsKingdom/HeartOfJulietJonesSundays'),
|
||||
cls('ComicsKingdom/KatzenjammerKidsSundays'),
|
||||
cls('ComicsKingdom/Lockhorns'),
|
||||
cls('ComicsKingdom/MandrakeTheMagicianSundays'),
|
||||
cls('ComicsKingdom/MarkTrailVintage'),
|
||||
cls('ComicsKingdom/MikePeters'),
|
||||
cls('ComicsKingdom/MotherGooseAndGrimm'),
|
||||
cls('ComicsKingdom/PhantomSundays'),
|
||||
cls('ComicsKingdom/PrinceValiantSundays'),
|
||||
cls('ComicsKingdom/Retail'),
|
||||
cls('ComicsKingdom/TigerSundays'),
|
||||
cls('ComicsKingdom/TigerVintage'),
|
||||
cls('ComicsKingdom/TigerVintageSundays'),
|
||||
cls('Everblue', 'block'),
|
||||
cls('FalseStart'),
|
||||
cls('Ginpu'),
|
||||
cls('GoComics/9ChickweedLaneClassics'),
|
||||
cls('GoComics/Badlands'),
|
||||
cls('GoComics/BigNateFirstClass'),
|
||||
cls('GoComics/BreakOfDay'),
|
||||
cls('GoComics/Candorville'),
|
||||
cls('GoComics/DilbertClassics'),
|
||||
cls('GoComics/DilbertEnEspanol'),
|
||||
cls('GoComics/DumbwichCastle'),
|
||||
cls('GoComics/EyebeamClassic'),
|
||||
cls('GoComics/GarfieldClassics'),
|
||||
cls('GoComics/MakingIt'),
|
||||
cls('GoComics/MtPleasant'),
|
||||
cls('GoComics/PCAndPixel'),
|
||||
cls('GoComics/SaltNPepper'),
|
||||
cls('GoComics/SigneWilkinson'),
|
||||
cls('GoComics/Snowflakes'),
|
||||
cls('GoComics/StoneSoupClassics'),
|
||||
cls('GoComics/StuartCarlson'),
|
||||
cls('KemonoCafe/PrincessBunny'),
|
||||
cls('Lackadaisy', 'block'),
|
||||
cls('MangaDex/ImTheMaxLevelNewbie', 'legal'),
|
||||
cls('MrLovenstein', 'jsh'),
|
||||
cls('MyCartoons'),
|
||||
cls('Shivae/BlackRose', 'brk'),
|
||||
cls('StudioKhimera/Mousechevious'),
|
||||
cls('TalesAndTactics'),
|
||||
cls('VampireHunterBoyfriends'),
|
||||
cls('WebToons/CrystalVirus'),
|
||||
cls('WebToons/OVERPOWERED'),
|
||||
cls('WintersLight'),
|
||||
)
|
||||
|
||||
|
||||
|
@ -1667,10 +1722,8 @@ class Renamed(Scraper):
|
|||
# Renamed in 3.0
|
||||
cls('AHClub', 'RickGriffinStudios/AHClub'),
|
||||
cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'),
|
||||
cls('ComicsKingdom/FunkyWinkerbeanSundays', 'ComicsKingdom/FunkyWinkerbeanSunday'),
|
||||
cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'),
|
||||
cls('ComicsKingdom/TheLittleKing', 'ComicsKingdom/LittleKing'),
|
||||
cls('ComicsKingdom/TigerSundays', 'ComicsKingdom/TigerVintageSundays'),
|
||||
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
|
||||
cls('GoComics/Cathy', 'GoComics/CathyClassics'),
|
||||
cls('GoComics/DarrinBell', 'ComicsKingdom/DarrinBell'),
|
||||
|
@ -1681,7 +1734,6 @@ class Renamed(Scraper):
|
|||
cls('GoComics/Widdershins', 'Widdershins'),
|
||||
cls('Guardia', 'ComicFury/Guardia'),
|
||||
cls('RadioactivePanda', 'Tapas/RadioactivePanda'),
|
||||
cls('Shivae/BlackRose', 'BlackRose'),
|
||||
cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'),
|
||||
cls('SmackJeeves/ByTheBook', 'ByTheBook'),
|
||||
cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'),
|
||||
|
@ -1694,6 +1746,9 @@ class Renamed(Scraper):
|
|||
cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'),
|
||||
|
||||
# Renamed in 3.1
|
||||
cls('ComicsKingdom/SlylockFoxAndComicsForKids', 'ComicsKingdom/SlylockFox'),
|
||||
cls('ComicsKingdom/SlylockFoxAndComicsForKidsSpanish', 'ComicsKingdom/SlylockFoxSpanish'),
|
||||
cls('Exiern', 'ComicFury/Exiern'),
|
||||
cls('MaxOveracts', 'OccasionalComicsDisorder'),
|
||||
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
|
||||
)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||
|
@ -34,16 +34,11 @@ class ParadigmShift(_BasicScraper):
|
|||
help = 'Index format: custom'
|
||||
|
||||
|
||||
class ParallelUniversum(_BasicScraper):
|
||||
url = 'http://www.paralleluniversum.net/'
|
||||
rurl = escape(url)
|
||||
class ParallelUniversum(WordPressScraper):
|
||||
url = 'https://www.paralleluniversum.net/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '001-der-comic-ist-tot'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) +
|
||||
tagre("span", "class", "prev"))
|
||||
help = 'Index format: number-stripname'
|
||||
prevSearch = '//a[@rel="prev"]'
|
||||
lang = 'de'
|
||||
|
||||
|
||||
|
@ -95,14 +90,12 @@ class PebbleVersion(_ParserScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class PennyAndAggie(_BasicScraper):
|
||||
url = 'http://pennyandaggie.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + 'index.php?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
|
||||
tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
|
||||
help = 'Index format: n (unpadded)'
|
||||
class PennyAndAggie(ComicControlScraper):
|
||||
url = 'https://pixietrixcomix.com/penny-and-aggie'
|
||||
stripUrl = url + '/%s'
|
||||
firstStripUrl = stripUrl % '2004-09-06'
|
||||
endOfLife = True
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class PennyArcade(_ParserScraper):
|
||||
|
@ -117,19 +110,17 @@ class PennyArcade(_ParserScraper):
|
|||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
|
||||
class PeppermintSaga(WordPressNavi):
|
||||
class PeppermintSaga(WordPressScraper):
|
||||
url = 'http://www.pepsaga.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '3'
|
||||
help = 'Index format: number'
|
||||
stripUrl = url + 'comics/%s/'
|
||||
firstStripUrl = stripUrl % 'the-sword-of-truth-vol1'
|
||||
adult = True
|
||||
|
||||
|
||||
class PeppermintSagaBGR(WordPressNavi):
|
||||
class PeppermintSagaBGR(WordPressScraper):
|
||||
url = 'http://bgr.pepsaga.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
firstStripUrl = stripUrl % '4'
|
||||
help = 'Index format: number'
|
||||
stripUrl = url + '?comic=%s'
|
||||
firstStripUrl = stripUrl % '04172011'
|
||||
adult = True
|
||||
|
||||
|
||||
|
@ -150,14 +141,16 @@ class PeterAndWhitney(_ParserScraper):
|
|||
prevSearch = '//a[./img[contains(@src, "nav_previous")]]'
|
||||
|
||||
|
||||
class PHDComics(_ParserScraper):
|
||||
class PHDComics(ParserScraper):
|
||||
BROKEN_COMMENT_END = compile(r'--!>')
|
||||
|
||||
baseUrl = 'http://phdcomics.com/'
|
||||
url = baseUrl + 'comics.php'
|
||||
stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = '//img[@id="comic2"]'
|
||||
imageSearch = ('//img[@id="comic2"]',
|
||||
r'//img[d:class("img-responsive") and re:test(@name, "comic\d+")]')
|
||||
multipleImagesPerStrip = True
|
||||
prevSearch = '//a[img[contains(@src, "prev_button")]]'
|
||||
nextSearch = '//a[img[contains(@src, "next_button")]]'
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
@ -173,7 +166,7 @@ class PHDComics(_ParserScraper):
|
|||
# video
|
||||
self.stripUrl % '1880',
|
||||
self.stripUrl % '1669',
|
||||
)
|
||||
) or self.match(data, '//img[@id="comic" and contains(@src, "phd083123s")]')
|
||||
|
||||
|
||||
class Picklewhistle(ComicControlScraper):
|
||||
|
@ -333,11 +326,12 @@ class PS238(_ParserScraper):
|
|||
|
||||
class PvPOnline(ParserScraper):
|
||||
baseUrl = 'https://www.toonhoundstudios.com/'
|
||||
url = baseUrl + 'pvp/'
|
||||
stripUrl = baseUrl + 'comic/%s/'
|
||||
stripUrl = baseUrl + 'comic/%s/?sid=372'
|
||||
url = stripUrl % 'pvp-2022-09-16'
|
||||
firstStripUrl = stripUrl % '19980504'
|
||||
imageSearch = '//div[@id="spliced-comic"]//img/@data-src-img'
|
||||
prevSearch = '//a[d:class("prev")]'
|
||||
endOfLife = True
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
return 'pvp' + imageUrl.rsplit('/', 1)[-1]
|
||||
def namer(self, image_url, page_url):
|
||||
return 'pvp' + image_url.rsplit('/', 1)[-1]
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
@ -121,7 +121,7 @@ class Requiem(WordPressScraper):
|
|||
firstStripUrl = stripUrl % '2004-06-07-3'
|
||||
|
||||
|
||||
class Replay(_ParserScraper):
|
||||
class Replay(ParserScraper):
|
||||
url = 'http://replaycomic.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % 'red-desert'
|
||||
|
@ -132,11 +132,11 @@ class Replay(_ParserScraper):
|
|||
def starter(self):
|
||||
# Retrieve archive page to identify chapters
|
||||
archivePage = self.getPage(self.url + 'archive')
|
||||
archive = archivePage.xpath('//div[@class="comic-archive-chapter-wrap"]')
|
||||
archive = self.match(archivePage, '//div[d:class("comic-archive-chapter-wrap")]')
|
||||
self.chapter = len(archive) - 1
|
||||
self.startOfChapter = []
|
||||
for archiveChapter in archive:
|
||||
self.startOfChapter.append(archiveChapter.xpath('.//a')[0].get('href'))
|
||||
self.startOfChapter.append(self.match(archiveChapter, './/a')[0].get('href'))
|
||||
return bounceStarter(self)
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
|
|
|
@ -196,7 +196,7 @@ class Sharksplode(WordPressScraper):
|
|||
class Sheldon(ParserScraper):
|
||||
url = 'https://www.sheldoncomics.com/'
|
||||
firstStripUrl = url + 'comic/well-who-is-this/'
|
||||
imageSearch = '//div[@id="comic"]//img'
|
||||
imageSearch = '//div[@id="comic"]//img/@data-src-img'
|
||||
prevSearch = '//a[img[d:class("left")]]'
|
||||
|
||||
|
||||
|
@ -435,7 +435,7 @@ class SpaceFurries(ParserScraper):
|
|||
def extract_image_urls(self, url, data):
|
||||
# Website requires JS, so build the list of image URLs manually
|
||||
imageurls = []
|
||||
current = int(data.xpath('//input[@name="pagnum"]')[0].get('value'))
|
||||
current = int(self.match(data, '//input[@name="pagnum"]')[0].get('value'))
|
||||
for page in reversed(range(1, current + 1)):
|
||||
imageurls.append(self.url + 'comics/' + str(page) + '.jpg')
|
||||
return imageurls
|
||||
|
@ -636,16 +636,16 @@ class StrongFemaleProtagonist(_ParserScraper):
|
|||
)
|
||||
|
||||
|
||||
class StupidFox(_ParserScraper):
|
||||
class StupidFox(ParserScraper):
|
||||
url = 'http://stupidfox.net/'
|
||||
stripUrl = url + '%s'
|
||||
firstStripUrl = stripUrl % 'hello'
|
||||
imageSearch = '//div[@class="comicmid"]//img'
|
||||
imageSearch = '//div[d:class("comicmid")]//img'
|
||||
prevSearch = '//a[@accesskey="p"]'
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
page = self.getPage(pageUrl)
|
||||
title = page.xpath(self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-')
|
||||
title = self.match(page, self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-')
|
||||
return title + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2021 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from .common import WordPressSpliced
|
||||
|
||||
|
||||
|
@ -12,22 +12,20 @@ class _CommonMulti(WordPressSpliced):
|
|||
self.endOfLife = eol
|
||||
|
||||
|
||||
class AbbysAgency(WordPressSpliced):
|
||||
url = 'https://abbysagency.us/'
|
||||
stripUrl = url + 'blog/comic/%s/'
|
||||
firstStripUrl = stripUrl % 'a'
|
||||
|
||||
|
||||
class AlienDice(WordPressSpliced):
|
||||
url = 'https://aliendice.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % '05162001'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return not self.match(data, self.imageSearch)
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
# Fix broken navigation
|
||||
if url == self.stripUrl % 'day-29-part-2-page-3-4':
|
||||
return self.stripUrl % 'day-29-part-2-page-3-2'
|
||||
return super(AlienDice, self).getPrevUrl(url, data)
|
||||
return super().getPrevUrl(url, data)
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
# Fix inconsistent filename
|
||||
|
@ -47,12 +45,6 @@ class AlienDiceLegacy(WordPressSpliced):
|
|||
return super().isfirststrip(url.rsplit('?', 1)[0])
|
||||
|
||||
|
||||
class BlackRose(WordPressSpliced):
|
||||
url = 'https://www.blackrose.monster/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % '2004-11-01'
|
||||
|
||||
|
||||
class TheCyantianChronicles(_CommonMulti):
|
||||
baseUrl = 'https://cyantian.net/'
|
||||
|
||||
|
@ -81,9 +73,9 @@ class TheCyantianChronicles(_CommonMulti):
|
|||
|
||||
|
||||
class Shivae(WordPressSpliced):
|
||||
url = 'https://shivae.com/'
|
||||
url = 'https://shivae.net/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
firstStripUrl = stripUrl % '09202001'
|
||||
firstStripUrl = stripUrl % '2002-02-27'
|
||||
|
||||
|
||||
class ShivaeComics(_CommonMulti):
|
||||
|
|
|
@ -4,10 +4,7 @@
|
|||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape, MULTILINE
|
||||
try:
|
||||
from functools import cached_property
|
||||
except ImportError:
|
||||
from cached_property import cached_property
|
||||
from functools import cached_property
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||
from ..helpers import indirectStarter, joinPathPartsNamer
|
||||
|
@ -275,7 +272,7 @@ class ToonHole(ParserScraper):
|
|||
prevSearch = '//a[@rel="prev"]'
|
||||
latestSearch = '//a[@rel="bookmark"]'
|
||||
starter = indirectStarter
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
|
||||
|
||||
class TrippingOverYou(_BasicScraper):
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from ..output import out
|
||||
from ..scraper import ParserScraper
|
||||
from ..xml import NS
|
||||
|
||||
|
||||
class Tapas(ParserScraper):
|
||||
|
@ -21,7 +20,7 @@ class Tapas(ParserScraper):
|
|||
def starter(self):
|
||||
# Retrieve comic metadata from info page
|
||||
info = self.getPage(self.url)
|
||||
series = info.xpath('//@data-series-id')[0]
|
||||
series = self.match(info, '//@data-series-id')[0]
|
||||
# Retrieve comic metadata from API
|
||||
data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
|
||||
data.raise_for_status()
|
||||
|
@ -43,7 +42,7 @@ class Tapas(ParserScraper):
|
|||
return self._cached_image_urls
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):
|
||||
if self.match(data, '//button[d:class("js-have-to-sign")]'):
|
||||
out.warn(f'Nothing to download on "{url}", because a login is required.')
|
||||
return True
|
||||
return False
|
||||
|
|
|
@ -107,7 +107,7 @@ class Unsounded(ParserScraper):
|
|||
return urls
|
||||
|
||||
def extract_css_bg(self, page) -> str | None:
|
||||
comicdivs = page.xpath('//div[@id="comic"]')
|
||||
comicdivs = self.match(page, '//div[@id="comic"]')
|
||||
if comicdivs:
|
||||
style = comicdivs[0].attrib.get('style')
|
||||
if style:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
|
||||
from ..scraper import ParserScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
|
@ -27,7 +27,7 @@ class VGCats(_ParserScraper):
|
|||
url = 'https://www.vgcats.com/comics/'
|
||||
stripUrl = url + '?strip_id=%s'
|
||||
firstStripUrl = stripUrl % '0'
|
||||
imageSearch = '//td/img[contains(@src, "images/")]'
|
||||
imageSearch = '//td/font/img[contains(@src, "images/")]'
|
||||
prevSearch = '//a[img[contains(@src, "back.")]]'
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
@ -44,15 +44,15 @@ class Vibe(ParserScraper):
|
|||
help = 'Index format: VIBEnnn (padded)'
|
||||
|
||||
|
||||
class VickiFox(_ParserScraper):
|
||||
class VickiFox(ParserScraper):
|
||||
url = 'http://www.vickifox.com/comic/strip'
|
||||
stripUrl = url + '?id=%s'
|
||||
firstStripUrl = stripUrl % '001'
|
||||
imageSearch = '//img[contains(@src, "comic/")]'
|
||||
prevSearch = '//button[@id="btnPrev"]/@value'
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
return self.stripUrl % self.getPage(url).xpath(self.prevSearch)[0]
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
return self.stripUrl % tourl
|
||||
|
||||
|
||||
class ViiviJaWagner(_ParserScraper):
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from re import compile, escape, IGNORECASE
|
||||
|
||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
||||
|
@ -17,7 +17,7 @@ class WapsiSquare(WordPressNaviIn):
|
|||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip pages without images."""
|
||||
return data.xpath('//iframe') # videos
|
||||
return self.match(data, '//iframe') # videos
|
||||
|
||||
|
||||
class WastedTalent(_ParserScraper):
|
||||
|
|
|
@ -24,9 +24,9 @@ class WebToons(ParserScraper):
|
|||
self.session.cookies.set(cookie, 'false', domain='webtoons.com')
|
||||
# Find current episode number
|
||||
listPage = self.getPage(self.listUrl)
|
||||
currentEpisode = listPage.xpath('//div[@class="detail_lst"]/ul/li')[0].attrib['data-episode-no']
|
||||
currentEpisode = self.match(listPage, '//div[d:class("detail_lst")]/ul/li')[0].attrib['data-episode-no']
|
||||
# Check for completed tag
|
||||
self.endOfLife = (listPage.xpath('//div[@id="_asideDetail"]//span[@class="txt_ico_completed2"]') != [])
|
||||
self.endOfLife = not self.match(listPage, '//div[@id="_asideDetail"]//span[d:class("txt_ico_completed2")]')
|
||||
return self.stripUrl % currentEpisode
|
||||
|
||||
def extract_image_urls(self, url, data):
|
||||
|
@ -52,6 +52,7 @@ class WebToons(ParserScraper):
|
|||
cls('1111Animals', 'comedy/1111-animals', 437),
|
||||
cls('2015SpaceSeries', 'sf/2015-space-series', 391),
|
||||
cls('3SecondStrip', 'comedy/3-second-strip', 380),
|
||||
cls('99ReinforcedStick', 'comedy/99-reinforced-wooden-stick', 4286),
|
||||
cls('ABittersweetLife', 'slice-of-life/a-bittersweet-life', 294),
|
||||
cls('AboutDeath', 'drama/about-death', 82),
|
||||
cls('ABudgiesLife', 'slice-of-life/its-a-budgies-life', 985),
|
||||
|
@ -64,6 +65,7 @@ class WebToons(ParserScraper):
|
|||
cls('AGoodDayToBeADog', 'romance/a-good-day-tobe-a-dog', 1390),
|
||||
cls('Aisopos', 'drama/aisopos', 76),
|
||||
cls('AliceElise', 'fantasy/alice-elise', 1481),
|
||||
cls('AlloyComics', 'canvas/alloy-comics', 747447),
|
||||
cls('AllThatWeHopeToBe', 'slice-of-life/all-that-we-hope-to-be', 470),
|
||||
cls('AllThatYouAre', 'drama/all-that-you-are', 403),
|
||||
cls('AlwaysHuman', 'romance/always-human', 557),
|
||||
|
@ -128,6 +130,7 @@ class WebToons(ParserScraper):
|
|||
cls('CursedPrincessClub', 'comedy/cursed-princess-club', 1537),
|
||||
cls('Cyberbunk', 'sf/cyberbunk', 466),
|
||||
cls('Cyberforce', 'super-hero/cyberforce', 531),
|
||||
cls('CydoniaShattering', 'fantasy/cydonia-shattering', 2881),
|
||||
cls('CykoKO', 'super-hero/cyko-ko', 560),
|
||||
cls('Darbi', 'action/darbi', 1098),
|
||||
cls('Darchon', 'challenge/darchon', 532053),
|
||||
|
@ -153,6 +156,8 @@ class WebToons(ParserScraper):
|
|||
cls('DrawnToYou', 'challenge/drawn-to-you', 172022),
|
||||
cls('DrFrost', 'drama/dr-frost', 371),
|
||||
cls('DuelIdentity', 'challenge/duel-identity', 532064),
|
||||
cls('DungeonCleaningLife', 'action/the-dungeon-cleaning-life-of-a-once-genius-hunter', 4677),
|
||||
cls('DungeonsAndDoodlesTalesFromTheTables', 'canvas/dungeons-doodles-tales-from-the-tables', 682646),
|
||||
cls('DungeonMinis', 'challenge/dungeonminis', 64132),
|
||||
cls('Dustinteractive', 'comedy/dustinteractive', 907),
|
||||
cls('DutyAfterSchool', 'sf/duty-after-school', 370),
|
||||
|
@ -170,6 +175,7 @@ class WebToons(ParserScraper):
|
|||
cls('FAMILYMAN', 'drama/family-man', 85),
|
||||
cls('FantasySketchTheGame', 'sf/fantasy-sketch', 1020),
|
||||
cls('Faust', 'supernatural/faust', 522),
|
||||
cls('FinalRaidBoss', 'fantasy/the-final-raid-boss', 3921),
|
||||
cls('FINALITY', 'mystery/finality', 1457),
|
||||
cls('Firebrand', 'supernatural/firebrand', 877),
|
||||
cls('FirstDefense', 'challenge/first-defense', 532072),
|
||||
|
@ -204,11 +210,13 @@ class WebToons(ParserScraper):
|
|||
cls('HeliosFemina', 'fantasy/helios-femina', 638),
|
||||
cls('HelloWorld', 'slice-of-life/hello-world', 827),
|
||||
cls('Hellper', 'fantasy/hellper', 185),
|
||||
cls('Hench', 'canvas/hench/', 857225),
|
||||
cls('HeroineChic', 'super-hero/heroine-chic', 561),
|
||||
cls('HIVE', 'thriller/hive', 65),
|
||||
cls('Hooky', 'fantasy/hooky', 425),
|
||||
cls('HoovesOfDeath', 'fantasy/hooves-of-death', 1535),
|
||||
cls('HouseOfStars', 'fantasy/house-of-stars', 1620),
|
||||
cls('HowToBeAMindReaver', 'canvas/how-to-be-a-mind-reaver', 301213),
|
||||
cls('HowToBecomeADragon', 'fantasy/how-to-become-a-dragon', 1973),
|
||||
cls('HowToLove', 'slice-of-life/how-to-love', 472),
|
||||
cls('IDontWantThisKindOfHero', 'super-hero/i-dont-want-this-kind-of-hero', 98),
|
||||
|
@ -235,6 +243,7 @@ class WebToons(ParserScraper):
|
|||
cls('KindOfLove', 'slice-of-life/kind-of-love', 1850),
|
||||
cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703),
|
||||
cls('KnightRun', 'sf/knight-run', 67),
|
||||
cls('KnightUnderMyHeart', 'action/knight-under-my-heart', 4215),
|
||||
cls('Kubera', 'fantasy/kubera', 83),
|
||||
cls('LalinsCurse', 'supernatural/lalins-curse', 1601),
|
||||
cls('Lars', 'slice-of-life/lars', 358),
|
||||
|
@ -261,6 +270,7 @@ class WebToons(ParserScraper):
|
|||
cls('LUMINE', 'fantasy/lumine', 1022),
|
||||
cls('Lunarbaboon', 'slice-of-life/lunarbaboon', 523),
|
||||
cls('MageAndDemonQueen', 'comedy/mage-and-demon-queen', 1438),
|
||||
cls('MageAndMimic', 'comedy/mage-and-mimic', 5973),
|
||||
cls('Magical12thGraders', 'super-hero/magical-12th-graders', 90),
|
||||
cls('Magician', 'fantasy/magician', 70),
|
||||
cls('MagicSodaPop', 'fantasy/magic-soda-pop', 1947),
|
||||
|
@ -292,6 +302,8 @@ class WebToons(ParserScraper):
|
|||
cls('MyGiantNerdBoyfriend', 'slice-of-life/my-giant-nerd-boyfriend', 958),
|
||||
cls('MyKittyAndOldDog', 'slice-of-life/my-kitty-and-old-dog', 184),
|
||||
cls('MyNameIsBenny', 'slice-of-life/my-name-is-benny', 1279),
|
||||
cls('MySClassHunter', 'action/my-s-class-hunters', 3963),
|
||||
cls('MythicItemObtained', 'fantasy/mythic-item-obtained', 4582),
|
||||
cls('MyWallflowerKiss', 'challenge/my-wallflower-kiss', 151869),
|
||||
cls('NanoList', 'sf/nano-list', 700),
|
||||
cls('NationalDogDay2016', 'slice-of-life/national-dog-day', 747),
|
||||
|
@ -439,6 +451,7 @@ class WebToons(ParserScraper):
|
|||
cls('UpAndOut', 'slice-of-life/up-and-out', 488),
|
||||
cls('UrbanAnimal', 'super-hero/urban-animal', 1483),
|
||||
cls('Uriah', 'horror/uriah', 1607),
|
||||
cls('VampireFamily', 'comedy/vampire-family', 6402),
|
||||
cls('VarsityNoir', 'mystery/varsity-noir', 1613),
|
||||
cls('VersionDayAndNight', 'drama/version-day-and-night', 1796),
|
||||
cls('WafflesAndPancakes', 'slice-of-life/waffles-and-pancakes', 1310),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2022 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from ..scraper import ParserScraper
|
||||
from ..helpers import indirectStarter
|
||||
|
||||
|
@ -15,21 +15,21 @@ class Wrongside(ParserScraper):
|
|||
|
||||
def starter(self):
|
||||
archivePage = self.getPage(self.url)
|
||||
chapterUrls = archivePage.xpath('//ul[@class="albThumbs"]//a/@href')
|
||||
chapterUrls = self.match(archivePage, '//ul[d:class("albThumbs")]//a/@href')
|
||||
self.archive = []
|
||||
for chapterUrl in chapterUrls:
|
||||
chapterPage = self.getPage(chapterUrl)
|
||||
self.archive.append(chapterPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
|
||||
self.archive.append(self.match(chapterPage, '(//ul[@id="thumbnails"]//a/@href)[last()]')[0])
|
||||
return self.archive[0]
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
if data.xpath(self.prevSearch) == [] and len(self.archive) > 0:
|
||||
if self.match(data, self.prevSearch) == [] and len(self.archive) > 0:
|
||||
return self.archive.pop()
|
||||
return super(Wrongside, self).getPrevUrl(url, data)
|
||||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
page = self.getPage(pageUrl)
|
||||
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
|
||||
title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0]
|
||||
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||
|
||||
|
||||
|
@ -71,5 +71,5 @@ class WrongsideSideStories(ParserScraper):
|
|||
|
||||
def namer(self, imageUrl, pageUrl):
|
||||
page = self.getPage(pageUrl)
|
||||
title = page.xpath('//div[@class="browsePath"]/h2/text()')[0]
|
||||
title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0]
|
||||
return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1]
|
||||
|
|
|
@ -23,7 +23,7 @@ class Zapiro(ParserScraper):
|
|||
imageSearch = '//div[@id="cartoon"]/img'
|
||||
prevSearch = '//a[d:class("left")]'
|
||||
nextSearch = '//a[d:class("right")]'
|
||||
namer = joinPathPartsNamer((-1,), ())
|
||||
namer = joinPathPartsNamer(pageparts=(-1,))
|
||||
|
||||
|
||||
class ZenPencils(WordPressNavi):
|
||||
|
@ -60,7 +60,7 @@ class Zwarwald(BasicScraper):
|
|||
tagre("img", "src",
|
||||
r'http://zwarwald\.de/images/prev\.jpg',
|
||||
quote="'"))
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
help = 'Index format: number'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
|
|
@ -119,45 +119,45 @@ class Scraper:
|
|||
if val:
|
||||
self._indexes = tuple(sorted(val))
|
||||
|
||||
def __init__(self, name):
|
||||
def __init__(self, name: str) -> None:
|
||||
"""Initialize internal variables."""
|
||||
self.name = name
|
||||
self.urls = set()
|
||||
self.urls: set[str] = set()
|
||||
self._indexes = ()
|
||||
self.skippedUrls = set()
|
||||
self.skippedUrls: set[str] = set()
|
||||
self.hitFirstStripUrl = False
|
||||
|
||||
def __hash__(self):
|
||||
def __hash__(self) -> int:
|
||||
"""Get hash value from name and index list."""
|
||||
return hash((self.name, self.indexes))
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
def shouldSkipUrl(self, url: str, data) -> bool:
|
||||
"""Determine if search for images in given URL should be skipped."""
|
||||
return False
|
||||
|
||||
def getComicStrip(self, url, data):
|
||||
def getComicStrip(self, url, data) -> ComicStrip:
|
||||
"""Get comic strip downloader for given URL and data."""
|
||||
imageUrls = self.extract_image_urls(url, data)
|
||||
urls = self.extract_image_urls(url, data)
|
||||
# map modifier function on image URLs
|
||||
imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
|
||||
urls = [self.imageUrlModifier(x, data) for x in urls]
|
||||
# remove duplicate URLs
|
||||
imageUrls = uniq(imageUrls)
|
||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
||||
urls = uniq(urls)
|
||||
if len(urls) > 1 and not self.multipleImagesPerStrip:
|
||||
out.warn(
|
||||
u"Found %d images instead of 1 at %s with expressions %s" %
|
||||
(len(imageUrls), url, prettyMatcherList(self.imageSearch)))
|
||||
image = imageUrls[0]
|
||||
out.warn(u"Choosing image %s" % image)
|
||||
imageUrls = (image,)
|
||||
elif not imageUrls:
|
||||
out.warn(u"Found no images at %s with expressions %s" % (url,
|
||||
(len(urls), url, prettyMatcherList(self.imageSearch)))
|
||||
image = urls[0]
|
||||
out.warn("Choosing image %s" % image)
|
||||
urls = (image,)
|
||||
elif not urls:
|
||||
out.warn("Found no images at %s with expressions %s" % (url,
|
||||
prettyMatcherList(self.imageSearch)))
|
||||
if self.textSearch:
|
||||
text = self.fetchText(url, data, self.textSearch,
|
||||
optional=self.textOptional)
|
||||
else:
|
||||
text = None
|
||||
return ComicStrip(self, url, imageUrls, text=text)
|
||||
return ComicStrip(self, url, urls, text=text)
|
||||
|
||||
def getStrips(self, maxstrips=None):
|
||||
"""Get comic strips."""
|
||||
|
@ -217,7 +217,7 @@ class Scraper:
|
|||
break
|
||||
url = prevUrl
|
||||
|
||||
def isfirststrip(self, url):
|
||||
def isfirststrip(self, url: str) -> bool:
|
||||
"""Check if the specified URL is the first strip of a comic. This is
|
||||
specifically for comics taken from archive.org, since the base URL of
|
||||
archive.org changes whenever pages are taken from a different
|
||||
|
@ -228,7 +228,7 @@ class Scraper:
|
|||
currenturl = ARCHIVE_ORG_URL.sub('', url)
|
||||
return firsturl == currenturl
|
||||
|
||||
def getPrevUrl(self, url, data):
|
||||
def getPrevUrl(self, url: str, data) -> str | None:
|
||||
"""Find previous URL."""
|
||||
prevUrl = None
|
||||
if self.prevSearch:
|
||||
|
@ -243,40 +243,40 @@ class Scraper:
|
|||
getHandler().comicPageLink(self, url, prevUrl)
|
||||
return prevUrl
|
||||
|
||||
def getIndexStripUrl(self, index):
|
||||
def getIndexStripUrl(self, index: str) -> str:
|
||||
"""Get comic strip URL from index."""
|
||||
return self.stripUrl % index
|
||||
|
||||
def starter(self):
|
||||
def starter(self) -> str:
|
||||
"""Get starter URL from where to scrape comic strips."""
|
||||
return self.url
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
def namer(self, image_url: str, page_url: str) -> str | None:
|
||||
"""Return filename for given image and page URL."""
|
||||
return
|
||||
|
||||
def link_modifier(self, fromurl, tourl):
|
||||
def link_modifier(self, fromurl: str, tourl: str) -> str:
|
||||
"""Optional modification of parsed link (previous/back/latest) URLs.
|
||||
Useful if there are domain redirects. The default implementation does
|
||||
not modify the URL.
|
||||
"""
|
||||
return tourl
|
||||
|
||||
def imageUrlModifier(self, image_url, data):
|
||||
def imageUrlModifier(self, image_url: str, data) -> str:
|
||||
"""Optional modification of parsed image URLs. Useful if the URL
|
||||
needs to be fixed before usage. The default implementation does
|
||||
not modify the URL. The given data is the URL page data.
|
||||
"""
|
||||
return image_url
|
||||
|
||||
def vote(self):
|
||||
def vote(self) -> None:
|
||||
"""Cast a public vote for this comic."""
|
||||
uid = get_system_uid()
|
||||
data = {"name": self.name.replace('/', '_'), "uid": uid}
|
||||
response = self.session.post(configuration.VoteUrl, data=data)
|
||||
response.raise_for_status()
|
||||
|
||||
def get_download_dir(self, basepath):
|
||||
def get_download_dir(self, basepath: str) -> str:
|
||||
"""Try to find the correct download directory, ignoring case
|
||||
differences."""
|
||||
path = basepath
|
||||
|
@ -294,16 +294,16 @@ class Scraper:
|
|||
path = os.path.join(path, part)
|
||||
return path
|
||||
|
||||
def getCompleteFile(self, basepath):
|
||||
def getCompleteFile(self, basepath: str) -> str:
|
||||
"""Get filename indicating all comics are downloaded."""
|
||||
dirname = self.get_download_dir(basepath)
|
||||
return os.path.join(dirname, "complete.txt")
|
||||
|
||||
def isComplete(self, basepath):
|
||||
def isComplete(self, basepath: str) -> bool:
|
||||
"""Check if all comics are downloaded."""
|
||||
return os.path.isfile(self.getCompleteFile(basepath))
|
||||
|
||||
def setComplete(self, basepath):
|
||||
def setComplete(self, basepath: str) -> None:
|
||||
"""Set complete flag for this comic, i.e. all comics are downloaded."""
|
||||
if self.endOfLife:
|
||||
filename = self.getCompleteFile(basepath)
|
||||
|
@ -521,15 +521,10 @@ class ParserScraper(Scraper):
|
|||
return text.strip()
|
||||
|
||||
def _matchPattern(self, data, patterns):
|
||||
if self.css:
|
||||
searchFun = data.cssselect
|
||||
else:
|
||||
def searchFun(s):
|
||||
return data.xpath(s, namespaces=NS)
|
||||
patterns = makeSequence(patterns)
|
||||
for search in patterns:
|
||||
matched = False
|
||||
for match in searchFun(search):
|
||||
for match in self.match(data, search):
|
||||
matched = True
|
||||
yield match, search
|
||||
|
||||
|
@ -537,6 +532,13 @@ class ParserScraper(Scraper):
|
|||
# do not search other links if one pattern matched
|
||||
break
|
||||
|
||||
def match(self, data, pattern):
|
||||
"""Match a pattern (XPath/CSS) against a page."""
|
||||
if self.css:
|
||||
return data.cssselect(pattern)
|
||||
else:
|
||||
return data.xpath(pattern, namespaces=NS)
|
||||
|
||||
def getDisabledReasons(self):
|
||||
res = {}
|
||||
if self.css and cssselect is None:
|
||||
|
|
|
@ -17,7 +17,6 @@ classifiers = [
|
|||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
|
@ -27,15 +26,13 @@ classifiers = [
|
|||
"Topic :: Multimedia :: Graphics",
|
||||
]
|
||||
keywords = ["comic", "webcomic", "downloader", "archiver", "crawler"]
|
||||
requires-python = ">=3.7"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"colorama",
|
||||
"imagesize",
|
||||
"lxml>=4.0.0",
|
||||
"platformdirs",
|
||||
"requests>=2.0",
|
||||
"cached_property;python_version<'3.8'",
|
||||
"importlib_metadata;python_version<'3.8'",
|
||||
"importlib_resources>=5.0.0;python_version<'3.9'",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
@ -101,7 +98,7 @@ ignore = [
|
|||
]
|
||||
noqa-require-code = true
|
||||
no-accept-encodings = true
|
||||
min-version = "3.7"
|
||||
min-version = "3.8"
|
||||
extend-exclude = [
|
||||
'.venv',
|
||||
'build',
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019-2022 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019 Thomas W. Littauer
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
||||
"""
|
||||
Script to get a list of comicskingdom.com comics and save the info in a JSON
|
||||
file for further processing.
|
||||
|
@ -19,39 +19,17 @@ class ComicsKingdomUpdater(ComicListUpdater):
|
|||
"ComicGenesis/%s",
|
||||
)
|
||||
|
||||
def handle_startpage(self, page):
|
||||
"""Parse list of comics from the bottom of the start page."""
|
||||
for li in page.xpath('//div[d:class("comics-list")]//li', namespaces=NS):
|
||||
link = li.xpath('./a')[0]
|
||||
def handle_listing(self, page):
|
||||
for link in page.xpath('//ul[d:class("index")]//a', namespaces=NS):
|
||||
name = link.text_content().removeprefix('The ')
|
||||
url = link.attrib['href']
|
||||
name = link.text.removeprefix('The ')
|
||||
lang = 'es' if ' (Spanish)' in name else None
|
||||
|
||||
self.add_comic(name, (url, None))
|
||||
|
||||
def handle_listing(self, page, lang: str = None, add: str = ''):
|
||||
|
||||
hasnew = True
|
||||
while hasnew:
|
||||
hasnew = False
|
||||
for comicdiv in page.xpath('//div[d:class("tile")]', namespaces=NS):
|
||||
nametag = comicdiv.xpath('./a/comic-name')
|
||||
if len(nametag) == 0:
|
||||
continue
|
||||
name = nametag[0].text.removeprefix('The ') + add
|
||||
url = comicdiv.xpath('./a')[0].attrib['href']
|
||||
|
||||
if self.add_comic(name, (url, lang)):
|
||||
hasnew = True
|
||||
|
||||
nextlink = page.xpath('//a[./img[contains(@src, "page-right")]]')
|
||||
page = self.get_url(nextlink[0].attrib['href'])
|
||||
self.add_comic(name, (url, lang))
|
||||
|
||||
def collect_results(self):
|
||||
"""Parse all search result pages."""
|
||||
page = self.get_url('https://www.comicskingdom.com/')
|
||||
self.handle_startpage(page)
|
||||
self.handle_listing(page)
|
||||
self.handle_listing(self.get_url('https://www.comicskingdom.com/spanish'), 'es', 'Spanish')
|
||||
self.handle_listing(self.get_url('https://comicskingdom.com/features'))
|
||||
|
||||
def get_entry(self, name: str, data: tuple[str, str]):
|
||||
opt = f", lang='{data[1]}'" if data[1] else ''
|
||||
|
|
|
@ -1,28 +1,30 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2017-2020 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2017 Tobias Gruetzmacher
|
||||
|
||||
import re
|
||||
from importlib import metadata
|
||||
|
||||
# Idea from
|
||||
# https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Setuptools-Entry-Point,
|
||||
# but with importlib
|
||||
def Entrypoint(group, name, **kwargs):
|
||||
import re
|
||||
try:
|
||||
from importlib.metadata import entry_points
|
||||
except ImportError:
|
||||
from importlib_metadata import entry_points
|
||||
|
||||
def entrypoint(group, name, **kwargs):
|
||||
# get the entry point
|
||||
eps = entry_points()[group]
|
||||
ep = next(ep for ep in eps if ep.name == name)
|
||||
module, attr = re.split(r'\s*:\s*', ep.value, 1)
|
||||
eps = metadata.entry_points()
|
||||
if 'select' in dir(eps):
|
||||
# modern
|
||||
ep = eps.select(group=group)[name]
|
||||
else:
|
||||
# legacy (pre-3.10)
|
||||
ep = next(ep for ep in eps[group] if ep.name == name)
|
||||
module, attr = re.split(r'\s*:\s*', ep.value, maxsplit=1)
|
||||
|
||||
# script name must not be a valid module name to avoid name clashes on import
|
||||
script_path = os.path.join(workpath, name + '-script.py')
|
||||
print("creating script for entry point", group, name)
|
||||
with open(script_path, 'w') as fh:
|
||||
with open(script_path, mode='w', encoding='utf-8') as fh:
|
||||
print("import sys", file=fh)
|
||||
print("import", module, file=fh)
|
||||
print("sys.exit(%s.%s())" % (module, attr), file=fh)
|
||||
print(f"sys.exit({module}.{attr}())", file=fh)
|
||||
|
||||
return Analysis(
|
||||
[script_path] + kwargs.get('scripts', []),
|
||||
|
@ -30,7 +32,7 @@ def Entrypoint(group, name, **kwargs):
|
|||
)
|
||||
|
||||
|
||||
a = Entrypoint('console_scripts', 'dosage')
|
||||
a = entrypoint('console_scripts', 'dosage')
|
||||
|
||||
a.binaries = [x for x in a.binaries if not x[1].lower().startswith(r'c:\windows')]
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
"""
|
||||
Script to get a list of gocomics and save the info in a JSON file for further
|
||||
processing.
|
||||
|
@ -20,6 +20,8 @@ class GoComicsUpdater(ComicListUpdater):
|
|||
excluded_comics = (
|
||||
# too short
|
||||
'LukeyMcGarrysTLDR',
|
||||
# Has its own module
|
||||
'Widdershins',
|
||||
)
|
||||
|
||||
def handle_gocomics(self, url, outercss='a.gc-blended-link', lang=None):
|
||||
|
|
|
@ -61,6 +61,9 @@ def create_symlinks(d):
|
|||
else:
|
||||
order.extend(data["pages"][work]["images"].values())
|
||||
if "prev" in data["pages"][work]:
|
||||
if data["pages"][work]["prev"] == work:
|
||||
work = None
|
||||
else:
|
||||
work = data["pages"][work]["prev"]
|
||||
else:
|
||||
work = None
|
||||
|
|
|
@ -3,12 +3,15 @@
|
|||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
import re
|
||||
from operator import attrgetter
|
||||
|
||||
import pytest
|
||||
|
||||
from dosagelib.scraper import scrapers
|
||||
from dosagelib.plugins import old
|
||||
|
||||
|
||||
class TestComicNames(object):
|
||||
class TestComicNames:
|
||||
|
||||
def test_names(self):
|
||||
for scraperobj in scrapers.all():
|
||||
|
@ -20,10 +23,10 @@ class TestComicNames(object):
|
|||
comicname = name
|
||||
assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
|
||||
|
||||
def test_renamed(self):
|
||||
for scraperobj in scrapers.all(include_removed=True):
|
||||
if not isinstance(scraperobj, old.Renamed):
|
||||
continue
|
||||
@pytest.mark.parametrize(('scraperobj'),
|
||||
[obj for obj in scrapers.all(include_removed=True)
|
||||
if isinstance(obj, old.Renamed)], ids=attrgetter('name'))
|
||||
def test_renamed(self, scraperobj):
|
||||
assert len(scraperobj.getDisabledReasons()) > 0
|
||||
# Renamed scraper should only point to a non-disabled scraper
|
||||
newscraper = scrapers.find(scraperobj.newname)
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
from dosagelib.helpers import joinPathPartsNamer, queryNamer
|
||||
|
||||
|
||||
class TestNamer(object):
|
||||
class TestNamer:
|
||||
"""
|
||||
Tests for comic namer.
|
||||
"""
|
||||
|
@ -16,6 +16,8 @@ class TestNamer(object):
|
|||
def test_joinPathPartsNamer(self):
|
||||
imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png'
|
||||
pageurl = 'https://HOST/2019/03/11/12450/'
|
||||
assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
||||
assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
||||
assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450'
|
||||
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self,
|
||||
imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
||||
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self,
|
||||
imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
||||
assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450'
|
||||
|
|
3
tox.ini
3
tox.ini
|
@ -1,10 +1,9 @@
|
|||
[tox]
|
||||
envlist = py37, py38, py39, py310, py311, py312, flake8
|
||||
envlist = py38, py39, py310, py311, py312, flake8
|
||||
isolated_build = True
|
||||
|
||||
[gh-actions]
|
||||
python =
|
||||
3.7: py37
|
||||
3.8: py38
|
||||
3.9: py39
|
||||
3.10: py310
|
||||
|
|
Loading…
Reference in a new issue