diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 91fc9ef4a..837f3e0df 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 @@ -32,7 +32,7 @@ jobs: if: ${{ matrix.python-version != env.DEFAULT_PYTHON }} - name: Test with tox (and upload coverage) - uses: paambaati/codeclimate-action@v5.0.0 + uses: paambaati/codeclimate-action@v8.0.0 if: ${{ matrix.python-version == env.DEFAULT_PYTHON }} env: CC_TEST_REPORTER_ID: 2a411f596959fc32f5d73f3ba7cef8cc4d5733299d742dbfc97fd6c190b9010c @@ -42,6 +42,6 @@ jobs: ${{ github.workspace }}/.tox/reports/*/coverage.xml:coverage.py prefix: ${{ github.workspace }}/.tox/py39/lib/python3.9/site-packages - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: directory: '.tox/reports' diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 14e3a7ce3..4a9c29eb4 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -5,12 +5,19 @@ on: push: branches: - master + workflow_dispatch: permissions: - contents: write + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false jobs: - deploy: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -28,10 +35,24 @@ jobs: pip install wheel pip install git+https://github.com/spanezz/staticsite.git@v2.3 ssite build --output public + cd public + rm -rf Jenkinsfile dosagelib scripts tests - - name: Deploy - uses: peaceiris/actions-gh-pages@v3 + - name: Setup Pages + id: pages + uses: actions/configure-pages@v5 + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 with: - cname: dosage.rocks - github_token: ${{ secrets.GITHUB_TOKEN }} - exclude_assets: 'Jenkinsfile,dosagelib,scripts,setup.*,tests,*.ini' + path: public + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/COPYING b/COPYING index 7233a8518..bd9871497 100644 --- a/COPYING +++ b/COPYING @@ -1,6 +1,6 @@ Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs Copyright (C) 2012-2014 Bastian Kleineidam -Copyright (C) 2015-2022 Tobias Gruetzmacher +Copyright (C) 2015-2024 Tobias Gruetzmacher Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Jenkinsfile b/Jenkinsfile index db7d2c10c..1d60b1969 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,7 +4,6 @@ def pys = [ [name: 'Python 3.10', docker: '3.10-bookworm', tox:'py310', main: false], [name: 'Python 3.9', docker: '3.9-bookworm', tox:'py39', main: false], [name: 'Python 3.8', docker: '3.8-bookworm', tox:'py38', main: false], - [name: 'Python 3.7', docker: '3.7-bookworm', tox:'py37', main: false], ] properties([ @@ -75,7 +74,7 @@ pys.each { py -> parallel(tasks) parallel modern: { stage('Modern Windows binary') { - windowsBuild('3.11', 'dosage.exe') + windowsBuild('3.12', 'dosage.exe') } }, legacy: { diff --git a/README.md b/README.md index d87bfaf4f..fb46a453a 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Dosage -[![Tests](https://github.com/webcomics/dosage/actions/workflows/test.yml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/test.yml) +[![CI](https://github.com/webcomics/dosage/actions/workflows/ci.yaml/badge.svg)](https://github.com/webcomics/dosage/actions/workflows/ci.yaml) [![Code Climate](https://codeclimate.com/github/webcomics/dosage/badges/gpa.svg)](https://codeclimate.com/github/webcomics/dosage) [![codecov](https://codecov.io/gh/webcomics/dosage/branch/master/graph/badge.svg)](https://codecov.io/gh/webcomics/dosage) -![Maintenance](https://img.shields.io/maintenance/yes/2023.svg) +![Maintenance](https://img.shields.io/maintenance/yes/2024.svg) ![License](https://img.shields.io/github/license/webcomics/dosage) Dosage is designed to keep a local copy of specific webcomics and other @@ -72,7 +72,7 @@ are old enough to view them. ### Dependencies Since dosage is written in [Python](http://www.python.org/), a Python -installation is required: Dosage needs at least Python 3.7. Dosage requires +installation is required: Dosage needs at least Python 3.8. Dosage requires some Python modules from PyPI, so installation with `pip` is recommended. ### Using the Windows binary diff --git a/dosagelib/__init__.py b/dosagelib/__init__.py index 4f80013df..39ba36033 100644 --- a/dosagelib/__init__.py +++ b/dosagelib/__init__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher """ Automated comic downloader. Dosage traverses comic websites in order to download each strip of the comic. The intended use is for @@ -14,14 +14,11 @@ The primary interface is the 'dosage' commandline script. Comic modules for each comic are located in L{dosagelib.plugins}. """ -try: - from importlib.metadata import version, PackageNotFoundError -except ImportError: - from importlib_metadata import version, PackageNotFoundError +from importlib.metadata import version, PackageNotFoundError from .output import out -AppName = u'dosage' +AppName = 'dosage' try: __version__ = version(AppName) # PEP 396 except PackageNotFoundError: diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 20374c126..222549e14 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -1,12 +1,15 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2016 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +from __future__ import annotations + import os import glob import codecs import contextlib from datetime import datetime +from typing import Iterator from .output import out from .util import unquote, getFilename, urlopen, strsize @@ -14,27 +17,27 @@ from .events import getHandler # Maximum content size for images -MaxImageBytes = 1024 * 1024 * 20 # 20 MB +MAX_IMAGE_BYTES = 1024 * 1024 * 20 # 20 MB # RFC 1123 format, as preferred by RFC 2616 RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" -class ComicStrip(object): +class ComicStrip: """A list of comic image URLs.""" - def __init__(self, scraper, strip_url, image_urls, text=None): + def __init__(self, scraper, strip_url: str, image_urls: str, text=None) -> None: """Store the image URL list.""" self.scraper = scraper self.strip_url = strip_url self.image_urls = image_urls self.text = text - def getImages(self): + def getImages(self) -> Iterator[ComicImage]: """Get a list of image downloaders.""" for image_url in self.image_urls: yield self.getDownloader(image_url) - def getDownloader(self, url): + def getDownloader(self, url: str) -> ComicImage: """Get an image downloader.""" filename = self.scraper.namer(url, self.strip_url) if filename is None: @@ -43,7 +46,7 @@ class ComicStrip(object): text=self.text) -class ComicImage(object): +class ComicImage: """A comic image downloader.""" ChunkBytes = 1024 * 100 # 100KB @@ -64,7 +67,7 @@ class ComicImage(object): headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR) self.urlobj = urlopen(self.url, self.scraper.session, referrer=self.referrer, - max_content_bytes=MaxImageBytes, stream=True, + max_content_bytes=MAX_IMAGE_BYTES, stream=True, headers=headers) if self.urlobj.status_code == 304: # Not modified return diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index d53e04cfb..b3e4f00cc 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -1,39 +1,49 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring +from __future__ import annotations + +from typing import Protocol + from .util import getQueryParams +from .scraper import Scraper -def queryNamer(param, use_page_url=False): +class Namer(Protocol): + """A protocol for generic callbacks to name web comic images.""" + def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None: + ... + + +def queryNamer(param, use_page_url=False) -> Namer: """Get name from URL query part.""" - def _namer(self, image_url, page_url): + def _namer(self, image_url: str, page_url: str) -> str | None: """Get URL query part.""" url = page_url if use_page_url else image_url return getQueryParams(url)[param][0] return _namer -def regexNamer(regex, use_page_url=False): +def regexNamer(regex, use_page_url=False) -> Namer: """Get name from regular expression.""" - def _namer(self, image_url, page_url): + def _namer(self, image_url: str, page_url: str) -> str | None: """Get first regular expression group.""" url = page_url if use_page_url else image_url mo = regex.search(url) - if mo: - return mo.group(1) + return mo.group(1) if mo else None return _namer -def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'): +def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer: """Get name by mashing path parts together with underscores.""" - def _namer(self, imageurl, pageurl): + def _namer(self: Scraper, image_url: str, page_url: str) -> str | None: # Split and drop host name - pageurlsplit = pageurl.split('/')[3:] - imageurlsplit = imageurl.split('/')[3:] - joinparts = ([pageurlsplit[i] for i in pageurlparts] + - [imageurlsplit[i] for i in imageurlparts]) + pagesplit = page_url.split('/')[3:] + imagesplit = image_url.split('/')[3:] + joinparts = ([pagesplit[i] for i in pageparts] + + [imagesplit[i] for i in imageparts]) return joinchar.join(joinparts) return _namer diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index e3fc866ca..fd57d55a6 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -1,18 +1,18 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape, sub, MULTILINE from ..util import tagre -from ..scraper import BasicScraper, ParserScraper, _BasicScraper, _ParserScraper -from ..helpers import regexNamer, bounceStarter, indirectStarter +from ..scraper import ParserScraper, _BasicScraper, _ParserScraper +from ..helpers import joinPathPartsNamer, bounceStarter, indirectStarter from .common import WordPressScraper, WordPressNavi, WordPressWebcomic -class AbstruseGoose(_ParserScraper): - url = 'https://abstrusegoose.com/' +class AbstruseGoose(ParserScraper): + url = 'https://web.archive.org/web/20230930172141/https://abstrusegoose.com/' starter = bounceStarter stripUrl = url + '%s' firstStripUrl = stripUrl % '1' @@ -41,24 +41,16 @@ class AbsurdNotions(_BasicScraper): help = 'Index format: n (unpadded)' -class AcademyVale(_BasicScraper): - url = 'http://www.imagerie.com/vale/' - stripUrl = url + 'avarch.cgi?%s' - firstStripUrl = stripUrl % '001' - imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) - prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + - tagre('img', 'src', r'AVNavBack\.gif')) - help = 'Index format: nnn' - - -class Achewood(_ParserScraper): - url = 'https://www.achewood.com/' - stripUrl = url + 'index.php?date=%s' - firstStripUrl = stripUrl % '10012001' - imageSearch = '//p[@id="comic_body"]//img' - prevSearch = '//span[d:class("left")]/a[d:class("dateNav")]' - help = 'Index format: mmddyyyy' - namer = regexNamer(compile(r'date=(\d+)')) +class Achewood(ParserScraper): + baseUrl = 'https://achewood.com/' + stripUrl = baseUrl + '%s/title.html' + url = stripUrl % '2016/12/25' + firstStripUrl = stripUrl % '2001/10/01' + imageSearch = '//img[d:class("comicImage")]' + prevSearch = '//a[d:class("comic_prev")]' + namer = joinPathPartsNamer(pageparts=range(0, 2)) + help = 'Index format: yyyy/mm/dd' + endOfLife = True class AdventuresOfFifne(_ParserScraper): @@ -117,12 +109,8 @@ class AhoiPolloi(_ParserScraper): help = 'Index format: yyyymmdd' -class AhoyEarth(WordPressNavi): - url = 'http://www.ahoyearth.com/' - - class AirForceBlues(WordPressScraper): - url = 'http://farvatoons.com/' + url = 'https://web.archive.org/web/20210102113825/http://farvatoons.com/' firstStripUrl = url + 'comic/in-texas-there-are-texans/' @@ -235,14 +223,11 @@ class AltermetaOld(_ParserScraper): help = 'Index format: n (unpadded)' -class AmazingSuperPowers(_BasicScraper): - url = 'http://www.amazingsuperpowers.com/' - rurl = escape(url) +class AmazingSuperPowers(WordPressNavi): + url = 'https://www.amazingsuperpowers.com/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2007/09/heredity' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/name' + imageSearch = '//div[d:class("comicpane")]/img' def shouldSkipUrl(self, url, data): """Skip pages without images.""" @@ -271,19 +256,7 @@ class Amya(WordPressScraper): url = 'http://www.amyachronicles.com/' -class Anaria(_ParserScraper): - url = 'https://www.leahbriere.com/anaria-the-witchs-dream/' - firstStripUrl = url - imageSearch = '//div[contains(@class, "gallery")]//a' - multipleImagesPerStrip = True - endOfLife = True - - def namer(self, imageUrl, pageUrl): - filename = imageUrl.rsplit('/', 1)[-1] - return filename.replace('00.jpg', 'new00.jpg').replace('new', '1') - - -class Angband(_ParserScraper): +class Angband(ParserScraper): url = 'http://angband.calamarain.net/' stripUrl = url + '%s' imageSearch = '//img' @@ -292,7 +265,7 @@ class Angband(_ParserScraper): def starter(self): page = self.getPage(self.url) - self.pages = page.xpath('//p/a[not(contains(@href, "cast"))]/@href') + self.pages = self.match(page, '//p/a[not(contains(@href, "cast"))]/@href') self.firstStripUrl = self.pages[0] return self.pages[-1] @@ -300,14 +273,6 @@ class Angband(_ParserScraper): return self.pages[self.pages.index(url) - 1] -class Angels2200(_BasicScraper): - url = 'http://www.janahoffmann.com/angels/' - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) - prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous") - help = 'Index format: yyyy/mm/dd/part--comic-' - - class Annyseed(_ParserScraper): baseUrl = ('https://web.archive.org/web/20190511031451/' 'http://www.mirrorwoodcomics.com/') @@ -330,7 +295,7 @@ class Annyseed(_ParserScraper): return tourl -class AntiheroForHire(_ParserScraper): +class AntiheroForHire(ParserScraper): stripUrl = 'https://www.giantrobot.club/antihero-for-hire/%s' firstStripUrl = stripUrl % '2016/6/8/entrance-vigil' url = firstStripUrl @@ -341,7 +306,7 @@ class AntiheroForHire(_ParserScraper): def starter(self): # Build list of chapters for navigation page = self.getPage(self.url) - self.chapters = page.xpath('//ul[@class="archive-group-list"]//a[contains(@class, "archive-item-link")]/@href') + self.chapters = self.match(page, '//ul[d:class("archive-group-list")]//a[d:class("archive-item-link")]/@href') return self.chapters[0] def getPrevUrl(self, url, data): @@ -377,7 +342,7 @@ class ArtificialIncident(WordPressWebcomic): firstStripUrl = stripUrl % 'issue-one-life-changing' -class AstronomyPOTD(_ParserScraper): +class AstronomyPOTD(ParserScraper): baseUrl = 'http://apod.nasa.gov/apod/' url = baseUrl + 'astropix.html' starter = bounceStarter @@ -391,7 +356,7 @@ class AstronomyPOTD(_ParserScraper): def shouldSkipUrl(self, url, data): """Skip pages without images.""" - return data.xpath('//iframe') # videos + return self.match(data, '//iframe') # videos def namer(self, image_url, page_url): return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:], diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index c596ede60..78b17399a 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -34,11 +34,11 @@ class CaptainSNES(_BasicScraper): help = 'Index format: yyyy/mm/dd/nnn-stripname' -class CarryOn(_ParserScraper): +class CarryOn(ParserScraper): url = 'http://www.hirezfox.com/km/co/' stripUrl = url + 'd/%s.html' firstStripUrl = stripUrl % '20040701' - imageSearch = '//div[@class="strip"]/img' + imageSearch = '//div[d:class("strip")]/img' prevSearch = '//a[text()="Previous Day"]' multipleImagesPerStrip = True @@ -122,13 +122,13 @@ class CatAndGirl(_ParserScraper): prevSearch = '//a[d:class("pager--prev")]' -class CatenaManor(_ParserScraper): +class CatenaManor(ParserScraper): baseUrl = ('https://web.archive.org/web/20141027141116/' 'http://catenamanor.com/') url = baseUrl + 'archives' stripUrl = baseUrl + '%s/' firstStripUrl = stripUrl % '2003/07' - imageSearch = '//img[@class="comicthumbnail"]' + imageSearch = '//img[d:class("comicthumbnail")]' multipleImagesPerStrip = True endOfLife = True strips: List[str] = [] @@ -136,7 +136,7 @@ class CatenaManor(_ParserScraper): def starter(self): # Retrieve archive links and select valid range archivePage = self.getPage(self.url) - archiveStrips = archivePage.xpath('//div[@id="archivepage"]//a') + archiveStrips = self.match(archivePage, '//div[@id="archivepage"]//a') valid = False for link in archiveStrips: if self.stripUrl % '2012/01' in link.get('href'): @@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper): class CSectionComics(WordPressScraper): url = 'https://www.csectioncomics.com/' firstStripUrl = url + 'comics/one-day-in-country' - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) multipleImagesPerStrip = True @@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper): prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]' nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]' starter = bounceStarter - namer = joinPathPartsNamer((), range(-4, 0)) + namer = joinPathPartsNamer(imageparts=range(-4, 0)) class CynWolf(_ParserScraper): diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py index 0a7a9c108..f5962db33 100644 --- a/dosagelib/plugins/comicfury.py +++ b/dosagelib/plugins/comicfury.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring import os from ..scraper import ParserScraper @@ -79,7 +79,7 @@ class ComicFury(ParserScraper): num = parts[-1] if self.multipleImagesPerStrip: page = self.getPage(pageUrl) - images = page.xpath('//img[@class="comicsegmentimage"]/@src') + images = self.match(page, '//img[d:class("comicsegmentimage")]/@src') if len(images) > 1: imageIndex = images.index(imageUrl) + 1 return "%s_%s-%d%s" % (self.prefix, num, imageIndex, ext) @@ -88,8 +88,8 @@ class ComicFury(ParserScraper): def shouldSkipUrl(self, url, data): """Skip pages without images.""" # Videos on Underverse - return (data.xpath('//div[@id="comicimagewrap"]//video') and - not data.xpath('//div[@id="comicimagewrap"]//img')) + return (self.match(data, '//div[@id="comicimagewrap"]//video') and + not self.match(data, '//div[@id="comicimagewrap"]//img')) @classmethod def getmodules(cls): # noqa: CFQ001 diff --git a/dosagelib/plugins/comicskingdom.py b/dosagelib/plugins/comicskingdom.py index 818a37fa7..372cf8933 100644 --- a/dosagelib/plugins/comicskingdom.py +++ b/dosagelib/plugins/comicskingdom.py @@ -1,41 +1,35 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Thomas W. Littauer -try: - from importlib_resources import as_file, files -except ImportError: - from importlib.resources import as_file, files - -from ..helpers import bounceStarter, joinPathPartsNamer +from ..helpers import indirectStarter from ..scraper import ParserScraper class ComicsKingdom(ParserScraper): - imageSearch = '//img[@id="theComicImage"]' - prevSearch = '//a[./img[contains(@alt, "Previous")]]' - nextSearch = '//a[./img[contains(@alt, "Next")]]' - starter = bounceStarter - namer = joinPathPartsNamer((-2, -1), ()) + partDiv = '//div[d:class("comic-reader-item")]' + imageSearch = '//meta[@property="og:image"]/@content' + prevSearch = partDiv + '[2]/@data-link' + starter = indirectStarter help = 'Index format: yyyy-mm-dd' def __init__(self, name, path, lang=None): super().__init__('ComicsKingdom/' + name) self.url = 'https://comicskingdom.com/' + path self.stripUrl = self.url + '/%s' + self.latestSearch = f'//a[re:test(@href, "/{path}/[0-9-]+$")]' if lang: self.lang = lang + def link_modifier(self, fromurl, tourl): + return tourl.replace('//wp.', '//', 1) + @classmethod def getmodules(cls): # noqa: CFQ001 return ( - # Some comics are not listed on the "all" page (too old?) - cls('Retail', 'retail'), - # do not edit anything below since these entries are generated from # scripts/comicskingdom.py # START AUTOUPDATE - cls('AmazingSpiderman', 'amazing-spider-man'), - cls('AmazingSpidermanSpanish', 'hombre-arana', lang='es'), + cls('Alice', 'alice'), cls('Apartment3G', 'apartment-3-g_1'), cls('ArcticCircle', 'arctic-circle'), cls('ATodaVelocidadSpanish', 'a-toda-velocidad', lang='es'), @@ -43,22 +37,25 @@ class ComicsKingdom(ParserScraper): cls('BarneyGoogleAndSnuffySmithSpanish', 'tapon', lang='es'), cls('BeetleBailey', 'beetle-bailey-1'), cls('BeetleBaileySpanish', 'beto-el-recluta', lang='es'), + cls('BeetleMoses', 'beetle-moses'), cls('BetweenFriends', 'between-friends'), + cls('BewareOfToddler', 'beware-of-toddler'), cls('BigBenBolt', 'big-ben-bolt'), - cls('BigBenBoltSundays', 'big-ben-bolt-sundays'), cls('Bizarro', 'bizarro'), cls('Blondie', 'blondie'), cls('BlondieSpanish', 'pepita', lang='es'), + cls('BobMankoffPresentsShowMeTheFunny', 'show-me-the-funny'), + cls('BobMankoffPresentsShowMeTheFunnyAnimalEdition', 'show-me-the-funny-pets'), cls('BonersArk', 'boners-ark'), - cls('BonersArkSundays', 'boners-ark-sundays'), - cls('BrianDuffy', 'brian-duffy'), + cls('BreakOfDay', 'break-of-day'), cls('BrickBradford', 'brick-bradford'), cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'), cls('BringingUpFather', 'bringing-up-father'), cls('BringingUpFatherSpanish', 'educando-a-papa', lang='es'), cls('BuzSawyer', 'buz-sawyer'), + cls('Candorville', 'candorville'), cls('CarpeDiem', 'carpe-diem'), - cls('Crankshaft', 'crankshaft'), + cls('Comiclicious', 'comiclicious'), cls('Crock', 'crock'), cls('CrockSpanish', 'crock-spanish', lang='es'), cls('Curtis', 'curtis'), @@ -67,6 +64,7 @@ class ComicsKingdom(ParserScraper): cls('DavidMHitch', 'david-m-hitch'), cls('DennisTheMenace', 'dennis-the-menace'), cls('DennisTheMenaceSpanish', 'daniel-el-travieso', lang='es'), + cls('Dumplings', 'dumplings'), cls('Dustin', 'dustin'), cls('EdGamble', 'ed-gamble'), # EdgeCity has a duplicate in GoComics/EdgeCity @@ -74,18 +72,15 @@ class ComicsKingdom(ParserScraper): cls('FamilyCircusSpanish', 'circulo-familiar', lang='es'), cls('FlashForward', 'flash-forward'), cls('FlashGordon', 'flash-gordon'), - cls('FlashGordonSundays', 'flash-gordon-sundays'), - cls('FunkyWinkerbean', 'funky-winkerbean'), - cls('FunkyWinkerbeanSunday', 'funky-winkerbean-sundays'), - cls('FunkyWinkerbeanVintage', 'funky-winkerbean-1'), - cls('FunnyOnlineAnimals', 'Funny-Online-Animals'), - cls('GearheadGertie', 'Gearhead-Gertie'), + cls('FunnyOnlineAnimals', 'funny-online-animals'), + cls('GearheadGertie', 'gearhead-gertie'), + cls('GodsHands', 'gods-hands'), cls('HagarTheHorrible', 'hagar-the-horrible'), cls('HagarTheHorribleSpanish', 'olafo', lang='es'), cls('HeartOfJulietJones', 'heart-of-juliet-jones'), - cls('HeartOfJulietJonesSundays', 'heart-of-juliet-jones-sundays'), cls('HiAndLois', 'hi-and-lois'), - cls('IntelligentLife', 'Intelligent'), + cls('InsanityStreak', 'insanity-streak'), + cls('IntelligentLife', 'intelligent'), cls('JimmyMargulies', 'jimmy-margulies'), cls('JohnBranch', 'john-branch'), cls('JohnnyHazard', 'johnny-hazard'), @@ -93,7 +88,6 @@ class ComicsKingdom(ParserScraper): cls('JungleJimSundays', 'jungle-jim-sundays'), cls('KatzenjammerKids', 'katzenjammer-kids'), cls('KatzenjammerKidsSpanish', 'maldades-de-dos-pilluelos', lang='es'), - cls('KatzenjammerKidsSundays', 'katzenjammer-kids-sundays'), cls('KevinAndKell', 'kevin-and-kell'), cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'), cls('KirkWalters', 'kirk-walters'), @@ -101,44 +95,42 @@ class ComicsKingdom(ParserScraper): cls('LaloYLolaSpanish', 'lalo-y-lola', lang='es'), cls('LeeJudge', 'lee-judge'), cls('LegalizationNation', 'legalization-nation'), - cls('LegendOfBill', 'Legend-of-Bill'), + cls('LegendOfBill', 'legend-of-bill'), cls('LittleIodineSundays', 'little-iodine-sundays'), cls('LittleKing', 'the-little-king'), - cls('Lockhorns', 'lockhorns'), - cls('Macanudo', 'Macanudo'), + cls('Macanudo', 'macanudo'), cls('MacanudoSpanish', 'macanudo-spanish', lang='es'), cls('MallardFillmore', 'mallard-fillmore'), - cls('MandrakeTheMagician', 'mandrake-the-magician-1'), + cls('MandrakeTheMagician', 'mandrake-the-magician'), cls('MandrakeTheMagicianSpanish', 'mandrake-the-magician-spanish', lang='es'), - cls('MandrakeTheMagicianSundays', 'mandrake-the-magician-sundays'), + cls('MaraLlaveKeeperOfTime', 'mara-llave-keeper-of-time'), cls('MarkTrail', 'mark-trail'), cls('MarkTrailSpanish', 'mark-trail-spanish', lang='es'), - cls('MarkTrailVintage', 'Mark-Trail-Vintage'), cls('Marvin', 'marvin'), cls('MarvinSpanish', 'marvin-spanish', lang='es'), cls('MaryWorth', 'mary-worth'), cls('MaryWorthSpanish', 'maria-de-oro', lang='es'), - cls('MikePeters', 'mike-peters'), + cls('Mazetoons', 'mazetoons'), cls('MikeShelton', 'mike-shelton'), cls('MikeSmith', 'mike-smith'), cls('MooseAndMolly', 'moose-and-molly'), cls('MooseAndMollySpanish', 'quintin', lang='es'), - cls('MotherGooseAndGrimm', 'mother-goose-grimm'), cls('MrAbernathySpanish', 'don-abundio', lang='es'), cls('Mutts', 'mutts'), cls('MuttsSpanish', 'motas', lang='es'), + cls('NeverBeenDeader', 'never-been-deader'), cls('OfficeHours', 'office-hours'), + cls('OliveAndPopeye', 'olive-popeye'), cls('OnTheFastrack', 'on-the-fastrack'), cls('PajamaDiaries', 'pajama-diaries'), cls('PardonMyPlanet', 'pardon-my-planet'), cls('Phantom', 'phantom'), cls('PhantomSpanish', 'el-fantasma', lang='es'), - cls('PhantomSundays', 'phantom-sundays'), + cls('PlanetSyndicate', 'the_planet_syndicate'), cls('Popeye', 'popeye'), cls('PopeyesCartoonClub', 'popeyes-cartoon-club'), cls('PopeyeSpanish', 'popeye-spanish', lang='es'), cls('PrinceValiant', 'prince-valiant'), - cls('PrinceValiantSundays', 'prince-valiant-sundays'), cls('PrincipeValienteSpanish', 'principe-valiente', lang='es'), cls('ProsAndCons', 'pros-cons'), cls('Quincy', 'quincy'), @@ -148,7 +140,9 @@ class ComicsKingdom(ParserScraper): cls('RexMorganMDSpanish', 'rex-morgan-md-spanish', lang='es'), cls('RhymesWithOrange', 'rhymes-with-orange'), cls('RipKirby', 'rip-kirby'), + # Rosebuds has a duplicate in GoComics/Rosebuds cls('SafeHavens', 'safe-havens'), + cls('SagaOfBrannBjornson', 'the-saga-of-brann-bjornson'), cls('Sales', 'sales'), cls('SallyForth', 'sally-forth'), cls('SamAndSilo', 'sam-and-silo'), @@ -156,17 +150,18 @@ class ComicsKingdom(ParserScraper): cls('SecretAgentX9', 'secret-agent-x-9'), # Shoe has a duplicate in GoComics/Shoe cls('SixChix', 'six-chix'), - cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'), - cls('SlylockFoxAndComicsForKidsSpanish', 'solo-para-ninos', lang='es'), + cls('SlylockFox', 'slylock-fox-and-comics-for-kids'), + cls('SlylockFoxSpanish', 'solo-para-ninos', lang='es'), + cls('SuburbanFairyTales', 'suburban-fairy-tales'), cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'), cls('TheyllDoItEveryTimeSpanish', 'nunca-falta-alguien-asi', lang='es'), cls('ThimbleTheater', 'thimble-theater'), cls('Tiger', 'tiger'), cls('TigerSpanish', 'tigrillo', lang='es'), - cls('TigerVintage', 'tiger-1'), - cls('TigerVintageSundays', 'tiger-sundays'), cls('TinasGroove', 'tina-s-groove'), cls('ToddTheDinosaur', 'todd-the-dinosaur'), + cls('WillyBlack', 'willy-black'), + cls('WillyBlacksSpanish', 'willy-black-spanish', lang='es'), cls('ZippyThePinhead', 'zippy-the-pinhead'), cls('Zits', 'zits'), cls('ZitsSpanish', 'jeremias', lang='es'), diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index f7a2e1933..4b632cac9 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper, ParserScraper @@ -328,19 +328,14 @@ class DreamKeepersPrelude(_ParserScraper): help = 'Index format: n' -class DresdenCodak(_ParserScraper): +class DresdenCodak(ParserScraper): url = 'http://dresdencodak.com/' - startUrl = url + 'cat/comic/' firstStripUrl = url + '2007/02/08/pom/' imageSearch = '//section[d:class("entry-content")]//img[d:class("aligncenter")]' prevSearch = '//a[img[contains(@src, "prev")]]' latestSearch = '//a[d:class("tc-grid-bg-link")]' starter = indirectStarter - # Blog and comic are mixed... - def shouldSkipUrl(self, url, data): - return not data.xpath(self.imageSearch) - class DrFun(_ParserScraper): baseUrl = ('https://web.archive.org/web/20180726145737/' @@ -355,14 +350,12 @@ class DrFun(_ParserScraper): help = 'Index format: nnnnn' -class Drive(_BasicScraper): +class Drive(ParserScraper): url = 'http://www.drivecomic.com/' - rurl = escape(url) - stripUrl = url + 'archive/%s.html' - firstStripUrl = stripUrl % '090815' - imageSearch = compile(tagre("img", "src", r'(http://cdn\.drivecomic\.com/strips/main/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl) + "Previous") - help = 'Index format: yymmdd' + firstStripUrl = url + 'comic/act-1-pg-001/' + imageSearch = ('//div[@id="unspliced-comic"]//img/@data-src-img', + '//div[@id="unspliced-comic"]//picture//img') + prevSearch = '//a[d:class("previous-comic")]' class DrMcNinja(_ParserScraper): diff --git a/dosagelib/plugins/derideal.py b/dosagelib/plugins/derideal.py index 7b8d2e298..ca75a2e73 100644 --- a/dosagelib/plugins/derideal.py +++ b/dosagelib/plugins/derideal.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2019-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from ..scraper import ParserScraper from ..helpers import indirectStarter @@ -27,7 +27,7 @@ class Derideal(ParserScraper): def starter(self): indexPage = self.getPage(self.url) - self.chapters = indexPage.xpath('//a[contains(text(), "Read this episode")]/@href') + self.chapters = self.match(indexPage, '//a[contains(text(), "Read this episode")]/@href') self.currentChapter = len(self.chapters) return indirectStarter(self) diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index d4ca493df..ad772be61 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -113,7 +113,7 @@ class Erfworld(ParserScraper): def shouldSkipUrl(self, url, data): """Skip pages without images.""" - return not data.xpath(self.imageSearch) + return not self.match(data, self.imageSearch) def namer(self, imageUrl, pageUrl): # Fix inconsistent filenames @@ -167,15 +167,6 @@ class Erstwhile(WordPressNavi): endOfLife = True -class Everblue(ComicControlScraper): - url = 'http://www.everblue-comic.com/comic/' - stripUrl = url + '%s' - firstStripUrl = stripUrl % '1' - - def namer(self, imageUrl, pageUrl): - return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1] - - class EverybodyLovesEricRaymond(_ParserScraper): url = 'http://geekz.co.uk/lovesraymond/' firstStripUrl = url + 'archive/slashdotted' @@ -190,9 +181,10 @@ class EvilDiva(WordPressScraper): endOfLife = True -class EvilInc(_ParserScraper): +class EvilInc(ParserScraper): url = 'https://www.evil-inc.com/' - imageSearch = '//div[@id="unspliced-comic"]/img/@data-src' + imageSearch = ('//div[@id="unspliced-comic"]/img', + '//div[@id="unspliced-comic"]/picture//img') prevSearch = '//a[./i[d:class("fa-chevron-left")]]' firstStripUrl = url + 'comic/monday-3/' @@ -263,7 +255,7 @@ class ExtraFabulousComics(WordPressScraper): return '_'.join((pagepart, imagename)) def shouldSkipUrl(self, url, data): - return data.xpath('//div[@id="comic"]//iframe') + return self.match(data, '//div[@id="comic"]//iframe') class ExtraLife(_BasicScraper): diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index d3f45ac98..360b6ba39 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -140,7 +140,7 @@ class FoxDad(ParserScraper): def namer(self, imageUrl, pageUrl): page = self.getPage(pageUrl) - post = page.xpath('//li[@class="timestamp"]/a/@href')[0] + post = self.match(page, '//li[d:class("timestamp")]/a/@href')[0] post = post.replace('https://foxdad.com/post/', '') if '-consider-support' in post: post = post.split('-consider-support')[0] @@ -171,7 +171,7 @@ class Fragile(_ParserScraper): endOfLife = True -class FredoAndPidjin(_ParserScraper): +class FredoAndPidjin(ParserScraper): url = 'https://www.pidjin.net/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2006/02/19/goofy-monday' @@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper): prevSearch = '//span[d:class("prev")]/a' latestSearch = '//section[d:class("latest")]//a' starter = indirectStarter - namer = joinPathPartsNamer((0, 1, 2)) + namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,)) class Freefall(_ParserScraper): @@ -216,7 +216,7 @@ class FriendsYouAreStuckWith(WordPressScraper): def namer(self, imageUrl, pageUrl): page = self.getPage(pageUrl) - strip = page.xpath('//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '') + strip = self.match(page, '//div[@id="comic-wrap"]/@class')[0].replace('comic-id-', '') return strip + '_' + imageUrl.rstrip('/').rsplit('/', 1)[-1] diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 0d5c1a5ce..7a59ed1f9 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -3,11 +3,11 @@ # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Daniel Ring -from re import compile, escape +from re import compile -from ..scraper import _BasicScraper, _ParserScraper +from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..helpers import indirectStarter -from ..util import tagre +from ..util import tagre, getQueryParams from .common import ComicControlScraper, WordPressScraper, WordPressNavi @@ -27,13 +27,9 @@ class Garanos(WordPressScraper): endOfLife = True -class GastroPhobia(_ParserScraper): - url = 'http://www.gastrophobia.com/' - stripUrl = url + 'index.php?date=%s' - firstStripUrl = stripUrl % '2008-07-30' - imageSearch = '//div[@id="comic"]//img' - prevSearch = '//div[@id="prev"]/a' - help = 'Index format: yyyy-mm-dd' +class GastroPhobia(ComicControlScraper): + url = 'https://gastrophobia.com/' + firstStripUrl = url + 'comix/the-mane-event' class Geeks(_ParserScraper): @@ -51,7 +47,7 @@ class GeeksNextDoor(_ParserScraper): url = 'http://www.geeksnextcomic.com/' stripUrl = url + '%s.html' firstStripUrl = stripUrl % '2007-03-27' # '2010-10-04' - imageSearch = '//p/img' + imageSearch = ('//p/img', '//p/span/img') prevSearch = ( '//a[img[contains(@src, "/nav_prev")]]', '//a[contains(text(), "< prev")]', # start page is different @@ -59,16 +55,12 @@ class GeeksNextDoor(_ParserScraper): help = 'Index format: yyyy-mm-dd' -class GirlGenius(_BasicScraper): - baseUrl = 'http://www.girlgeniusonline.com/' - rurl = escape(baseUrl) - url = baseUrl + 'comic.php' +class GirlGenius(ParserScraper): + url = 'https://www.girlgeniusonline.com/comic.php' stripUrl = url + '?date=%s' firstStripUrl = stripUrl % '20021104' - imageSearch = compile( - tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'")) - prevSearch = compile(tagre("a", "id", "topprev", quote="\"", - before=r"(%s[^\"']+)" % rurl)) + imageSearch = '//img[@alt="Comic"]' + prevSearch = '//a[@id="topprev"]' multipleImagesPerStrip = True help = 'Index format: yyyymmdd' @@ -99,20 +91,18 @@ class GoGetARoomie(ComicControlScraper): url = 'http://www.gogetaroomie.com' -class GoneWithTheBlastwave(_BasicScraper): - url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' - starter = indirectStarter - stripUrl = url[:-1] + '%s' +class GoneWithTheBlastwave(ParserScraper): + stripUrl = 'http://www.blastwave-comic.com/index.php?p=comic&nro=%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(r'' + - r'' + - r']+?src=\'drop_shadow/previous.gif\'>') - help = 'Index format: yyyy-mm-dd' +class MacHall(ComicControlScraper): + url = 'https://www.machall.com/' + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % 'moving-in' class MadamAndEve(_BasicScraper): @@ -58,12 +54,12 @@ class MareInternum(WordPressScraper): firstStripUrl = stripUrl % 'intro-page-1' -class Marilith(_BasicScraper): - url = 'http://www.marilith.com/' +class Marilith(ParserScraper): + url = 'https://web.archive.org/web/20170619193143/http://www.marilith.com/' stripUrl = url + 'archive.php?date=%s' firstStripUrl = stripUrl % '20041215' - imageSearch = compile(r'') - help = 'Index Format: n' - - -class MyCartoons(_BasicScraper): - url = 'http://mycartoons.de/' - rurl = escape(url) - stripUrl = url + 'page/%s' - imageSearch = ( - compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)), - compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)), - ) - prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + - "«") - help = 'Index format: number' - lang = 'de' +class Moonsticks(ParserScraper): + url = "https://moonsticks.org/" + imageSearch = "//div[d:class('entry-content')]//img" + prevSearch = ('//a[@rel="prev"]', "//a[text()='\u00AB Prev']") class MyLifeWithFel(ParserScraper): diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py index 5706d2ba2..2f85ee765 100644 --- a/dosagelib/plugins/o.py +++ b/dosagelib/plugins/o.py @@ -11,6 +11,12 @@ from ..util import tagre from .common import WordPressScraper, WordPressNavi +class OccasionalComicsDisorder(WordPressScraper): + url = 'https://occasionalcomics.com/' + stripUrl = url + 'comic/%s/' + firstStripUrl = stripUrl % 'latest-comic-2' + + class OctopusPie(_ParserScraper): url = 'http://www.octopuspie.com/' rurl = escape(url) diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 11ee39045..abd282522 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -604,7 +604,6 @@ class Removed(Scraper): cls('WotNow'), # Removed in 3.0 - cls('CatenaManor/CatenaCafe'), cls('ComicFury/AdventuresOftheGreatCaptainMaggieandCrew'), cls('ComicFury/AWAKENING'), cls('ComicFury/Beebleville'), @@ -833,8 +832,6 @@ class Removed(Scraper): cls('ComicsKingdom/Redeye'), cls('ComicsKingdom/RedeyeSundays'), cls('CrapIDrewOnMyLunchBreak'), - cls('FalseStart'), - cls('Ginpu'), cls('GoComics/060'), cls('GoComics/2CowsAndAChicken'), cls('GoComics/ABitSketch'), @@ -995,11 +992,9 @@ class Removed(Scraper): cls('GoComics/Wrobbertcartoons'), cls('GoComics/Zootopia'), cls('JustAnotherEscape'), - cls('KemonoCafe/PrincessBunny'), cls('Laiyu', 'brk'), cls('MangaDex/DrStone', 'legal'), cls('MangaDex/HeavensDesignTeam', 'legal'), - cls('MangaDex/ImTheMaxLevelNewbie', 'legal'), cls('MangaDex/SPYxFAMILY', 'legal'), cls('Ryugou'), cls('SeelPeel'), @@ -1573,22 +1568,82 @@ class Removed(Scraper): cls('SnafuComics/Tin'), cls('SnafuComics/Titan'), cls('StudioKhimera/Eorah', 'mov'), - cls('StudioKhimera/Mousechevious'), cls('StuffNoOneToldMe'), cls('TaleOfTenThousand'), - cls('TalesAndTactics'), cls('TheCyantianChronicles/CookieCaper'), cls('TheCyantianChronicles/Pawprints'), - cls('VampireHunterBoyfriends'), cls('VGCats/Adventure'), cls('VGCats/Super'), cls('VictimsOfTheSystem'), cls('WebDesignerCOTW'), cls('WebToons/Adamsville'), cls('WebToons/CrapIDrewOnMyLunchBreak'), + cls('WintersLight'), + + # Removed in 3.1 + cls('AbbysAgency', 'brk'), + cls('AcademyVale'), + cls('AhoyEarth', 'block'), + cls('Anaria', 'del'), + cls('Angels2200', 'del'), + cls('BlackRose', 'brk'), + cls('CatenaManor/CatenaCafe'), + cls('ComicsKingdom/AmazingSpiderman'), + cls('ComicsKingdom/AmazingSpidermanSpanish'), + cls('ComicsKingdom/BigBenBoltSundays'), + cls('ComicsKingdom/BonersArkSundays'), + cls('ComicsKingdom/BrianDuffy'), + cls('ComicsKingdom/Crankshaft'), + cls('ComicsKingdom/FlashGordonSundays'), + cls('ComicsKingdom/FunkyWinkerbean'), + cls('ComicsKingdom/FunkyWinkerbeanSunday'), + cls('ComicsKingdom/FunkyWinkerbeanSundays'), + cls('ComicsKingdom/FunkyWinkerbeanVintage'), + cls('ComicsKingdom/HeartOfJulietJonesSundays'), + cls('ComicsKingdom/KatzenjammerKidsSundays'), + cls('ComicsKingdom/Lockhorns'), + cls('ComicsKingdom/MandrakeTheMagicianSundays'), + cls('ComicsKingdom/MarkTrailVintage'), + cls('ComicsKingdom/MikePeters'), + cls('ComicsKingdom/MotherGooseAndGrimm'), + cls('ComicsKingdom/PhantomSundays'), + cls('ComicsKingdom/PrinceValiantSundays'), + cls('ComicsKingdom/Retail'), + cls('ComicsKingdom/TigerSundays'), + cls('ComicsKingdom/TigerVintage'), + cls('ComicsKingdom/TigerVintageSundays'), + cls('Everblue', 'block'), + cls('FalseStart'), + cls('Ginpu'), + cls('GoComics/9ChickweedLaneClassics'), + cls('GoComics/Badlands'), + cls('GoComics/BigNateFirstClass'), + cls('GoComics/BreakOfDay'), + cls('GoComics/Candorville'), + cls('GoComics/DilbertClassics'), + cls('GoComics/DilbertEnEspanol'), + cls('GoComics/DumbwichCastle'), + cls('GoComics/EyebeamClassic'), + cls('GoComics/GarfieldClassics'), + cls('GoComics/MakingIt'), + cls('GoComics/MtPleasant'), + cls('GoComics/PCAndPixel'), + cls('GoComics/SaltNPepper'), + cls('GoComics/SigneWilkinson'), + cls('GoComics/Snowflakes'), + cls('GoComics/StoneSoupClassics'), + cls('GoComics/StuartCarlson'), + cls('KemonoCafe/PrincessBunny'), + cls('Lackadaisy', 'block'), + cls('MangaDex/ImTheMaxLevelNewbie', 'legal'), + cls('MrLovenstein', 'jsh'), + cls('MyCartoons'), + cls('Shivae/BlackRose', 'brk'), + cls('StudioKhimera/Mousechevious'), + cls('TalesAndTactics'), + cls('VampireHunterBoyfriends'), cls('WebToons/CrystalVirus'), cls('WebToons/OVERPOWERED'), - cls('WintersLight'), ) @@ -1667,10 +1722,8 @@ class Renamed(Scraper): # Renamed in 3.0 cls('AHClub', 'RickGriffinStudios/AHClub'), cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'), - cls('ComicsKingdom/FunkyWinkerbeanSundays', 'ComicsKingdom/FunkyWinkerbeanSunday'), cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'), cls('ComicsKingdom/TheLittleKing', 'ComicsKingdom/LittleKing'), - cls('ComicsKingdom/TigerSundays', 'ComicsKingdom/TigerVintageSundays'), cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'), cls('GoComics/Cathy', 'GoComics/CathyClassics'), cls('GoComics/DarrinBell', 'ComicsKingdom/DarrinBell'), @@ -1681,7 +1734,6 @@ class Renamed(Scraper): cls('GoComics/Widdershins', 'Widdershins'), cls('Guardia', 'ComicFury/Guardia'), cls('RadioactivePanda', 'Tapas/RadioactivePanda'), - cls('Shivae/BlackRose', 'BlackRose'), cls('SmackJeeves/BlackTapestries', 'ComicFury/BlackTapestries'), cls('SmackJeeves/ByTheBook', 'ByTheBook'), cls('SmackJeeves/FurryExperience', 'ComicFury/FurryExperience'), @@ -1694,6 +1746,9 @@ class Renamed(Scraper): cls('TracesOfThePast/NSFW', 'RickGriffinStudios/TracesOfThePastNSFW'), # Renamed in 3.1 + cls('ComicsKingdom/SlylockFoxAndComicsForKids', 'ComicsKingdom/SlylockFox'), + cls('ComicsKingdom/SlylockFoxAndComicsForKidsSpanish', 'ComicsKingdom/SlylockFoxSpanish'), cls('Exiern', 'ComicFury/Exiern'), + cls('MaxOveracts', 'OccasionalComicsDisorder'), cls('SafelyEndangered', 'WebToons/SafelyEndangered'), ) diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index 41aad4a30..0a2cf0037 100644 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper, ParserScraper @@ -34,16 +34,11 @@ class ParadigmShift(_BasicScraper): help = 'Index format: custom' -class ParallelUniversum(_BasicScraper): - url = 'http://www.paralleluniversum.net/' - rurl = escape(url) +class ParallelUniversum(WordPressScraper): + url = 'https://www.paralleluniversum.net/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '001-der-comic-ist-tot' - imageSearch = compile(tagre("img", "src", - r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) + - tagre("span", "class", "prev")) - help = 'Index format: number-stripname' + prevSearch = '//a[@rel="prev"]' lang = 'de' @@ -95,14 +90,12 @@ class PebbleVersion(_ParserScraper): help = 'Index format: n (unpadded)' -class PennyAndAggie(_BasicScraper): - url = 'http://pennyandaggie.com/' - rurl = escape(url) - stripUrl = url + 'index.php?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") + - tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote="")) - help = 'Index format: n (unpadded)' +class PennyAndAggie(ComicControlScraper): + url = 'https://pixietrixcomix.com/penny-and-aggie' + stripUrl = url + '/%s' + firstStripUrl = stripUrl % '2004-09-06' + endOfLife = True + help = 'Index format: yyyy-mm-dd' class PennyArcade(_ParserScraper): @@ -117,19 +110,17 @@ class PennyArcade(_ParserScraper): help = 'Index format: yyyy/mm/dd' -class PeppermintSaga(WordPressNavi): +class PeppermintSaga(WordPressScraper): url = 'http://www.pepsaga.com/' - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '3' - help = 'Index format: number' + stripUrl = url + 'comics/%s/' + firstStripUrl = stripUrl % 'the-sword-of-truth-vol1' adult = True -class PeppermintSagaBGR(WordPressNavi): +class PeppermintSagaBGR(WordPressScraper): url = 'http://bgr.pepsaga.com/' - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '4' - help = 'Index format: number' + stripUrl = url + '?comic=%s' + firstStripUrl = stripUrl % '04172011' adult = True @@ -150,14 +141,16 @@ class PeterAndWhitney(_ParserScraper): prevSearch = '//a[./img[contains(@src, "nav_previous")]]' -class PHDComics(_ParserScraper): +class PHDComics(ParserScraper): BROKEN_COMMENT_END = compile(r'--!>') baseUrl = 'http://phdcomics.com/' url = baseUrl + 'comics.php' stripUrl = baseUrl + 'comics/archive.php?comicid=%s' firstStripUrl = stripUrl % '1' - imageSearch = '//img[@id="comic2"]' + imageSearch = ('//img[@id="comic2"]', + r'//img[d:class("img-responsive") and re:test(@name, "comic\d+")]') + multipleImagesPerStrip = True prevSearch = '//a[img[contains(@src, "prev_button")]]' nextSearch = '//a[img[contains(@src, "next_button")]]' help = 'Index format: n (unpadded)' @@ -173,7 +166,7 @@ class PHDComics(_ParserScraper): # video self.stripUrl % '1880', self.stripUrl % '1669', - ) + ) or self.match(data, '//img[@id="comic" and contains(@src, "phd083123s")]') class Picklewhistle(ComicControlScraper): @@ -333,11 +326,12 @@ class PS238(_ParserScraper): class PvPOnline(ParserScraper): baseUrl = 'https://www.toonhoundstudios.com/' - url = baseUrl + 'pvp/' - stripUrl = baseUrl + 'comic/%s/' + stripUrl = baseUrl + 'comic/%s/?sid=372' + url = stripUrl % 'pvp-2022-09-16' firstStripUrl = stripUrl % '19980504' imageSearch = '//div[@id="spliced-comic"]//img/@data-src-img' prevSearch = '//a[d:class("prev")]' + endOfLife = True - def namer(self, imageUrl, pageUrl): - return 'pvp' + imageUrl.rsplit('/', 1)[-1] + def namer(self, image_url, page_url): + return 'pvp' + image_url.rsplit('/', 1)[-1] diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py index 5a10455cc..b20714d3a 100644 --- a/dosagelib/plugins/r.py +++ b/dosagelib/plugins/r.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2021 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile from urllib.parse import urljoin @@ -121,7 +121,7 @@ class Requiem(WordPressScraper): firstStripUrl = stripUrl % '2004-06-07-3' -class Replay(_ParserScraper): +class Replay(ParserScraper): url = 'http://replaycomic.com/' stripUrl = url + 'comic/%s/' firstStripUrl = stripUrl % 'red-desert' @@ -132,11 +132,11 @@ class Replay(_ParserScraper): def starter(self): # Retrieve archive page to identify chapters archivePage = self.getPage(self.url + 'archive') - archive = archivePage.xpath('//div[@class="comic-archive-chapter-wrap"]') + archive = self.match(archivePage, '//div[d:class("comic-archive-chapter-wrap")]') self.chapter = len(archive) - 1 self.startOfChapter = [] for archiveChapter in archive: - self.startOfChapter.append(archiveChapter.xpath('.//a')[0].get('href')) + self.startOfChapter.append(self.match(archiveChapter, './/a')[0].get('href')) return bounceStarter(self) def namer(self, imageUrl, pageUrl): diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index fb115b943..4ff001ac3 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -196,7 +196,7 @@ class Sharksplode(WordPressScraper): class Sheldon(ParserScraper): url = 'https://www.sheldoncomics.com/' firstStripUrl = url + 'comic/well-who-is-this/' - imageSearch = '//div[@id="comic"]//img' + imageSearch = '//div[@id="comic"]//img/@data-src-img' prevSearch = '//a[img[d:class("left")]]' @@ -435,7 +435,7 @@ class SpaceFurries(ParserScraper): def extract_image_urls(self, url, data): # Website requires JS, so build the list of image URLs manually imageurls = [] - current = int(data.xpath('//input[@name="pagnum"]')[0].get('value')) + current = int(self.match(data, '//input[@name="pagnum"]')[0].get('value')) for page in reversed(range(1, current + 1)): imageurls.append(self.url + 'comics/' + str(page) + '.jpg') return imageurls @@ -636,16 +636,16 @@ class StrongFemaleProtagonist(_ParserScraper): ) -class StupidFox(_ParserScraper): +class StupidFox(ParserScraper): url = 'http://stupidfox.net/' stripUrl = url + '%s' firstStripUrl = stripUrl % 'hello' - imageSearch = '//div[@class="comicmid"]//img' + imageSearch = '//div[d:class("comicmid")]//img' prevSearch = '//a[@accesskey="p"]' def namer(self, imageUrl, pageUrl): page = self.getPage(pageUrl) - title = page.xpath(self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-') + title = self.match(page, self.imageSearch + '/@title')[0].replace(' - ', '-').replace(' ', '-') return title + '.' + imageUrl.rsplit('.', 1)[-1] diff --git a/dosagelib/plugins/shivaestudios.py b/dosagelib/plugins/shivaestudios.py index ace417cbd..6bedc28a7 100644 --- a/dosagelib/plugins/shivaestudios.py +++ b/dosagelib/plugins/shivaestudios.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2019-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2021 Daniel Ring +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from .common import WordPressSpliced @@ -12,22 +12,20 @@ class _CommonMulti(WordPressSpliced): self.endOfLife = eol -class AbbysAgency(WordPressSpliced): - url = 'https://abbysagency.us/' - stripUrl = url + 'blog/comic/%s/' - firstStripUrl = stripUrl % 'a' - - class AlienDice(WordPressSpliced): url = 'https://aliendice.com/' stripUrl = url + 'comic/%s/' firstStripUrl = stripUrl % '05162001' + def shouldSkipUrl(self, url, data): + """Skip pages without images.""" + return not self.match(data, self.imageSearch) + def getPrevUrl(self, url, data): # Fix broken navigation if url == self.stripUrl % 'day-29-part-2-page-3-4': return self.stripUrl % 'day-29-part-2-page-3-2' - return super(AlienDice, self).getPrevUrl(url, data) + return super().getPrevUrl(url, data) def namer(self, imageUrl, pageUrl): # Fix inconsistent filename @@ -47,12 +45,6 @@ class AlienDiceLegacy(WordPressSpliced): return super().isfirststrip(url.rsplit('?', 1)[0]) -class BlackRose(WordPressSpliced): - url = 'https://www.blackrose.monster/' - stripUrl = url + 'comic/%s/' - firstStripUrl = stripUrl % '2004-11-01' - - class TheCyantianChronicles(_CommonMulti): baseUrl = 'https://cyantian.net/' @@ -81,9 +73,9 @@ class TheCyantianChronicles(_CommonMulti): class Shivae(WordPressSpliced): - url = 'https://shivae.com/' + url = 'https://shivae.net/' stripUrl = url + 'comic/%s/' - firstStripUrl = stripUrl % '09202001' + firstStripUrl = stripUrl % '2002-02-27' class ShivaeComics(_CommonMulti): diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index ebe864694..1919e274a 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -4,10 +4,7 @@ # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape, MULTILINE -try: - from functools import cached_property -except ImportError: - from cached_property import cached_property +from functools import cached_property from ..scraper import _BasicScraper, _ParserScraper, ParserScraper from ..helpers import indirectStarter, joinPathPartsNamer @@ -275,7 +272,7 @@ class ToonHole(ParserScraper): prevSearch = '//a[@rel="prev"]' latestSearch = '//a[@rel="bookmark"]' starter = indirectStarter - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) class TrippingOverYou(_BasicScraper): diff --git a/dosagelib/plugins/tapas.py b/dosagelib/plugins/tapas.py index f3c6088fb..68b1ee9ac 100644 --- a/dosagelib/plugins/tapas.py +++ b/dosagelib/plugins/tapas.py @@ -3,7 +3,6 @@ # SPDX-FileCopyrightText: © 2019 Daniel Ring from ..output import out from ..scraper import ParserScraper -from ..xml import NS class Tapas(ParserScraper): @@ -21,7 +20,7 @@ class Tapas(ParserScraper): def starter(self): # Retrieve comic metadata from info page info = self.getPage(self.url) - series = info.xpath('//@data-series-id')[0] + series = self.match(info, '//@data-series-id')[0] # Retrieve comic metadata from API data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST') data.raise_for_status() @@ -43,7 +42,7 @@ class Tapas(ParserScraper): return self._cached_image_urls def shouldSkipUrl(self, url, data): - if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS): + if self.match(data, '//button[d:class("js-have-to-sign")]'): out.warn(f'Nothing to download on "{url}", because a login is required.') return True return False diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index 8254a1dbd..e9e2300a0 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -107,7 +107,7 @@ class Unsounded(ParserScraper): return urls def extract_css_bg(self, page) -> str | None: - comicdivs = page.xpath('//div[@id="comic"]') + comicdivs = self.match(page, '//div[@id="comic"]') if comicdivs: style = comicdivs[0].attrib.get('style') if style: diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index 33e26b317..5c931f2af 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from ..scraper import ParserScraper, _ParserScraper from ..helpers import bounceStarter, indirectStarter @@ -27,7 +27,7 @@ class VGCats(_ParserScraper): url = 'https://www.vgcats.com/comics/' stripUrl = url + '?strip_id=%s' firstStripUrl = stripUrl % '0' - imageSearch = '//td/img[contains(@src, "images/")]' + imageSearch = '//td/font/img[contains(@src, "images/")]' prevSearch = '//a[img[contains(@src, "back.")]]' help = 'Index format: n (unpadded)' @@ -44,15 +44,15 @@ class Vibe(ParserScraper): help = 'Index format: VIBEnnn (padded)' -class VickiFox(_ParserScraper): +class VickiFox(ParserScraper): url = 'http://www.vickifox.com/comic/strip' stripUrl = url + '?id=%s' firstStripUrl = stripUrl % '001' imageSearch = '//img[contains(@src, "comic/")]' prevSearch = '//button[@id="btnPrev"]/@value' - def getPrevUrl(self, url, data): - return self.stripUrl % self.getPage(url).xpath(self.prevSearch)[0] + def link_modifier(self, fromurl, tourl): + return self.stripUrl % tourl class ViiviJaWagner(_ParserScraper): diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index 0af93415b..11543ce0d 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from re import compile, escape, IGNORECASE from ..scraper import ParserScraper, _BasicScraper, _ParserScraper @@ -17,7 +17,7 @@ class WapsiSquare(WordPressNaviIn): def shouldSkipUrl(self, url, data): """Skip pages without images.""" - return data.xpath('//iframe') # videos + return self.match(data, '//iframe') # videos class WastedTalent(_ParserScraper): diff --git a/dosagelib/plugins/webtoons.py b/dosagelib/plugins/webtoons.py index 81b2a3035..39051ffce 100644 --- a/dosagelib/plugins/webtoons.py +++ b/dosagelib/plugins/webtoons.py @@ -24,9 +24,9 @@ class WebToons(ParserScraper): self.session.cookies.set(cookie, 'false', domain='webtoons.com') # Find current episode number listPage = self.getPage(self.listUrl) - currentEpisode = listPage.xpath('//div[@class="detail_lst"]/ul/li')[0].attrib['data-episode-no'] + currentEpisode = self.match(listPage, '//div[d:class("detail_lst")]/ul/li')[0].attrib['data-episode-no'] # Check for completed tag - self.endOfLife = (listPage.xpath('//div[@id="_asideDetail"]//span[@class="txt_ico_completed2"]') != []) + self.endOfLife = not self.match(listPage, '//div[@id="_asideDetail"]//span[d:class("txt_ico_completed2")]') return self.stripUrl % currentEpisode def extract_image_urls(self, url, data): @@ -52,6 +52,7 @@ class WebToons(ParserScraper): cls('1111Animals', 'comedy/1111-animals', 437), cls('2015SpaceSeries', 'sf/2015-space-series', 391), cls('3SecondStrip', 'comedy/3-second-strip', 380), + cls('99ReinforcedStick', 'comedy/99-reinforced-wooden-stick', 4286), cls('ABittersweetLife', 'slice-of-life/a-bittersweet-life', 294), cls('AboutDeath', 'drama/about-death', 82), cls('ABudgiesLife', 'slice-of-life/its-a-budgies-life', 985), @@ -64,6 +65,7 @@ class WebToons(ParserScraper): cls('AGoodDayToBeADog', 'romance/a-good-day-tobe-a-dog', 1390), cls('Aisopos', 'drama/aisopos', 76), cls('AliceElise', 'fantasy/alice-elise', 1481), + cls('AlloyComics', 'canvas/alloy-comics', 747447), cls('AllThatWeHopeToBe', 'slice-of-life/all-that-we-hope-to-be', 470), cls('AllThatYouAre', 'drama/all-that-you-are', 403), cls('AlwaysHuman', 'romance/always-human', 557), @@ -128,6 +130,7 @@ class WebToons(ParserScraper): cls('CursedPrincessClub', 'comedy/cursed-princess-club', 1537), cls('Cyberbunk', 'sf/cyberbunk', 466), cls('Cyberforce', 'super-hero/cyberforce', 531), + cls('CydoniaShattering', 'fantasy/cydonia-shattering', 2881), cls('CykoKO', 'super-hero/cyko-ko', 560), cls('Darbi', 'action/darbi', 1098), cls('Darchon', 'challenge/darchon', 532053), @@ -153,6 +156,8 @@ class WebToons(ParserScraper): cls('DrawnToYou', 'challenge/drawn-to-you', 172022), cls('DrFrost', 'drama/dr-frost', 371), cls('DuelIdentity', 'challenge/duel-identity', 532064), + cls('DungeonCleaningLife', 'action/the-dungeon-cleaning-life-of-a-once-genius-hunter', 4677), + cls('DungeonsAndDoodlesTalesFromTheTables', 'canvas/dungeons-doodles-tales-from-the-tables', 682646), cls('DungeonMinis', 'challenge/dungeonminis', 64132), cls('Dustinteractive', 'comedy/dustinteractive', 907), cls('DutyAfterSchool', 'sf/duty-after-school', 370), @@ -170,6 +175,7 @@ class WebToons(ParserScraper): cls('FAMILYMAN', 'drama/family-man', 85), cls('FantasySketchTheGame', 'sf/fantasy-sketch', 1020), cls('Faust', 'supernatural/faust', 522), + cls('FinalRaidBoss', 'fantasy/the-final-raid-boss', 3921), cls('FINALITY', 'mystery/finality', 1457), cls('Firebrand', 'supernatural/firebrand', 877), cls('FirstDefense', 'challenge/first-defense', 532072), @@ -204,11 +210,13 @@ class WebToons(ParserScraper): cls('HeliosFemina', 'fantasy/helios-femina', 638), cls('HelloWorld', 'slice-of-life/hello-world', 827), cls('Hellper', 'fantasy/hellper', 185), + cls('Hench', 'canvas/hench/', 857225), cls('HeroineChic', 'super-hero/heroine-chic', 561), cls('HIVE', 'thriller/hive', 65), cls('Hooky', 'fantasy/hooky', 425), cls('HoovesOfDeath', 'fantasy/hooves-of-death', 1535), cls('HouseOfStars', 'fantasy/house-of-stars', 1620), + cls('HowToBeAMindReaver', 'canvas/how-to-be-a-mind-reaver', 301213), cls('HowToBecomeADragon', 'fantasy/how-to-become-a-dragon', 1973), cls('HowToLove', 'slice-of-life/how-to-love', 472), cls('IDontWantThisKindOfHero', 'super-hero/i-dont-want-this-kind-of-hero', 98), @@ -235,6 +243,7 @@ class WebToons(ParserScraper): cls('KindOfLove', 'slice-of-life/kind-of-love', 1850), cls('KissItGoodbye', 'challenge/kiss-it-goodbye', 443703), cls('KnightRun', 'sf/knight-run', 67), + cls('KnightUnderMyHeart', 'action/knight-under-my-heart', 4215), cls('Kubera', 'fantasy/kubera', 83), cls('LalinsCurse', 'supernatural/lalins-curse', 1601), cls('Lars', 'slice-of-life/lars', 358), @@ -261,6 +270,7 @@ class WebToons(ParserScraper): cls('LUMINE', 'fantasy/lumine', 1022), cls('Lunarbaboon', 'slice-of-life/lunarbaboon', 523), cls('MageAndDemonQueen', 'comedy/mage-and-demon-queen', 1438), + cls('MageAndMimic', 'comedy/mage-and-mimic', 5973), cls('Magical12thGraders', 'super-hero/magical-12th-graders', 90), cls('Magician', 'fantasy/magician', 70), cls('MagicSodaPop', 'fantasy/magic-soda-pop', 1947), @@ -292,6 +302,8 @@ class WebToons(ParserScraper): cls('MyGiantNerdBoyfriend', 'slice-of-life/my-giant-nerd-boyfriend', 958), cls('MyKittyAndOldDog', 'slice-of-life/my-kitty-and-old-dog', 184), cls('MyNameIsBenny', 'slice-of-life/my-name-is-benny', 1279), + cls('MySClassHunter', 'action/my-s-class-hunters', 3963), + cls('MythicItemObtained', 'fantasy/mythic-item-obtained', 4582), cls('MyWallflowerKiss', 'challenge/my-wallflower-kiss', 151869), cls('NanoList', 'sf/nano-list', 700), cls('NationalDogDay2016', 'slice-of-life/national-dog-day', 747), @@ -439,6 +451,7 @@ class WebToons(ParserScraper): cls('UpAndOut', 'slice-of-life/up-and-out', 488), cls('UrbanAnimal', 'super-hero/urban-animal', 1483), cls('Uriah', 'horror/uriah', 1607), + cls('VampireFamily', 'comedy/vampire-family', 6402), cls('VarsityNoir', 'mystery/varsity-noir', 1613), cls('VersionDayAndNight', 'drama/version-day-and-night', 1796), cls('WafflesAndPancakes', 'slice-of-life/waffles-and-pancakes', 1310), diff --git a/dosagelib/plugins/wrongside.py b/dosagelib/plugins/wrongside.py index 78bc4a080..ce75d38bf 100644 --- a/dosagelib/plugins/wrongside.py +++ b/dosagelib/plugins/wrongside.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2019-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2022 Daniel Ring +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring from ..scraper import ParserScraper from ..helpers import indirectStarter @@ -15,21 +15,21 @@ class Wrongside(ParserScraper): def starter(self): archivePage = self.getPage(self.url) - chapterUrls = archivePage.xpath('//ul[@class="albThumbs"]//a/@href') + chapterUrls = self.match(archivePage, '//ul[d:class("albThumbs")]//a/@href') self.archive = [] for chapterUrl in chapterUrls: chapterPage = self.getPage(chapterUrl) - self.archive.append(chapterPage.xpath('(//ul[@id="thumbnails"]//a/@href)[last()]')[0]) + self.archive.append(self.match(chapterPage, '(//ul[@id="thumbnails"]//a/@href)[last()]')[0]) return self.archive[0] def getPrevUrl(self, url, data): - if data.xpath(self.prevSearch) == [] and len(self.archive) > 0: + if self.match(data, self.prevSearch) == [] and len(self.archive) > 0: return self.archive.pop() return super(Wrongside, self).getPrevUrl(url, data) def namer(self, imageUrl, pageUrl): page = self.getPage(pageUrl) - title = page.xpath('//div[@class="browsePath"]/h2/text()')[0] + title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0] return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1] @@ -71,5 +71,5 @@ class WrongsideSideStories(ParserScraper): def namer(self, imageUrl, pageUrl): page = self.getPage(pageUrl) - title = page.xpath('//div[@class="browsePath"]/h2/text()')[0] + title = self.match(page, '//div[d:class("browsePath")]/h2/text()')[0] return title.replace('"', '') + '.' + imageUrl.rsplit('.', 1)[-1] diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index f7556110a..f5ef8e954 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -23,7 +23,7 @@ class Zapiro(ParserScraper): imageSearch = '//div[@id="cartoon"]/img' prevSearch = '//a[d:class("left")]' nextSearch = '//a[d:class("right")]' - namer = joinPathPartsNamer((-1,), ()) + namer = joinPathPartsNamer(pageparts=(-1,)) class ZenPencils(WordPressNavi): @@ -60,7 +60,7 @@ class Zwarwald(BasicScraper): tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")) - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) help = 'Index format: number' def shouldSkipUrl(self, url, data): diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 5a411b9b4..b0f436744 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -119,45 +119,45 @@ class Scraper: if val: self._indexes = tuple(sorted(val)) - def __init__(self, name): + def __init__(self, name: str) -> None: """Initialize internal variables.""" self.name = name - self.urls = set() + self.urls: set[str] = set() self._indexes = () - self.skippedUrls = set() + self.skippedUrls: set[str] = set() self.hitFirstStripUrl = False - def __hash__(self): + def __hash__(self) -> int: """Get hash value from name and index list.""" return hash((self.name, self.indexes)) - def shouldSkipUrl(self, url, data): + def shouldSkipUrl(self, url: str, data) -> bool: """Determine if search for images in given URL should be skipped.""" return False - def getComicStrip(self, url, data): + def getComicStrip(self, url, data) -> ComicStrip: """Get comic strip downloader for given URL and data.""" - imageUrls = self.extract_image_urls(url, data) + urls = self.extract_image_urls(url, data) # map modifier function on image URLs - imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls] + urls = [self.imageUrlModifier(x, data) for x in urls] # remove duplicate URLs - imageUrls = uniq(imageUrls) - if len(imageUrls) > 1 and not self.multipleImagesPerStrip: + urls = uniq(urls) + if len(urls) > 1 and not self.multipleImagesPerStrip: out.warn( u"Found %d images instead of 1 at %s with expressions %s" % - (len(imageUrls), url, prettyMatcherList(self.imageSearch))) - image = imageUrls[0] - out.warn(u"Choosing image %s" % image) - imageUrls = (image,) - elif not imageUrls: - out.warn(u"Found no images at %s with expressions %s" % (url, + (len(urls), url, prettyMatcherList(self.imageSearch))) + image = urls[0] + out.warn("Choosing image %s" % image) + urls = (image,) + elif not urls: + out.warn("Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch))) if self.textSearch: text = self.fetchText(url, data, self.textSearch, optional=self.textOptional) else: text = None - return ComicStrip(self, url, imageUrls, text=text) + return ComicStrip(self, url, urls, text=text) def getStrips(self, maxstrips=None): """Get comic strips.""" @@ -217,7 +217,7 @@ class Scraper: break url = prevUrl - def isfirststrip(self, url): + def isfirststrip(self, url: str) -> bool: """Check if the specified URL is the first strip of a comic. This is specially for comics taken from archive.org, since the base URL of archive.org changes whenever pages are taken from a different @@ -228,7 +228,7 @@ class Scraper: currenturl = ARCHIVE_ORG_URL.sub('', url) return firsturl == currenturl - def getPrevUrl(self, url, data): + def getPrevUrl(self, url: str, data) -> str | None: """Find previous URL.""" prevUrl = None if self.prevSearch: @@ -243,40 +243,40 @@ class Scraper: getHandler().comicPageLink(self, url, prevUrl) return prevUrl - def getIndexStripUrl(self, index): + def getIndexStripUrl(self, index: str) -> str: """Get comic strip URL from index.""" return self.stripUrl % index - def starter(self): + def starter(self) -> str: """Get starter URL from where to scrape comic strips.""" return self.url - def namer(self, image_url, page_url): + def namer(self, image_url: str, page_url: str) -> str | None: """Return filename for given image and page URL.""" return - def link_modifier(self, fromurl, tourl): + def link_modifier(self, fromurl: str, tourl: str) -> str: """Optional modification of parsed link (previous/back/latest) URLs. Useful if there are domain redirects. The default implementation does not modify the URL. """ return tourl - def imageUrlModifier(self, image_url, data): + def imageUrlModifier(self, image_url: str, data) -> str: """Optional modification of parsed image URLs. Useful if the URL needs to be fixed before usage. The default implementation does not modify the URL. The given data is the URL page data. """ return image_url - def vote(self): + def vote(self) -> None: """Cast a public vote for this comic.""" uid = get_system_uid() data = {"name": self.name.replace('/', '_'), "uid": uid} response = self.session.post(configuration.VoteUrl, data=data) response.raise_for_status() - def get_download_dir(self, basepath): + def get_download_dir(self, basepath: str) -> str: """Try to find the corect download directory, ignoring case differences.""" path = basepath @@ -294,16 +294,16 @@ class Scraper: path = os.path.join(path, part) return path - def getCompleteFile(self, basepath): + def getCompleteFile(self, basepath: str) -> str: """Get filename indicating all comics are downloaded.""" dirname = self.get_download_dir(basepath) return os.path.join(dirname, "complete.txt") - def isComplete(self, basepath): + def isComplete(self, basepath: str) -> bool: """Check if all comics are downloaded.""" return os.path.isfile(self.getCompleteFile(basepath)) - def setComplete(self, basepath): + def setComplete(self, basepath: str) -> None: """Set complete flag for this comic, ie. all comics are downloaded.""" if self.endOfLife: filename = self.getCompleteFile(basepath) @@ -521,15 +521,10 @@ class ParserScraper(Scraper): return text.strip() def _matchPattern(self, data, patterns): - if self.css: - searchFun = data.cssselect - else: - def searchFun(s): - return data.xpath(s, namespaces=NS) patterns = makeSequence(patterns) for search in patterns: matched = False - for match in searchFun(search): + for match in self.match(data, search): matched = True yield match, search @@ -537,6 +532,13 @@ class ParserScraper(Scraper): # do not search other links if one pattern matched break + def match(self, data, pattern): + """Match a pattern (XPath/CSS) against a page.""" + if self.css: + return data.cssselect(pattern) + else: + return data.xpath(pattern, namespaces=NS) + def getDisabledReasons(self): res = {} if self.css and cssselect is None: diff --git a/pyproject.toml b/pyproject.toml index c5217a4c0..10c294f4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -27,15 +26,13 @@ classifiers = [ "Topic :: Multimedia :: Graphics", ] keywords = ["comic", "webcomic", "downloader", "archiver", "crawler"] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "colorama", "imagesize", "lxml>=4.0.0", "platformdirs", "requests>=2.0", - "cached_property;python_version<'3.8'", - "importlib_metadata;python_version<'3.8'", "importlib_resources>=5.0.0;python_version<'3.9'", ] dynamic = ["version"] @@ -101,7 +98,7 @@ ignore = [ ] noqa-require-code = true no-accept-encodings = true -min-version = "3.7" +min-version = "3.8" extend-exclude = [ '.venv', 'build', diff --git a/scripts/comicskingdom.py b/scripts/comicskingdom.py index b792bd675..c5ee04c85 100755 --- a/scripts/comicskingdom.py +++ b/scripts/comicskingdom.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: MIT -# Copyright (C) 2019-2022 Tobias Gruetzmacher -# Copyright (C) 2019 Thomas W. Littauer +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer """ Script to get a list of comicskingdom.com comics and save the info in a JSON file for further processing. @@ -19,39 +19,17 @@ class ComicsKingdomUpdater(ComicListUpdater): "ComicGenesis/%s", ) - def handle_startpage(self, page): - """Parse list of comics from the bottom of the start page.""" - for li in page.xpath('//div[d:class("comics-list")]//li', namespaces=NS): - link = li.xpath('./a')[0] + def handle_listing(self, page): + for link in page.xpath('//ul[d:class("index")]//a', namespaces=NS): + name = link.text_content().removeprefix('The ') url = link.attrib['href'] - name = link.text.removeprefix('The ') + lang = 'es' if ' (Spanish)' in name else None - self.add_comic(name, (url, None)) - - def handle_listing(self, page, lang: str = None, add: str = ''): - - hasnew = True - while hasnew: - hasnew = False - for comicdiv in page.xpath('//div[d:class("tile")]', namespaces=NS): - nametag = comicdiv.xpath('./a/comic-name') - if len(nametag) == 0: - continue - name = nametag[0].text.removeprefix('The ') + add - url = comicdiv.xpath('./a')[0].attrib['href'] - - if self.add_comic(name, (url, lang)): - hasnew = True - - nextlink = page.xpath('//a[./img[contains(@src, "page-right")]]') - page = self.get_url(nextlink[0].attrib['href']) + self.add_comic(name, (url, lang)) def collect_results(self): """Parse all search result pages.""" - page = self.get_url('https://www.comicskingdom.com/') - self.handle_startpage(page) - self.handle_listing(page) - self.handle_listing(self.get_url('https://www.comicskingdom.com/spanish'), 'es', 'Spanish') + self.handle_listing(self.get_url('https://comicskingdom.com/features')) def get_entry(self, name: str, data: tuple[str, str]): opt = f", lang='{data[1]}'" if data[1] else '' diff --git a/scripts/dosage.spec b/scripts/dosage.spec index eb9883c25..68d00026a 100644 --- a/scripts/dosage.spec +++ b/scripts/dosage.spec @@ -1,28 +1,30 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2017-2020 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2017 Tobias Gruetzmacher + +import re +from importlib import metadata # Idea from # https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Setuptools-Entry-Point, # but with importlib -def Entrypoint(group, name, **kwargs): - import re - try: - from importlib.metadata import entry_points - except ImportError: - from importlib_metadata import entry_points - +def entrypoint(group, name, **kwargs): # get the entry point - eps = entry_points()[group] - ep = next(ep for ep in eps if ep.name == name) - module, attr = re.split(r'\s*:\s*', ep.value, 1) + eps = metadata.entry_points() + if 'select' in dir(eps): + # modern + ep = eps.select(group=group)[name] + else: + # legacy (pre-3.10) + ep = next(ep for ep in eps[group] if ep.name == name) + module, attr = re.split(r'\s*:\s*', ep.value, maxsplit=1) # script name must not be a valid module name to avoid name clashes on import script_path = os.path.join(workpath, name + '-script.py') print("creating script for entry point", group, name) - with open(script_path, 'w') as fh: + with open(script_path, mode='w', encoding='utf-8') as fh: print("import sys", file=fh) print("import", module, file=fh) - print("sys.exit(%s.%s())" % (module, attr), file=fh) + print(f"sys.exit({module}.{attr}())", file=fh) return Analysis( [script_path] + kwargs.get('scripts', []), @@ -30,7 +32,7 @@ def Entrypoint(group, name, **kwargs): ) -a = Entrypoint('console_scripts', 'dosage') +a = entrypoint('console_scripts', 'dosage') a.binaries = [x for x in a.binaries if not x[1].lower().startswith(r'c:\windows')] diff --git a/scripts/gocomics.py b/scripts/gocomics.py index 653c605ec..6637682a7 100755 --- a/scripts/gocomics.py +++ b/scripts/gocomics.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher """ Script to get a list of gocomics and save the info in a JSON file for further processing. @@ -20,6 +20,8 @@ class GoComicsUpdater(ComicListUpdater): excluded_comics = ( # too short 'LukeyMcGarrysTLDR', + # Has its own module + 'Widdershins', ) def handle_gocomics(self, url, outercss='a.gc-blended-link', lang=None): diff --git a/scripts/order-symlinks.py b/scripts/order-symlinks.py index c38511676..a0079c806 100755 --- a/scripts/order-symlinks.py +++ b/scripts/order-symlinks.py @@ -61,7 +61,10 @@ def create_symlinks(d): else: order.extend(data["pages"][work]["images"].values()) if "prev" in data["pages"][work]: - work = data["pages"][work]["prev"] + if data["pages"][work]["prev"] == work: + work = None + else: + work = data["pages"][work]["prev"] else: work = None order.reverse() diff --git a/tests/test_comicnames.py b/tests/test_comicnames.py index 8d3b11e5e..a9d69c4a5 100644 --- a/tests/test_comicnames.py +++ b/tests/test_comicnames.py @@ -3,12 +3,15 @@ # Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2015-2022 Tobias Gruetzmacher import re +from operator import attrgetter + +import pytest from dosagelib.scraper import scrapers from dosagelib.plugins import old -class TestComicNames(object): +class TestComicNames: def test_names(self): for scraperobj in scrapers.all(): @@ -20,11 +23,11 @@ class TestComicNames(object): comicname = name assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname - def test_renamed(self): - for scraperobj in scrapers.all(include_removed=True): - if not isinstance(scraperobj, old.Renamed): - continue - assert len(scraperobj.getDisabledReasons()) > 0 - # Renamed scraper should only point to an non-disabled scraper - newscraper = scrapers.find(scraperobj.newname) - assert len(newscraper.getDisabledReasons()) == 0 + @pytest.mark.parametrize(('scraperobj'), + [obj for obj in scrapers.all(include_removed=True) + if isinstance(obj, old.Renamed)], ids=attrgetter('name')) + def test_renamed(self, scraperobj): + assert len(scraperobj.getDisabledReasons()) > 0 + # Renamed scraper should only point to an non-disabled scraper + newscraper = scrapers.find(scraperobj.newname) + assert len(newscraper.getDisabledReasons()) == 0 diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 598a74fc4..8c13c89ca 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher from dosagelib.helpers import joinPathPartsNamer, queryNamer -class TestNamer(object): +class TestNamer: """ Tests for comic namer. """ @@ -16,6 +16,8 @@ class TestNamer(object): def test_joinPathPartsNamer(self): imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png' pageurl = 'https://HOST/2019/03/11/12450/' - assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png' - assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png' - assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450' + assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self, + imgurl, pageurl) == '2019_03_11_tennis5wp-1.png' + assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self, + imgurl, pageurl) == '2019-03-11-tennis5wp-1.png' + assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450' diff --git a/tox.ini b/tox.ini index 27eed7f37..02b4ffe3f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,9 @@ [tox] -envlist = py37, py38, py39, py310, py311, py312, flake8 +envlist = py38, py39, py310, py311, py312, flake8 isolated_build = True [gh-actions] python = - 3.7: py37 3.8: py38 3.9: py39 3.10: py310