From 05b9be4cd9da4730bbf7c87b07d49c552c1b260e Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 19 Feb 2024 00:53:36 +0100 Subject: [PATCH] Fix some more modules --- dosagelib/plugins/a.py | 81 ++++++++++++---------------------------- dosagelib/plugins/old.py | 4 ++ 2 files changed, 27 insertions(+), 58 deletions(-) diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 32a5b42ce..2a4ef5b70 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -1,18 +1,18 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2022 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring -from re import compile, escape, MULTILINE +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring +from re import compile, MULTILINE from ..util import tagre -from ..scraper import BasicScraper, ParserScraper, _BasicScraper, _ParserScraper -from ..helpers import regexNamer, bounceStarter, indirectStarter +from ..scraper import ParserScraper, _BasicScraper, _ParserScraper +from ..helpers import joinPathPartsNamer, bounceStarter, indirectStarter from .common import WordPressScraper, WordPressNavi, WordPressWebcomic -class AbstruseGoose(_ParserScraper): - url = 'https://abstrusegoose.com/' +class AbstruseGoose(ParserScraper): + url = 'https://web.archive.org/web/20230930172141/https://abstrusegoose.com/' starter = bounceStarter stripUrl = url + '%s' firstStripUrl = stripUrl % '1' @@ -41,24 +41,16 @@ class AbsurdNotions(_BasicScraper): help = 'Index format: n (unpadded)' -class AcademyVale(_BasicScraper): - url = 'http://www.imagerie.com/vale/' - stripUrl = url + 'avarch.cgi?%s' - firstStripUrl = stripUrl % '001' - imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) - prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + - tagre('img', 'src', r'AVNavBack\.gif')) - help = 'Index format: nnn' - - -class Achewood(_ParserScraper): - url = 'https://www.achewood.com/' - stripUrl = url + 'index.php?date=%s' - firstStripUrl = stripUrl % '10012001' - imageSearch = '//p[@id="comic_body"]//img' - prevSearch = '//span[d:class("left")]/a[d:class("dateNav")]' - help = 'Index format: mmddyyyy' - namer = regexNamer(compile(r'date=(\d+)')) +class Achewood(ParserScraper): + baseUrl = 'https://achewood.com/' + stripUrl = baseUrl + '%s/title.html' + url = stripUrl % '2016/12/25' + firstStripUrl = stripUrl % '2001/10/01' + imageSearch = '//img[d:class("comicImage")]' + prevSearch = '//a[d:class("comic_prev")]' + namer = joinPathPartsNamer(pageparts=range(0, 2)) + help = 'Index format: yyyy/mm/dd' + endOfLife = True class AdventuresOfFifne(_ParserScraper): @@ -117,12 +109,8 @@ class AhoiPolloi(_ParserScraper): help = 'Index format: yyyymmdd' -class AhoyEarth(WordPressNavi): - url = 'http://www.ahoyearth.com/' - - class AirForceBlues(WordPressScraper): - url = 'http://farvatoons.com/' + url = 'https://web.archive.org/web/20210102113825/http://farvatoons.com/' firstStripUrl = url + 'comic/in-texas-there-are-texans/' @@ -207,14 +195,11 @@ class AltermetaOld(_ParserScraper): help = 'Index format: n (unpadded)' -class AmazingSuperPowers(_BasicScraper): - url = 'http://www.amazingsuperpowers.com/' - rurl = escape(url) +class AmazingSuperPowers(WordPressNavi): + url = 'https://www.amazingsuperpowers.com/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2007/09/heredity' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/name' + imageSearch = '//div[d:class("comicpane")]/img' def shouldSkipUrl(self, url, data): """Skip pages without images.""" @@ -243,18 +228,6 @@ class Amya(WordPressScraper): url = 'http://www.amyachronicles.com/' -class Anaria(_ParserScraper): - url = 'https://www.leahbriere.com/anaria-the-witchs-dream/' - firstStripUrl = url - imageSearch = '//div[contains(@class, "gallery")]//a' - multipleImagesPerStrip = True - endOfLife = True - - def namer(self, imageUrl, pageUrl): - filename = imageUrl.rsplit('/', 1)[-1] - return filename.replace('00.jpg', 'new00.jpg').replace('new', '1') - - class Angband(_ParserScraper): url = 'http://angband.calamarain.net/' stripUrl = url + '%s' @@ -272,14 +245,6 @@ class Angband(_ParserScraper): return self.pages[self.pages.index(url) - 1] -class Angels2200(_BasicScraper): - url = 'http://www.janahoffmann.com/angels/' - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) - prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous") - help = 'Index format: yyyy/mm/dd/part--comic-' - - class Annyseed(_ParserScraper): baseUrl = ('https://web.archive.org/web/20190511031451/' 'http://www.mirrorwoodcomics.com/') diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 7cda0dc1a..72f423f94 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -1582,6 +1582,10 @@ class Removed(Scraper): # Removed in 3.1 cls('AbbysAgency', 'brk'), + cls('AcademyVale'), + cls('AhoyEarth', 'block'), + cls('Anaria', 'del'), + cls('Angels2200', 'del'), cls('BlackRose', 'brk'), cls('CatenaManor/CatenaCafe'), cls('FalseStart'),