Fix some more modules

This commit is contained in:
Tobias Gruetzmacher 2024-02-19 00:53:36 +01:00
parent da60636b8a
commit 05b9be4cd9
No known key found for this signature in database
2 changed files with 27 additions and 58 deletions

View file

@ -1,18 +1,18 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# Copyright (C) 2015-2022 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape, MULTILINE from re import compile, MULTILINE
from ..util import tagre from ..util import tagre
from ..scraper import BasicScraper, ParserScraper, _BasicScraper, _ParserScraper from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter from ..helpers import joinPathPartsNamer, bounceStarter, indirectStarter
from .common import WordPressScraper, WordPressNavi, WordPressWebcomic from .common import WordPressScraper, WordPressNavi, WordPressWebcomic
class AbstruseGoose(_ParserScraper): class AbstruseGoose(ParserScraper):
url = 'https://abstrusegoose.com/' url = 'https://web.archive.org/web/20230930172141/https://abstrusegoose.com/'
starter = bounceStarter starter = bounceStarter
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
@ -41,24 +41,16 @@ class AbsurdNotions(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AcademyVale(_BasicScraper): class Achewood(ParserScraper):
url = 'http://www.imagerie.com/vale/' baseUrl = 'https://achewood.com/'
stripUrl = url + 'avarch.cgi?%s' stripUrl = baseUrl + '%s/title.html'
firstStripUrl = stripUrl % '001' url = stripUrl % '2016/12/25'
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) firstStripUrl = stripUrl % '2001/10/01'
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + imageSearch = '//img[d:class("comicImage")]'
tagre('img', 'src', r'AVNavBack\.gif')) prevSearch = '//a[d:class("comic_prev")]'
help = 'Index format: nnn' namer = joinPathPartsNamer(pageparts=range(0, 2))
help = 'Index format: yyyy/mm/dd'
endOfLife = True
class Achewood(_ParserScraper):
url = 'https://www.achewood.com/'
stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '10012001'
imageSearch = '//p[@id="comic_body"]//img'
prevSearch = '//span[d:class("left")]/a[d:class("dateNav")]'
help = 'Index format: mmddyyyy'
namer = regexNamer(compile(r'date=(\d+)'))
class AdventuresOfFifne(_ParserScraper): class AdventuresOfFifne(_ParserScraper):
@ -117,12 +109,8 @@ class AhoiPolloi(_ParserScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class AhoyEarth(WordPressNavi):
url = 'http://www.ahoyearth.com/'
class AirForceBlues(WordPressScraper): class AirForceBlues(WordPressScraper):
url = 'http://farvatoons.com/' url = 'https://web.archive.org/web/20210102113825/http://farvatoons.com/'
firstStripUrl = url + 'comic/in-texas-there-are-texans/' firstStripUrl = url + 'comic/in-texas-there-are-texans/'
@ -207,14 +195,11 @@ class AltermetaOld(_ParserScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AmazingSuperPowers(_BasicScraper): class AmazingSuperPowers(WordPressNavi):
url = 'http://www.amazingsuperpowers.com/' url = 'https://www.amazingsuperpowers.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2007/09/heredity' firstStripUrl = stripUrl % '2007/09/heredity'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = '//div[d:class("comicpane")]/img'
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name'
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
@ -243,18 +228,6 @@ class Amya(WordPressScraper):
url = 'http://www.amyachronicles.com/' url = 'http://www.amyachronicles.com/'
class Anaria(_ParserScraper):
url = 'https://www.leahbriere.com/anaria-the-witchs-dream/'
firstStripUrl = url
imageSearch = '//div[contains(@class, "gallery")]//a'
multipleImagesPerStrip = True
endOfLife = True
def namer(self, imageUrl, pageUrl):
filename = imageUrl.rsplit('/', 1)[-1]
return filename.replace('00.jpg', 'new00.jpg').replace('new', '1')
class Angband(_ParserScraper): class Angband(_ParserScraper):
url = 'http://angband.calamarain.net/' url = 'http://angband.calamarain.net/'
stripUrl = url + '%s' stripUrl = url + '%s'
@ -272,14 +245,6 @@ class Angband(_ParserScraper):
return self.pages[self.pages.index(url) - 1] return self.pages[self.pages.index(url) - 1]
class Angels2200(_BasicScraper):
url = 'http://www.janahoffmann.com/angels/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'"))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous")
help = 'Index format: yyyy/mm/dd/part-<n>-comic-<n>'
class Annyseed(_ParserScraper): class Annyseed(_ParserScraper):
baseUrl = ('https://web.archive.org/web/20190511031451/' baseUrl = ('https://web.archive.org/web/20190511031451/'
'http://www.mirrorwoodcomics.com/') 'http://www.mirrorwoodcomics.com/')

View file

@ -1582,6 +1582,10 @@ class Removed(Scraper):
# Removed in 3.1 # Removed in 3.1
cls('AbbysAgency', 'brk'), cls('AbbysAgency', 'brk'),
cls('AcademyVale'),
cls('AhoyEarth', 'block'),
cls('Anaria', 'del'),
cls('Angels2200', 'del'),
cls('BlackRose', 'brk'), cls('BlackRose', 'brk'),
cls('CatenaManor/CatenaCafe'), cls('CatenaManor/CatenaCafe'),
cls('FalseStart'), cls('FalseStart'),