Try to fix some more comics using the Internet Archive
This commit is contained in:
parent 5b3bfdd09e
commit 87f4049347
10 changed files with 27 additions and 50 deletions
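The pattern repeated across these files: when a comic's original site has gone offline, point the scraper's url at a fixed Wayback Machine snapshot, switch regex-based matching to XPath via _ParserScraper where needed, and flag the module with endOfLife = True. A minimal sketch of that pattern as a dosage plugin module (the class name, domain, and snapshot timestamp below are illustrative placeholders, not taken from any file in this diff):

# Hypothetical plugin sketch of the Internet Archive pattern used in this commit.
# "ExampleComic", its domain, and the snapshot timestamp are placeholders.
from ..helpers import indirectStarter
from ..scraper import _ParserScraper


class ExampleComic(_ParserScraper):
    # Serve everything from a fixed Wayback Machine snapshot of the dead site.
    url = 'https://web.archive.org/web/20200101000000/http://example-comic.invalid/'
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[@id="comicimg"]'   # XPath expressions instead of tagre() regexes
    prevSearch = '//a[text()="Prev"]'
    starter = indirectStarter               # begin at the archived front page
    endOfLife = True                        # the original site is gone
    help = 'Index format: number'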
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape
 
@@ -121,14 +121,14 @@ class ForestHill(_WordPressScraper):
     url = 'https://www.foresthillcomic.org/'
 
 
-class ForLackOfABetterComic(_BasicScraper):
-    url = 'http://forlackofabettercomic.com/'
-    rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
+class ForLackOfABetterComic(_ParserScraper):
+    url = 'https://web.archive.org/web/20200224010115/http://forlackofabettercomic.com/'
     stripUrl = url + '?id=%s'
     firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src", r'(%simg/comic/\d+[^"]+)' % rurl, after="comicimg"))
-    prevSearch = compile(tagre("a", "href", r'(%s\?id\=\d+)' % rurl) + r'Prev')
+    imageSearch = '//img[@id="comicimg"]'
+    prevSearch = '//a[text()="Prev"]'
     help = 'Index format: number'
+    endOfLife = True
 
 
 class FoxDad(_ParserScraper):
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 from re import compile, escape
 
 from ..scraper import _BasicScraper
@@ -44,14 +44,3 @@ class JohnnyWander(_ComicControlScraper):
     imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
                    _ComicControlScraper.imageSearch)
     url = 'http://www.johnnywander.com/'
-
-
-class JustAnotherEscape(_BasicScraper):
-    url = 'http://www.justanotherescape.com/'
-    rurl = escape(url)
-    stripUrl = url + 'index.cgi?date=%s'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href",
-                               r'(%s/index\.cgi\?date=\d+)' % rurl) +
-                         tagre("img", "alt", "Previous Comic"))
-    help = 'Index format: yyyymmdd'
@@ -22,7 +22,8 @@ class OctopusPie(_ParserScraper):
 
 
 class OffWhite(_ParserScraper):
-    stripUrl = 'http://off-white.eu/comic/%s/'
+    baseUrl = 'https://web.archive.org/web/20200627222318/http://off-white.eu/'
+    stripUrl = baseUrl + 'comic/%s/'
     firstStripUrl = stripUrl % 'prologue-page-1-2'
     url = firstStripUrl
     imageSearch = '//img[@class="comic-page"]'
@@ -34,7 +35,7 @@ class OffWhite(_ParserScraper):
     def fetchUrls(self, url, data, urlSearch):
         # Fix missing page
         if url == self.stripUrl % 'page-37':
-            return ['http://off-white.eu/ow_v2/wp-content/uploads/2011/01/new-037.jpg']
+            return [self.baseUrl + 'ow_v2/wp-content/uploads/2011/01/new-037.jpg']
         return super(OffWhite, self).fetchUrls(url, data, urlSearch)
 
     def getPrevUrl(self, url, data):
@@ -673,10 +673,12 @@ class Removed(Scraper):
             cls('GoComics/UncleArtsFunland'),
             cls('GoComics/USAcres'),
             cls('GoComics/WorldOfWonder'),
+            cls('JustAnotherEscape'),
             cls('Laiyu', 'brk'),
             cls('MangaDex/DrStone', 'legal'),
             cls('MangaDex/HeavensDesignTeam', 'legal'),
             cls('MangaDex/SPYxFAMILY', 'legal'),
+            cls('Ryugou'),
             cls('SmackJeeves/20TimesKirby'),
             cls('SmackJeeves/2Kingdoms'),
             cls('SmackJeeves/355Days'),
@@ -308,13 +308,14 @@ class ProphecyOfTheCircle(_WPNavi):
 
 
 class Prototype(_ParserScraper):
-    stripUrl = 'http://planetprototype.com/%s/'
+    stripUrl = 'https://web.archive.org/web/20201030035444/http://planetprototype.com/%s/'
     firstStripUrl = stripUrl % '2018/03/30/vol-1-ch-1-front-cover'
     url = firstStripUrl
     imageSearch = '//img[contains(@class, "wp-post-image")]'
     prevSearch = '//a[.//text()="Previous"]'
     latestSearch = '//a[.//text()="Latest"]'
     starter = indirectStarter
+    endOfLife = True
 
 
 class PS238(_ParserScraper):
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile
 from urllib.parse import urljoin
@@ -153,22 +153,3 @@ class Ruthe(_BasicScraper):
     imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
     prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)'))
     help = 'Index format: number'
-
-
-class Ryugou(_WPWebcomic):
-    url = 'http://ryugou.swashbuckledcomics.com/'
-    stripUrl = url + 'comic/%s/'
-    firstStripUrl = 'ryugou-chapter-1-cover'
-    starter = bounceStarter
-    adult = True
-
-    def namer(self, imageUrl, pageUrl):
-        title = pageUrl.rstrip('/').rsplit('/', 1)[-1]
-        ext = imageUrl.rsplit('.', 1)[-1]
-        return title + '.' + ext
-
-    def fetchUrls(self, url, data, urlSearch):
-        imageUrls = super(Ryugou, self).fetchUrls(url, data, urlSearch)
-        if url == self.stripUrl % '1-3':
-            imageUrls = [imageUrls[1]]
-        return imageUrls
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape, IGNORECASE, sub
 from os.path import splitext
@@ -345,11 +345,13 @@ class SnowFlame(_WordPressScraper):
 
 
 class SodiumEyes(_WordPressScraper):
-    url = 'http://sodiumeyes.com/'
+    url = 'https://web.archive.org/web/20200220041406/http://sodiumeyes.com/'
+    starter = indirectStarter
+    endOfLife = True
 
 
 class SoloLeveling(_ParserScraper):
-    url = 'https://w1.sololeveling.net/'
+    url = 'https://w3.sololeveling.net/'
     stripUrl = url + 'manga/solo-leveling-chapter-%s/'
     firstStripUrl = stripUrl % '1'
     imageSearch = '//div[@class="img_container"]//img'
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 from re import compile, escape
 
 from ..scraper import _BasicScraper, _ParserScraper
@@ -27,12 +27,13 @@ class Zapiro(_ParserScraper):
 
 
 class ZenPencils(_WPNavi):
-    url = 'https://zenpencils.com/'
+    url = 'https://web.archive.org/web/20200723091741/https://zenpencils.com/'
     multipleImagesPerStrip = True
     firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
     starter = bounceStarter
     prevSearch = '//a[d:class("navi-prev")]'
     nextSearch = '//a[d:class("navi-next")]'
+    endOfLife = True
 
 
 class ZombieHunters(_BasicScraper):
@@ -155,9 +155,9 @@ class TestDosage(object):
 
     @responses.activate
    def test_json_page_key_bounce_and_multi_image(self, tmpdir):
-        httpmocks.page('https://zenpencils.com/', 'zp-home')
-        httpmocks.page('https://zenpencils.com/comic/missing/', 'zp-223')
-        httpmocks.page('https://zenpencils.com/comic/lifejacket/', 'zp-222')
+        httpmocks.page(re.compile(r'.*com/$'), 'zp-home')
+        httpmocks.page(re.compile(r'.*com/comic/missing/$'), 'zp-223')
+        httpmocks.page(re.compile(r'.*com/comic/lifejacket/$'), 'zp-222')
         httpmocks.jpeg(re.compile(r'https://cdn-.*\.jpg'))
 
         cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")
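Because ZenPencils now resolves through a web.archive.org prefix, the test above matches the mocked pages by regex instead of exact URL, so one fixture serves both the original and the archived address. A standalone sketch of the same idea with the responses library (the URLs and body here are illustrative, not part of the dosage test suite):

# Standalone sketch: one regex-matched mock registration covers both the live
# URL and its Wayback-prefixed variant. URLs and body are illustrative.
import re

import requests
import responses


@responses.activate
def demo():
    responses.add(responses.GET, re.compile(r'.*zenpencils\.com/$'),
                  body='<html>home</html>', status=200)
    # Both requests hit the same registered mock:
    requests.get('https://zenpencils.com/')
    requests.get('https://web.archive.org/web/20200723091741/https://zenpencils.com/')


demo()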
@@ -44,7 +44,7 @@ class TestModules(object):
 
     @responses.activate
     def test_sololeveling_geoblock(self, tmpdir):
-        responses.add(responses.GET, 'https://w1.sololeveling.net/',
+        responses.add(responses.GET, 'https://w3.sololeveling.net/',
                      '<span>1020</span>', status=403)
 
         with pytest.raises(GeoblockedException):