Try to fix some more comics using the Internet Archive
This commit is contained in:
parent 5b3bfdd09e
commit 87f4049347
10 changed files with 27 additions and 50 deletions
|
@@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
|
@@ -121,14 +121,14 @@ class ForestHill(_WordPressScraper):
|
||||||
url = 'https://www.foresthillcomic.org/'
|
url = 'https://www.foresthillcomic.org/'
|
||||||
|
|
||||||
|
|
||||||
class ForLackOfABetterComic(_BasicScraper):
|
class ForLackOfABetterComic(_ParserScraper):
|
||||||
url = 'http://forlackofabettercomic.com/'
|
url = 'https://web.archive.org/web/20200224010115/http://forlackofabettercomic.com/'
|
||||||
rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
|
|
||||||
stripUrl = url + '?id=%s'
|
stripUrl = url + '?id=%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%simg/comic/\d+[^"]+)' % rurl, after="comicimg"))
|
imageSearch = '//img[@id="comicimg"]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\?id\=\d+)' % rurl) + r'Prev')
|
prevSearch = '//a[text()="Prev"]'
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class FoxDad(_ParserScraper):
|
class FoxDad(_ParserScraper):
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
@@ -44,14 +44,3 @@ class JohnnyWander(_ComicControlScraper):
|
||||||
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
|
imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
|
||||||
_ComicControlScraper.imageSearch)
|
_ComicControlScraper.imageSearch)
|
||||||
url = 'http://www.johnnywander.com/'
|
url = 'http://www.johnnywander.com/'
|
||||||
|
|
||||||
|
|
||||||
class JustAnotherEscape(_BasicScraper):
|
|
||||||
url = 'http://www.justanotherescape.com/'
|
|
||||||
rurl = escape(url)
|
|
||||||
stripUrl = url + 'index.cgi?date=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
|
||||||
prevSearch = compile(tagre("a", "href",
|
|
||||||
r'(%s/index\.cgi\?date=\d+)' % rurl) +
|
|
||||||
tagre("img", "alt", "Previous Comic"))
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
|
@@ -22,7 +22,8 @@ class OctopusPie(_ParserScraper):
|
||||||
|
|
||||||
|
|
||||||
class OffWhite(_ParserScraper):
|
class OffWhite(_ParserScraper):
|
||||||
stripUrl = 'http://off-white.eu/comic/%s/'
|
baseUrl = 'https://web.archive.org/web/20200627222318/http://off-white.eu/'
|
||||||
|
stripUrl = baseUrl + 'comic/%s/'
|
||||||
firstStripUrl = stripUrl % 'prologue-page-1-2'
|
firstStripUrl = stripUrl % 'prologue-page-1-2'
|
||||||
url = firstStripUrl
|
url = firstStripUrl
|
||||||
imageSearch = '//img[@class="comic-page"]'
|
imageSearch = '//img[@class="comic-page"]'
|
||||||
|
@@ -34,7 +35,7 @@ class OffWhite(_ParserScraper):
|
||||||
def fetchUrls(self, url, data, urlSearch):
|
def fetchUrls(self, url, data, urlSearch):
|
||||||
# Fix missing page
|
# Fix missing page
|
||||||
if url == self.stripUrl % 'page-37':
|
if url == self.stripUrl % 'page-37':
|
||||||
return ['http://off-white.eu/ow_v2/wp-content/uploads/2011/01/new-037.jpg']
|
return [self.baseUrl + 'ow_v2/wp-content/uploads/2011/01/new-037.jpg']
|
||||||
return super(OffWhite, self).fetchUrls(url, data, urlSearch)
|
return super(OffWhite, self).fetchUrls(url, data, urlSearch)
|
||||||
|
|
||||||
def getPrevUrl(self, url, data):
|
def getPrevUrl(self, url, data):
|
||||||
|
|
|
@@ -673,10 +673,12 @@ class Removed(Scraper):
|
||||||
cls('GoComics/UncleArtsFunland'),
|
cls('GoComics/UncleArtsFunland'),
|
||||||
cls('GoComics/USAcres'),
|
cls('GoComics/USAcres'),
|
||||||
cls('GoComics/WorldOfWonder'),
|
cls('GoComics/WorldOfWonder'),
|
||||||
|
cls('JustAnotherEscape'),
|
||||||
cls('Laiyu', 'brk'),
|
cls('Laiyu', 'brk'),
|
||||||
cls('MangaDex/DrStone', 'legal'),
|
cls('MangaDex/DrStone', 'legal'),
|
||||||
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
cls('MangaDex/HeavensDesignTeam', 'legal'),
|
||||||
cls('MangaDex/SPYxFAMILY', 'legal'),
|
cls('MangaDex/SPYxFAMILY', 'legal'),
|
||||||
|
cls('Ryugou'),
|
||||||
cls('SmackJeeves/20TimesKirby'),
|
cls('SmackJeeves/20TimesKirby'),
|
||||||
cls('SmackJeeves/2Kingdoms'),
|
cls('SmackJeeves/2Kingdoms'),
|
||||||
cls('SmackJeeves/355Days'),
|
cls('SmackJeeves/355Days'),
|
||||||
|
|
|
@@ -308,13 +308,14 @@ class ProphecyOfTheCircle(_WPNavi):
|
||||||
|
|
||||||
|
|
||||||
class Prototype(_ParserScraper):
|
class Prototype(_ParserScraper):
|
||||||
stripUrl = 'http://planetprototype.com/%s/'
|
stripUrl = 'https://web.archive.org/web/20201030035444/http://planetprototype.com/%s/'
|
||||||
firstStripUrl = stripUrl % '2018/03/30/vol-1-ch-1-front-cover'
|
firstStripUrl = stripUrl % '2018/03/30/vol-1-ch-1-front-cover'
|
||||||
url = firstStripUrl
|
url = firstStripUrl
|
||||||
imageSearch = '//img[contains(@class, "wp-post-image")]'
|
imageSearch = '//img[contains(@class, "wp-post-image")]'
|
||||||
prevSearch = '//a[.//text()="Previous"]'
|
prevSearch = '//a[.//text()="Previous"]'
|
||||||
latestSearch = '//a[.//text()="Latest"]'
|
latestSearch = '//a[.//text()="Latest"]'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class PS238(_ParserScraper):
|
class PS238(_ParserScraper):
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile
|
from re import compile
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
@@ -153,22 +153,3 @@ class Ruthe(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)'))
|
prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)'))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class Ryugou(_WPWebcomic):
|
|
||||||
url = 'http://ryugou.swashbuckledcomics.com/'
|
|
||||||
stripUrl = url + 'comic/%s/'
|
|
||||||
firstStripUrl = 'ryugou-chapter-1-cover'
|
|
||||||
starter = bounceStarter
|
|
||||||
adult = True
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
title = pageUrl.rstrip('/').rsplit('/', 1)[-1]
|
|
||||||
ext = imageUrl.rsplit('.', 1)[-1]
|
|
||||||
return title + '.' + ext
|
|
||||||
|
|
||||||
def fetchUrls(self, url, data, urlSearch):
|
|
||||||
imageUrls = super(Ryugou, self).fetchUrls(url, data, urlSearch)
|
|
||||||
if url == self.stripUrl % '1-3':
|
|
||||||
imageUrls = [imageUrls[1]]
|
|
||||||
return imageUrls
|
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
from re import compile, escape, IGNORECASE, sub
|
from re import compile, escape, IGNORECASE, sub
|
||||||
from os.path import splitext
|
from os.path import splitext
|
||||||
|
@@ -345,11 +345,13 @@ class SnowFlame(_WordPressScraper):
|
||||||
|
|
||||||
|
|
||||||
class SodiumEyes(_WordPressScraper):
|
class SodiumEyes(_WordPressScraper):
|
||||||
url = 'http://sodiumeyes.com/'
|
url = 'https://web.archive.org/web/20200220041406/http://sodiumeyes.com/'
|
||||||
|
starter = indirectStarter
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class SoloLeveling(_ParserScraper):
|
class SoloLeveling(_ParserScraper):
|
||||||
url = 'https://w1.sololeveling.net/'
|
url = 'https://w3.sololeveling.net/'
|
||||||
stripUrl = url + 'manga/solo-leveling-chapter-%s/'
|
stripUrl = url + 'manga/solo-leveling-chapter-%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = '//div[@class="img_container"]//img'
|
imageSearch = '//div[@class="img_container"]//img'
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
# Copyright (C) 2015-2021 Tobias Gruetzmacher
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
|
@@ -27,12 +27,13 @@ class Zapiro(_ParserScraper):
|
||||||
|
|
||||||
|
|
||||||
class ZenPencils(_WPNavi):
|
class ZenPencils(_WPNavi):
|
||||||
url = 'https://zenpencils.com/'
|
url = 'https://web.archive.org/web/20200723091741/https://zenpencils.com/'
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
prevSearch = '//a[d:class("navi-prev")]'
|
prevSearch = '//a[d:class("navi-prev")]'
|
||||||
nextSearch = '//a[d:class("navi-next")]'
|
nextSearch = '//a[d:class("navi-next")]'
|
||||||
|
endOfLife = True
|
||||||
|
|
||||||
|
|
||||||
class ZombieHunters(_BasicScraper):
|
class ZombieHunters(_BasicScraper):
|
||||||
|
|
|
@@ -155,9 +155,9 @@ class TestDosage(object):
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_json_page_key_bounce_and_multi_image(self, tmpdir):
|
def test_json_page_key_bounce_and_multi_image(self, tmpdir):
|
||||||
httpmocks.page('https://zenpencils.com/', 'zp-home')
|
httpmocks.page(re.compile(r'.*com/$'), 'zp-home')
|
||||||
httpmocks.page('https://zenpencils.com/comic/missing/', 'zp-223')
|
httpmocks.page(re.compile(r'.*com/comic/missing/$'), 'zp-223')
|
||||||
httpmocks.page('https://zenpencils.com/comic/lifejacket/', 'zp-222')
|
httpmocks.page(re.compile(r'.*com/comic/lifejacket/$'), 'zp-222')
|
||||||
httpmocks.jpeg(re.compile(r'https://cdn-.*\.jpg'))
|
httpmocks.jpeg(re.compile(r'https://cdn-.*\.jpg'))
|
||||||
|
|
||||||
cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")
|
cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")
|
||||||
|
|
|
@@ -44,7 +44,7 @@ class TestModules(object):
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_sololeveling_geoblock(self, tmpdir):
|
def test_sololeveling_geoblock(self, tmpdir):
|
||||||
responses.add(responses.GET, 'https://w1.sololeveling.net/',
|
responses.add(responses.GET, 'https://w3.sololeveling.net/',
|
||||||
'<span>1020</span>', status=403)
|
'<span>1020</span>', status=403)
|
||||||
|
|
||||||
with pytest.raises(GeoblockedException):
|
with pytest.raises(GeoblockedException):
|
||||||
|
|
Loading…
Reference in a new issue