Try to fix some more comics using the Internet Archive

Tobias Gruetzmacher 2021-01-31 23:40:21 +01:00
parent 5b3bfdd09e
commit 87f4049347
10 changed files with 27 additions and 50 deletions
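
All of the plugin changes below follow the same pattern: when a comic's site has gone offline, the scraper's url is pointed at a fixed Wayback Machine snapshot and the module is flagged with endOfLife = True, so dosage treats the comic as finished rather than expecting new strips; scrapers that could not be fixed this way (JustAnotherEscape, Ryugou) are moved to the Removed list instead. A minimal sketch of the pattern, assuming the dosage plugin API as used in the diffs below (the class name, domain and snapshot timestamp are invented for illustration):

    # Sketch only: the archive-URL pattern applied throughout this commit.
    # "ExampleComic", its domain and the snapshot timestamp are made up.
    from ..scraper import _ParserScraper


    class ExampleComic(_ParserScraper):
        # Scrape a fixed Wayback Machine capture instead of the dead site;
        # dosage walks backwards from here via prevSearch as usual.
        url = 'https://web.archive.org/web/20200101000000/http://example-comic.invalid/'
        stripUrl = url + '?id=%s'
        firstStripUrl = stripUrl % '1'
        imageSearch = '//img[@id="comicimg"]'  # XPath, as _ParserScraper expects
        prevSearch = '//a[text()="Prev"]'
        help = 'Index format: number'
        endOfLife = True  # the site is gone; no new strips are expected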

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape
@@ -121,14 +121,14 @@ class ForestHill(_WordPressScraper):
     url = 'https://www.foresthillcomic.org/'


-class ForLackOfABetterComic(_BasicScraper):
-    url = 'http://forlackofabettercomic.com/'
-    rurl = r'http://(?:www\.)?forlackofabettercomic\.com/'
+class ForLackOfABetterComic(_ParserScraper):
+    url = 'https://web.archive.org/web/20200224010115/http://forlackofabettercomic.com/'
     stripUrl = url + '?id=%s'
     firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src", r'(%simg/comic/\d+[^"]+)' % rurl, after="comicimg"))
-    prevSearch = compile(tagre("a", "href", r'(%s\?id\=\d+)' % rurl) + r'Prev')
+    imageSearch = '//img[@id="comicimg"]'
+    prevSearch = '//a[text()="Prev"]'
     help = 'Index format: number'
+    endOfLife = True


 class FoxDad(_ParserScraper):

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 from re import compile, escape

 from ..scraper import _BasicScraper
@@ -44,14 +44,3 @@ class JohnnyWander(_ComicControlScraper):
     imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
                    _ComicControlScraper.imageSearch)
     url = 'http://www.johnnywander.com/'
-
-
-class JustAnotherEscape(_BasicScraper):
-    url = 'http://www.justanotherescape.com/'
-    rurl = escape(url)
-    stripUrl = url + 'index.cgi?date=%s'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href",
-                               r'(%s/index\.cgi\?date=\d+)' % rurl) +
-                         tagre("img", "alt", "Previous Comic"))
-    help = 'Index format: yyyymmdd'

View file

@@ -22,7 +22,8 @@ class OctopusPie(_ParserScraper):


 class OffWhite(_ParserScraper):
-    stripUrl = 'http://off-white.eu/comic/%s/'
+    baseUrl = 'https://web.archive.org/web/20200627222318/http://off-white.eu/'
+    stripUrl = baseUrl + 'comic/%s/'
     firstStripUrl = stripUrl % 'prologue-page-1-2'
     url = firstStripUrl
     imageSearch = '//img[@class="comic-page"]'
@@ -34,7 +34,7 @@ class OffWhite(_ParserScraper):
     def fetchUrls(self, url, data, urlSearch):
         # Fix missing page
         if url == self.stripUrl % 'page-37':
-            return ['http://off-white.eu/ow_v2/wp-content/uploads/2011/01/new-037.jpg']
+            return [self.baseUrl + 'ow_v2/wp-content/uploads/2011/01/new-037.jpg']
         return super(OffWhite, self).fetchUrls(url, data, urlSearch)

     def getPrevUrl(self, url, data):

View file

@@ -673,10 +673,12 @@ class Removed(Scraper):
             cls('GoComics/UncleArtsFunland'),
             cls('GoComics/USAcres'),
             cls('GoComics/WorldOfWonder'),
+            cls('JustAnotherEscape'),
             cls('Laiyu', 'brk'),
             cls('MangaDex/DrStone', 'legal'),
             cls('MangaDex/HeavensDesignTeam', 'legal'),
             cls('MangaDex/SPYxFAMILY', 'legal'),
+            cls('Ryugou'),
             cls('SmackJeeves/20TimesKirby'),
             cls('SmackJeeves/2Kingdoms'),
             cls('SmackJeeves/355Days'),

View file

@@ -308,13 +308,14 @@ class ProphecyOfTheCircle(_WPNavi):


 class Prototype(_ParserScraper):
-    stripUrl = 'http://planetprototype.com/%s/'
+    stripUrl = 'https://web.archive.org/web/20201030035444/http://planetprototype.com/%s/'
     firstStripUrl = stripUrl % '2018/03/30/vol-1-ch-1-front-cover'
     url = firstStripUrl
     imageSearch = '//img[contains(@class, "wp-post-image")]'
     prevSearch = '//a[.//text()="Previous"]'
     latestSearch = '//a[.//text()="Latest"]'
     starter = indirectStarter
+    endOfLife = True


 class PS238(_ParserScraper):

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile
 from urllib.parse import urljoin
@@ -153,22 +153,3 @@ class Ruthe(_BasicScraper):
     imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
     prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)'))
     help = 'Index format: number'
-
-
-class Ryugou(_WPWebcomic):
-    url = 'http://ryugou.swashbuckledcomics.com/'
-    stripUrl = url + 'comic/%s/'
-    firstStripUrl = 'ryugou-chapter-1-cover'
-    starter = bounceStarter
-    adult = True
-
-    def namer(self, imageUrl, pageUrl):
-        title = pageUrl.rstrip('/').rsplit('/', 1)[-1]
-        ext = imageUrl.rsplit('.', 1)[-1]
-        return title + '.' + ext
-
-    def fetchUrls(self, url, data, urlSearch):
-        imageUrls = super(Ryugou, self).fetchUrls(url, data, urlSearch)
-        if url == self.stripUrl % '1-3':
-            imageUrls = [imageUrls[1]]
-        return imageUrls

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape, IGNORECASE, sub
 from os.path import splitext
@@ -345,11 +345,13 @@ class SnowFlame(_WordPressScraper):


 class SodiumEyes(_WordPressScraper):
-    url = 'http://sodiumeyes.com/'
+    url = 'https://web.archive.org/web/20200220041406/http://sodiumeyes.com/'
+    starter = indirectStarter
+    endOfLife = True


 class SoloLeveling(_ParserScraper):
-    url = 'https://w1.sololeveling.net/'
+    url = 'https://w3.sololeveling.net/'
     stripUrl = url + 'manga/solo-leveling-chapter-%s/'
     firstStripUrl = stripUrl % '1'
     imageSearch = '//div[@class="img_container"]//img'

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
@@ -27,12 +27,13 @@ class Zapiro(_ParserScraper):


 class ZenPencils(_WPNavi):
-    url = 'https://zenpencils.com/'
+    url = 'https://web.archive.org/web/20200723091741/https://zenpencils.com/'
     multipleImagesPerStrip = True
     firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
     starter = bounceStarter
     prevSearch = '//a[d:class("navi-prev")]'
     nextSearch = '//a[d:class("navi-next")]'
+    endOfLife = True


 class ZombieHunters(_BasicScraper):

View file

@@ -155,9 +155,9 @@ class TestDosage(object):

     @responses.activate
     def test_json_page_key_bounce_and_multi_image(self, tmpdir):
-        httpmocks.page('https://zenpencils.com/', 'zp-home')
-        httpmocks.page('https://zenpencils.com/comic/missing/', 'zp-223')
-        httpmocks.page('https://zenpencils.com/comic/lifejacket/', 'zp-222')
+        httpmocks.page(re.compile(r'.*com/$'), 'zp-home')
+        httpmocks.page(re.compile(r'.*com/comic/missing/$'), 'zp-223')
+        httpmocks.page(re.compile(r'.*com/comic/lifejacket/$'), 'zp-222')
         httpmocks.jpeg(re.compile(r'https://cdn-.*\.jpg'))

         cmd_ok("-v", "-b", str(tmpdir), "-o", "json", "ZenPencils")

View file

@@ -44,7 +44,7 @@ class TestModules(object):

     @responses.activate
     def test_sololeveling_geoblock(self, tmpdir):
-        responses.add(responses.GET, 'https://w1.sololeveling.net/',
+        responses.add(responses.GET, 'https://w3.sololeveling.net/',
                       '<span>1020</span>', status=403)

         with pytest.raises(GeoblockedException):