Fix some old modules using the Internet Archive

2020-01-09 17:38:13 +01:00 · 2020-01-09 17:38:13 +01:00 · 752525c3e9
commit 752525c3e9
parent 275370a835
16 changed files with 173 additions and 137 deletions
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2018 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -213,14 +213,12 @@ class AlienShores(_WordPressScraper):
    firstStripUrl = url + 'AScomic/updated-cover/'


-class AllTheGrowingThings(_BasicScraper):
-    url = 'http://growingthings.typodmary.com/'
-    rurl = escape(url)
+class AllTheGrowingThings(_WordPressScraper):
+    url = ('https://web.archive.org/web/20160611212229/'
+        'http://growingthings.typodmary.com/')
    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things'
-    imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
-    help = 'Index format: yyyy/mm/dd/strip-name'
+    firstStripUrl = stripUrl % 'all-the-growing-things'
+    endOfLife = True


 class AlphaLuna(_ParserScraper):
@ -329,11 +327,14 @@ class Angels2200(_BasicScraper):


 class Annyseed(_ParserScraper):
-    baseUrl = 'http://www.mirrorwoodcomics.com/'
-    url = baseUrl + 'AnnyseedLatest.htm'
+    baseUrl = ('https://web.archive.org/web/20190511031451/'
+        'http://www.mirrorwoodcomics.com/')
    stripUrl = baseUrl + 'Annyseed%s.htm'
+    url = stripUrl % 'Latest'
+    firstStripUrl = stripUrl % '000'
    imageSearch = '//div/img[contains(@src, "Annyseed")]'
    prevSearch = '//a[img[@name="Previousbtn"]]'
+    endOfLife = True
    help = 'Index format: nnn'
    FIX_RE = compile(r'Annyseed/Finished%20For%20Print/')

--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -156,12 +156,13 @@ class BiggerThanCheeses(_BasicScraper):


 class BillyTheDunce(_ParserScraper):
-    url = 'http://www.duncepress.com/'
-    firstStripUrl = url + '2009/06/an-introduction-of-sorts'
+    stripUrl = ('https://web.archive.org/web/20180404142544/'
+        'http://www.duncepress.com/%s/')
+    url = stripUrl % '2012/02/losing-more'
+    firstStripUrl = stripUrl % '2009/06/an-introduction-of-sorts'
    imageSearch = '//div[@class="entry"]/p[1]/a'
    prevSearch = '//a[@rel="prev"]'
-    latestSearch = '//h2[@class="post-title"]/a'
-    starter = indirectStarter
+    endOfLife = True


 class BittersweetCandyBowl(_ParserScraper):
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -125,8 +125,8 @@ class CatenaCafe(_WordPressScraper):


 class CatenaManor(_ParserScraper):
-    # Retrieve comic from the Internet Archive
-    baseUrl = 'https://web.archive.org/web/20141027141116/http://catenamanor.com/'
+    baseUrl = ('https://web.archive.org/web/20141027141116/'
+        'http://catenamanor.com/')
    url = baseUrl + 'archives'
    stripUrl = baseUrl + '%s/'
    firstStripUrl = stripUrl % '2003/07'
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -139,7 +139,8 @@ class DemolitionSquad(_ParserScraper):


 class DerTodUndDasMaedchen(_ParserScraper):
-    url = 'http://www.cartoontomb.de/deutsch/tod2.php'
+    url = ('https://web.archive.org/web/20180106180134/'
+        'http://www.cartoontomb.de/deutsch/tod2.php')
    stripUrl = url + '?bild=%s.jpg'
    firstStripUrl = stripUrl % '00_01_01'
    imageSearch = '//img[contains(@src, "images/tod/teil2")]'
@ -305,16 +306,17 @@ class DresdenCodak(_ParserScraper):
        return not data.xpath(self.imageSearch)


-class DrFun(_BasicScraper):
-    baseUrl = 'http://www.ibiblio.org/Dave/'
-    url = baseUrl + 'ar00502.htm'
+class DrFun(_ParserScraper):
+    baseUrl = ('https://web.archive.org/web/20180726145737/'
+        'http://www.ibiblio.org/Dave/')
    stripUrl = baseUrl + 'ar%s.htm'
+    url = stripUrl % '00502'
    firstStripUrl = stripUrl % '00001'
-    imageSearch = compile(tagre("a", "href", r'(Dr-Fun/df\d+/df[^"]+)'))
+    imageSearch = '//a[contains(@href, "Dr-Fun/df")]'
    multipleImagesPerStrip = True
-    prevSearch = compile(tagre("a", "href", r'([^"]+)') + 'Previous Week,')
-    help = 'Index format: nnnnn'
+    prevSearch = '//a[contains(text(), "Previous Week")]'
    endOfLife = True
+    help = 'Index format: nnnnn'


 class Drive(_BasicScraper):
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@ -181,10 +181,11 @@ class EverybodyLovesEricRaymond(_ParserScraper):
    prevSearch = '//a[@rel="prev"]'


-# Seems to be GeoBlocked from Germany?
 class EvilDiva(_WordPressScraper):
-    url = 'http://www.evildivacomics.com/'
+    url = ('https://web.archive.org/web/20190221223751/'
+        'https://www.evildivacomics.com/')
    firstStripUrl = url + 'comic/evil-diva-issue-1-cover/'
+    endOfLife = True


 class EvilInc(_WordPressScraper):
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function
-from re import compile, escape, IGNORECASE
+from re import compile, escape

 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
@ -27,14 +27,15 @@ class Faneurysm(_WPNaviIn):
    endOfLife = True


-class FantasyRealms(_BasicScraper):
-    url = 'http://www.fantasyrealmsonline.com/'
-    stripUrl = url + 'manga/%s.php'
-    imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
-    prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
-    latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
+class FantasyRealms(_ParserScraper):
+    stripUrl = ('https://web.archive.org/web/20161204192651/'
+        'http://fantasyrealmsonline.com/manga/%s.php')
+    url = stripUrl % '091'
+    firstStripUrl = stripUrl % '001'
+    imageSearch = '//img[contains(@src, "/manga/0")]'
+    prevSearch = '//a[img[contains(@src, "nav-back")]]'
+    endOfLife = True
    help = 'Index format: nnn'
-    starter = indirectStarter


 class FarToTheNorth(_ComicControlScraper):
@ -57,16 +58,15 @@ class FireflyCross(_WordPressScraper):
    firstStripUrl = url + '?comic=05062002'


-class FirstWorldProblems(_BasicScraper):
-    url = 'http://bradcolbow.com/archive/C5/'
+class FirstWorldProblems(_ParserScraper):
+    url = ('https://web.archive.org/web/20150710053456/'
+        'http://bradcolbow.com/archive/C5/')
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'P10'
-    imageSearch = compile(tagre("img", "src",
-        r'(http://(?:fwpcomics\.s3\.amazonaws\.com|s3\.amazonaws\.com/fwpcomics)/s1-[^"]+)'))
-    prevSearch = compile(tagre("a", "href",
-        r'(http://bradcolbow\.com/archive/C5/[^"]+)', before="prev"))
+    imageSearch = '//div[{}]//img'.format(xpath_class('entry'))
+    prevSearch = '//a[{}]'.format(xpath_class('prev'))
    multipleImagesPerStrip = True
-    help = 'Index format: a letter and a number'
+    endOfLife = True


 class FlakyPastry(_BasicScraper):
@ -79,12 +79,14 @@ class FlakyPastry(_BasicScraper):
    help = 'Index format: nnnn'


-class Flemcomics(_BasicScraper):
-    url = 'http://www.flemcomics.com/'
+class Flemcomics(_ParserScraper):
+    url = ('https://web.archive.org/web/20180414110349/'
+        'http://www.flemcomics.com/')
    stripUrl = url + 'd/%s.html'
-    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
-                         tagre("img", "src", r'/images/previous_day\.jpg'))
+    firstStripUrl = stripUrl % '19980101'
+    imageSearch = '//img[{}]'.format(xpath_class('ksc'))
+    prevSearch = '//a[@rel="prev"]'
+    endOfLife = True
    help = 'Index format: yyyymmdd'


@ -160,10 +162,12 @@ class FoxTails(_ParserScraper):


 class Fragile(_ParserScraper):
-    url = 'http://www.fragilestory.com/'
+    url = ('https://web.archive.org/web/20190308203109/'
+        'http://www.fragilestory.com/')
    imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img'
    prevSearch = '//div[@id="nav_comic_a"]/a[2]'
    firstStripUrl = url + 'strips/chapter_01'
+    endOfLife = True


 class FredoAndPidjin(_ParserScraper):
@ -214,19 +218,22 @@ class FullFrontalNerdity(_BasicScraper):
    help = 'Index format: number'


-class FunInJammies(_BasicScraper):
-    url = 'http://www.funinjammies.com/'
+class FunInJammies(_WordPressScraper):
+    url = ('https://web.archive.org/web/20170205105241/'
+        'http://www.funinjammies.com/')
    stripUrl = url + 'comic.php?issue=%s'
    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(r'(/comics/.+?)"')
-    prevSearch = compile(r'(/comic.php.+?)" id.+?prev')
+    prevSearch = '//a[text()="< Prev"]'
+    endOfLife = True
    help = 'Index format: n (unpadded)'


 class FurPiled(_ParserScraper):
-    stripUrl = 'https://web.archive.org/web/20160404074145/http://www.liondogworks.com/images/fp-%03d.jpg'
+    stripUrl = ('https://web.archive.org/web/20160404074145/'
+        'http://www.liondogworks.com/images/fp-%03d.jpg')
    url = stripUrl % 427
    firstStripUrl = stripUrl % 1
+    endOfLife = True

    def getPrevUrl(self, url, data):
        # Skip missing pages
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -21,20 +21,12 @@ class Galaxion(_WPNavi):
    help = 'Index format: n-comic/book-n/chapter-n/title-nnn'


-class Garanos(_BasicScraper):
-    baseUrl = 'http://garanos.alexheberling.com/'
-    rurl = escape(baseUrl)
-    url = baseUrl + 'pages/page-1/'
-    starter = indirectStarter
-    stripUrl = baseUrl + 'pages/page-%s'
-    imageSearch = compile(
-        tagre("img", "src",
-              r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
-                               after="prev"))
-    latestSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
-                                 after="nav-last"))
-    help = 'Index format: n (unpadded)'
+class Garanos(_WordPressScraper):
+    stripUrl = ('https://web.archive.org/web/20180314181433/'
+        'http://garanos.alexheberling.com/pages/%s/')
+    url = stripUrl % 'page-487'
+    firstStripUrl = stripUrl % 'vol01'
+    endOfLife = True


 class GastroPhobia(_ParserScraper):
@ -46,13 +38,14 @@ class GastroPhobia(_ParserScraper):
    help = 'Index format: yyyy-mm-dd'


-class Geeks(_BasicScraper):
-    url = 'http://sevenfloorsdown.com/geeks/'
+class Geeks(_ParserScraper):
+    url = ('https://web.archive.org/web/20190527194921/'
+        'http://sevenfloorsdown.com/geeks/')
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '10'
-    imageSearch = compile(
-        r'<img src=\'(http://sevenfloorsdown.com/geeks/comics/.+?)\'')
-    prevSearch = compile(r'<a href="(.+?)">&laquo; Previous')
+    imageSearch = '//div[@id="comic"]/img'
+    prevSearch = '//a[contains(text(), "Previous")]'
+    endOfLife = True
    help = 'Index format: nnn'


@ -116,15 +109,12 @@ class GlassHalfEmpty(_BasicScraper):
    help = 'Index format: nnn'


-class GleefulNihilism(_BasicScraper):
-    url = 'http://gleefulnihilism.com/'
-    rurl = escape(url)
+class GleefulNihilism(_WordPressScraper):
+    url = ('https://web.archive.org/web/20170911203122/'
+        'http://gleefulnihilism.com/')
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'amoeba'
-    imageSearch = compile(
-        tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
-    prevSearch = compile(
-        tagre("a", "href", r'(%scomic/[^"]+)' % rurl) + '&lsaquo;')
+    endOfLife = True
    help = 'Index format: stripname'


--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -68,9 +68,11 @@ class LetsSpeakEnglish(_ComicControlScraper):


 class LifeAintNoPonyFarm(_WordPressScraper):
-    url = 'http://sarahburrini.com/en/'
+    url = ('https://web.archive.org/web/20181221154155/'
+        'http://sarahburrini.com/en/')
    firstStripUrl = url + 'comic/my-first-webcomic/'
    multipleImagesPerStrip = True
+    endOfLife = True


 class LilithsWord(_ComicControlScraper):
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter
+from ..helpers import indirectStarter, xpath_class
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi

@ -88,12 +88,13 @@ class Newshounds(_ParserScraper):
        return super().getPrevUrl(url, data)


-class NewWorld(_BasicScraper):
-    url = 'http://www.tfsnewworld.com/'
+class NewWorld(_WordPressScraper):
+    url = ('https://web.archive.org/web/20190718012133/'
+        'http://www.tfsnewworld.com/')
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/08/30/63'
-    imageSearch = compile(r'<img src="(http://www.tfsnewworld.com/comics/.+?)"')
-    prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
+    prevSearch = '//a[@rel="prev"]'
+    endOfLife = True
    help = 'Index format: yyyy/mm/dd/stripn'


@ -109,7 +110,9 @@ class NichtLustig(_BasicScraper):


 class Nicky510(_WPNavi):
-    url = 'http://www.nickyitis.com/'
+    url = ('https://web.archive.org/web/20160510215718/'
+        'http://www.nickyitis.com/')
+    endOfLife = True


 class NicoleAndDerek(_ParserScraper):
@ -140,13 +143,13 @@ class Nightshift(_ParserScraper):
        return chapter + '_' + page


-class Nimona(_BasicScraper):
-    url = 'http://gingerhaze.com/nimona/'
+class Nimona(_ParserScraper):
+    url = ('https://web.archive.org/web/20141008095502/'
+        'http://gingerhaze.com/nimona/')
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % "page-1"
-    imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)'))
-    prevSearch = compile(r'<a href="(/nimona/comic/[^"]+)"><img src="http://gingerhaze\.com/sites/default/files/comicdrop/comicdrop_prev_label_file\.png"')
-    help = 'Index format: stripname'
+    imageSearch = '//div[{}]//img'.format(xpath_class('field-name-field-comic-page'))
+    prevSearch = '//a[img[contains(@src, "/comicdrop_prev_label")]]'
    endOfLife = True


--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -125,7 +125,9 @@ class OnTheFastrack(_BasicScraper):


 class OopsComicAdventure(_WordPressScraper):
-    url = 'http://oopscomicadventure.com/'
+    url = ('https://web.archive.org/web/20190102215141/'
+        'http://oopscomicadventure.com/')
+    endOfLife = True


 class Optipess(_WPNavi):
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -44,8 +44,10 @@ class ParallelUniversum(_BasicScraper):


 class PartiallyClips(_WordPressScraper):
-    url = 'http://partiallyclips.com/'
+    url = ('https://web.archive.org/web/20180509161332/'
+        'http://partiallyclips.com/')
    firstStripUrl = url + 'comic/screaming-woman/'
+    endOfLife = True


 class PastelDefender(_BasicScraper):
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -119,34 +119,36 @@ class SchlockMercenary(_ParserScraper):
    help = 'Index format: yyyy-mm-dd'


-class SchoolBites(_BasicScraper):
-    url = 'http://schoolbites.net/'
+class SchoolBites(_ParserScraper):
+    url = ('https://web.archive.org/web/20170215065523/'
+        'http://schoolbites.net/')
    stripUrl = url + 'd/%s.html'
-    imageSearch = compile(tagre("img", "src", r'(http://cdn\.schoolbites\.net/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://schoolbites\.net/d/\d+\.html)', after="prev"))
+    imageSearch = '//img[{}]'.format(xpath_class('ksc'))
+    prevSearch = '//a[@rel="prev"]'
+    endOfLife = True
    help = 'Index format: yyyymmdd'


-class Schuelert(_BasicScraper):
-    url = 'http://www.schuelert.de/'
-    rurl = escape(url)
+class Schuelert(_ParserScraper):
+    url = ('https://web.archive.org/web/20190103022830/'
+        'http://www.schuelert.de/')
    stripUrl = url + 'index.php?paged=%s'
-    firstStripUrl = stripUrl % '5'
-    imageSearch = compile(tagre("img", "src", r"(%swp-content/[^']+)" % rurl, quote="'"))
-    prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?paged=\d+)' % rurl) + "&laquo;")
+    firstStripUrl = stripUrl % '3'
+    imageSearch = '//img[contains(@src, "wp-content")]'
+    prevSearch = '//span[{}]/a'.format(xpath_class('prevlink'))
    multipleImagesPerStrip = True
-    help = 'Index format: none'
+    endOfLife = True
    lang = 'de'


-class Science(_BasicScraper):
-    url = 'http://sci-ence.org/'
-    rurl = escape(url)
-    stripUrl = url + '%s/'
+class Science(_ParserScraper):
+    stripUrl = ('https://web.archive.org/web/20180616152753/'
+        'http://sci-ence.org/%s/')
+    url = stripUrl % 'new-york-comic-con-2013'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl, after="prev"))
-    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
-    help = 'Index format: stripname'
+    prevSearch = '//a[{}]'.format(xpath_class('navi-prev'))
+    imageSearch = '//div[@class="comicpane"]//img'
+    endOfLife = True


 class SeelPeel(_WPNaviIn):
@ -321,10 +323,12 @@ class SMBC(_ComicControlScraper):


 class SnowFlame(_WordPressScraper):
-    url = 'http://www.snowflamecomic.com/'
+    url = ('https://web.archive.org/web/20160905071051/'
+        'http://www.snowflamecomic.com/')
    stripUrl = url + '?comic=snowflame-%s-%s'
    firstStripUrl = stripUrl % ('01', '01')
    starter = bounceStarter
+    endOfLife = True
    help = 'Index format: chapter-page'

    def getIndexStripUrl(self, index):
@ -493,12 +497,14 @@ class StandStillStaySilent(_ParserScraper):


 class StarCrossdDestiny(_ParserScraper):
-    baseUrl = 'http://starcrossd.net/'
+    baseUrl = ('https://web.archive.org/web/20190918132321/'
+        'http://starcrossd.net/')
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[text()="prev"]'
+    endOfLife = True
    help = 'Index format: nnnnnnnn'

    def namer(self, image_url, page_url):
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -48,10 +48,14 @@ class Tamberlane(_ParserScraper):


 class TheBrads(_ParserScraper):
-    url = 'http://bradcolbow.com/archive/'
-    imageSearch = '//div[%s]//img' % xpath_class('entry')
-    prevSearch = '//a[%s]' % xpath_class('prev')
+    url = ('https://web.archive.org/web/20171211154809/'
+        'http://bradcolbow.com/archive/C4/')
+    stripUrl = url + '%s/'
+    firstStripUrl = stripUrl % 'P125'
+    imageSearch = '//div[{}]//img'.format(xpath_class('entry'))
+    prevSearch = '//a[{}]'.format(xpath_class('prev'))
    multipleImagesPerStrip = True
+    endOfLife = True


 class TheClassMenagerie(_ParserScraper):
@ -107,15 +111,14 @@ class TheJunkHyenasDiner(_WordPressScraper):
    firstStripUrl = stripUrl % 'intro'


-class TheLandscaper(_BasicScraper):
-    stripUrl = 'http://landscaper.visual-assault.net/comic/%s'
+class TheLandscaper(_ParserScraper):
+    stripUrl = ('https://web.archive.org/web/20171129163510/'
+        'http://landscaper.visual-assault.net/comic/%s')
    url = stripUrl % 'latest'
    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src",
-                                r'(/comics/comic/comic_page/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)') +
-                         '&lsaquo; Previous')
-    help = 'Index format: name'
+    imageSearch = '//article[{}]//img[1]'.format(xpath_class('comic'))
+    prevSearch = '//a[contains(text(), "Previous")]'
+    endOfLife = True


 class TheMelvinChronicles(_WordPressScraper):
--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -14,8 +14,10 @@ from .common import _ComicControlScraper, _WordPressScraper, _WPNavi


 class Underling(_WPNavi):
-    url = 'http://underlingcomic.com/'
+    url = ('https://web.archive.org/web/20190806120425/'
+        'http://underlingcomic.com/')
    firstStripUrl = url + 'page-one/'
+    endOfLife = True


 class Undertow(_BasicScraper):
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -30,6 +30,9 @@ from .output import out
 from .events import getHandler


+ARCHIVE_ORG_URL = re.compile(r'https?://web\.archive\.org/web/[^/]*/')
+
+
 class Scraper(object):
    '''Base class for all comic scraper, but without a specific scrape
    implementation.'''
@ -183,7 +186,7 @@ class Scraper(object):
                except ValueError as msg:
                    # image not found
                    out.exception(msg)
-            if self.firstStripUrl == url:
+            if self.isfirststrip(url):
                out.debug(u"Stop at first URL %s" % url)
                self.hitFirstStripUrl = True
                break
@ -199,6 +202,17 @@ class Scraper(object):
                break
            url = prevUrl

+    def isfirststrip(self, url):
+        """Check if the specified URL is the first strip of a comic. This is
+        needed specifically for comics taken from archive.org, since the
+        archive.org URL prefix changes whenever pages are taken from a
+        different snapshot."""
+        if not self.firstStripUrl:
+            return False
+        firsturl = ARCHIVE_ORG_URL.sub('', self.firstStripUrl)
+        currenturl = ARCHIVE_ORG_URL.sub('', url)
+        return firsturl == currenturl
+
    def getPrevUrl(self, url, data):
        """Find previous URL."""
        prevUrl = None