diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index f4f060359..d334cec0a 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2018 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -213,14 +213,12 @@ class AlienShores(_WordPressScraper): firstStripUrl = url + 'AScomic/updated-cover/' -class AllTheGrowingThings(_BasicScraper): - url = 'http://growingthings.typodmary.com/' - rurl = escape(url) +class AllTheGrowingThings(_WordPressScraper): + url = ('https://web.archive.org/web/20160611212229/' + 'http://growingthings.typodmary.com/') stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things' - imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/strip-name' + firstStripUrl = stripUrl % 'all-the-growing-things' + endOfLife = True class AlphaLuna(_ParserScraper): @@ -329,11 +327,14 @@ class Angels2200(_BasicScraper): class Annyseed(_ParserScraper): - baseUrl = 'http://www.mirrorwoodcomics.com/' - url = baseUrl + 'AnnyseedLatest.htm' + baseUrl = ('https://web.archive.org/web/20190511031451/' + 'http://www.mirrorwoodcomics.com/') stripUrl = baseUrl + 'Annyseed%s.htm' + url = stripUrl % 'Latest' + firstStripUrl = stripUrl % '000' imageSearch = '//div/img[contains(@src, "Annyseed")]' prevSearch = '//a[img[@name="Previousbtn"]]' + endOfLife = True help = 'Index format: nnn' FIX_RE = compile(r'Annyseed/Finished%20For%20Print/') diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 35f98d387..88b35b705 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -156,12 +156,13 @@ class BiggerThanCheeses(_BasicScraper): class BillyTheDunce(_ParserScraper): - url = 'http://www.duncepress.com/' - firstStripUrl = url + '2009/06/an-introduction-of-sorts' + stripUrl = ('https://web.archive.org/web/20180404142544/' + 'http://www.duncepress.com/%s/') + url = stripUrl % '2012/02/losing-more' + firstStripUrl = stripUrl % '2009/06/an-introduction-of-sorts' imageSearch = '//div[@class="entry"]/p[1]/a' prevSearch = '//a[@rel="prev"]' - latestSearch = '//h2[@class="post-title"]/a' - starter = indirectStarter + endOfLife = True class BittersweetCandyBowl(_ParserScraper): diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index 4a23685d9..7431657c4 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -125,8 +125,8 @@ class CatenaCafe(_WordPressScraper): class CatenaManor(_ParserScraper): - # Retrieve comic from the Internet Archive - baseUrl = 'https://web.archive.org/web/20141027141116/http://catenamanor.com/' + baseUrl = ('https://web.archive.org/web/20141027141116/' + 'http://catenamanor.com/') url = baseUrl + 'archives' stripUrl = baseUrl + '%s/' firstStripUrl = stripUrl % '2003/07' diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index fe48717e9..3fe596d2f 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -139,7 +139,8 @@ class DemolitionSquad(_ParserScraper): class DerTodUndDasMaedchen(_ParserScraper): - url = 'http://www.cartoontomb.de/deutsch/tod2.php' + url = ('https://web.archive.org/web/20180106180134/' + 'http://www.cartoontomb.de/deutsch/tod2.php') stripUrl = url + '?bild=%s.jpg' firstStripUrl = stripUrl % '00_01_01' imageSearch = '//img[contains(@src, "images/tod/teil2")]' @@ -305,16 +306,17 @@ class DresdenCodak(_ParserScraper): return not data.xpath(self.imageSearch) -class DrFun(_BasicScraper): - baseUrl = 'http://www.ibiblio.org/Dave/' - url = baseUrl + 'ar00502.htm' +class DrFun(_ParserScraper): + baseUrl = ('https://web.archive.org/web/20180726145737/' + 'http://www.ibiblio.org/Dave/') stripUrl = baseUrl + 'ar%s.htm' + url = stripUrl % '00502' firstStripUrl = stripUrl % '00001' - imageSearch = compile(tagre("a", "href", r'(Dr-Fun/df\d+/df[^"]+)')) + imageSearch = '//a[contains(@href, "Dr-Fun/df")]' multipleImagesPerStrip = True - prevSearch = compile(tagre("a", "href", r'([^"]+)') + 'Previous Week,') - help = 'Index format: nnnnn' + prevSearch = '//a[contains(text(), "Previous Week")]' endOfLife = True + help = 'Index format: nnnnn' class Drive(_BasicScraper): diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index 16cc27985..e7b85480c 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -181,10 +181,11 @@ class EverybodyLovesEricRaymond(_ParserScraper): prevSearch = '//a[@rel="prev"]' -# Seems to be GeoBlocked from Germany? class EvilDiva(_WordPressScraper): - url = 'http://www.evildivacomics.com/' + url = ('https://web.archive.org/web/20190221223751/' + 'https://www.evildivacomics.com/') firstStripUrl = url + 'comic/evil-diva-issue-1-cover/' + endOfLife = True class EvilInc(_WordPressScraper): diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index 119e9427f..adc4ea069 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function -from re import compile, escape, IGNORECASE +from re import compile, escape from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper @@ -27,14 +27,15 @@ class Faneurysm(_WPNaviIn): endOfLife = True -class FantasyRealms(_BasicScraper): - url = 'http://www.fantasyrealmsonline.com/' - stripUrl = url + 'manga/%s.php' - imageSearch = compile(r'« Previous') + imageSearch = '//div[@id="comic"]/img' + prevSearch = '//a[contains(text(), "Previous")]' + endOfLife = True help = 'Index format: nnn' @@ -116,15 +109,12 @@ class GlassHalfEmpty(_BasicScraper): help = 'Index format: nnn' -class GleefulNihilism(_BasicScraper): - url = 'http://gleefulnihilism.com/' - rurl = escape(url) +class GleefulNihilism(_WordPressScraper): + url = ('https://web.archive.org/web/20170911203122/' + 'http://gleefulnihilism.com/') stripUrl = url + 'comic/%s/' firstStripUrl = stripUrl % 'amoeba' - imageSearch = compile( - tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)) - prevSearch = compile( - tagre("a", "href", r'(%scomic/[^"]+)' % rurl) + '‹') + endOfLife = True help = 'Index format: stripname' diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py index 10b2a16e3..93fa3ca29 100644 --- a/dosagelib/plugins/l.py +++ b/dosagelib/plugins/l.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -68,9 +68,11 @@ class LetsSpeakEnglish(_ComicControlScraper): class LifeAintNoPonyFarm(_WordPressScraper): - url = 'http://sarahburrini.com/en/' + url = ('https://web.archive.org/web/20181221154155/' + 'http://sarahburrini.com/en/') firstStripUrl = url + 'comic/my-first-webcomic/' multipleImagesPerStrip = True + endOfLife = True class LilithsWord(_ComicControlScraper): diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index d75360d89..a9794948f 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2019 Tobias Gruetzmacher +# Copyright (C) 2015-2020 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper -from ..helpers import indirectStarter +from ..helpers import indirectStarter, xpath_class from ..util import tagre from .common import _ComicControlScraper, _WordPressScraper, _WPNavi @@ -88,12 +88,13 @@ class Newshounds(_ParserScraper): return super().getPrevUrl(url, data) -class NewWorld(_BasicScraper): - url = 'http://www.tfsnewworld.com/' +class NewWorld(_WordPressScraper): + url = ('https://web.archive.org/web/20190718012133/' + 'http://www.tfsnewworld.com/') stripUrl = url + '%s/' firstStripUrl = stripUrl % '2007/08/30/63' - imageSearch = compile(r'