Remove some comics which disappeared

2020-01-09 18:04:09 +01:00 · 2020-01-09 18:04:09 +01:00 · 3c930167c6
commit 3c930167c6
parent ff3b7f2cbe
11 changed files with 21 additions and 155 deletions
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@@ -10,7 +10,7 @@ from re import compile, escape
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import bounceStarter, indirectStarter
 from ..util import tagre
-from .common import _TumblrScraper, _WordPressScraper, _WPNavi
+from .common import _WordPressScraper, _WPNavi


 class CampComic(_BasicScraper):
@@ -210,19 +210,6 @@ class ChainsawSuit(_WordPressScraper):
    help = 'Index format: yyyy/mm/dd/stripname'


-class Champ2010(_BasicScraper):
-    baseUrl = 'http://jedcollins.com/champ2010/'
-    rurl = escape(baseUrl)
-    # the latest URL is hard coded since the comic is discontinued
-    url = baseUrl + 'champ-12-30-10.html'
-    stripUrl = baseUrl + '%s.html'
-    firstStripUrl = stripUrl % 'champ1-1-10-fuck'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
-                               after="Previous"))
-    help = 'Index format: yy-dd-mm'
-
-
 class ChannelAte(_WPNavi):
    url = 'http://www.channelate.com/'

@@ -294,15 +281,6 @@ class Cloudscratcher(_ParserScraper):
    starter = indirectStarter


-class Collar6(_TumblrScraper):
-    url = 'http://collar6.tumblr.com/'
-    firstStripUrl = url + 'post/138117470810/the-very-first-strip-from-when-i-thought-it-was'
-    imageSearch = '//figure[@class="photo-hires-item"]//img'
-    prevSearch = '//a[@class="previous-button"]'
-    latestSearch = '//li[@class="timestamp"]/a'
-    adult = True
-
-
 class CollegeCatastrophe(_ParserScraper):
    url = 'https://www.tigerknight.com/cc'
    stripUrl = url + '/%s'
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@@ -13,21 +13,6 @@ from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn


-class DamnLol(_ParserScraper):
-    url = 'http://www.damnlol.com/'
-    # Classes for next and previous seem to be swapped...
-    prevSearch = '//a[%s]' % xpath_class("next")
-    nextSearch = '//a[%s]' % xpath_class("previous")
-    imageSearch = '//img[@id="post-image"]'
-    starter = bounceStarter
-
-    def namer(self, image_url, page_url):
-        ext = image_url.rsplit('.', 1)[1]
-        path = page_url.rsplit('/', 1)[1][:-5]
-        stripname, number = path.rsplit('-', 1)
-        return '%s-%s.%s' % (number, stripname, ext)
-
-
 class Damonk(_BasicScraper):
    url = 'http://www.damonk.com/'
    stripUrl = url + 'd/%s.html'
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@@ -98,17 +98,6 @@ class GirlsWithSlingshots(_BasicScraper):
    help = 'Index format: stripname'


-class GlassHalfEmpty(_BasicScraper):
-    url = 'http://www.defectivity.com/ghe/index.php'
-    stripUrl = url + '?strip_id=%s'
-    firstStripUrl = stripUrl % '0'
-    imageSearch = compile(r'src="(comics/.+?)"')
-    prevSearch = compile(
-        tagre("a", "href", r'(\?strip_id=\d+)') +
-        tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
-    help = 'Index format: nnn'
-
-
 class GleefulNihilism(_WordPressScraper):
    url = ('https://web.archive.org/web/20170911203122/'
        'http://gleefulnihilism.com/')
--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

 from re import compile, escape

-from ..scraper import _BasicScraper, _ParserScraper
+from ..scraper import _BasicScraper
 from ..util import tagre
 from ..helpers import indirectStarter, xpath_class
 from .common import _ComicControlScraper
@@ -23,15 +23,6 @@ class JackCannon(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/page-nnn'


-class JerkCity(_ParserScraper):
-    url = 'http://www.jerkcity.com/'
-    stripUrl = url + 'jerkcity%s.html'
-    firstStripUrl = stripUrl % '1'
-    imageSearch = '//div[@id="rapist"]//img'
-    prevSearch = '//div[@id="rapist"]/a'
-    help = 'Index format: n'
-
-
 class JimBenton(_BasicScraper):
    url = 'http://www.jimbenton.com/page14/page14.html'
    stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
--- a/dosagelib/plugins/keenspot.py
+++ b/dosagelib/plugins/keenspot.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -77,7 +77,6 @@ class KeenSpot(_ParserScraper):
            cls('InHere', 'inhere'),
            cls('JadeWarriors', 'jadewarriors'),
            cls('Katrina', 'katrina'),
-            cls('Landis', 'landis'),
            cls('LutherStrode', 'lutherstrode'),
            cls('MakeshiftMiracle', 'makeshiftmiracle'),
            cls('Marksmen', 'marksmen'),
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@@ -106,24 +106,6 @@ class OnTheEdge(_WordPressScraper):
    firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/'


-class OnTheFastrack(_BasicScraper):
-    url = 'http://onthefastrack.com/'
-    stripUrl = url + 'comics/%s'
-    firstStripUrl = stripUrl % 'november-13-2000'
-    imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
-    prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
-    help = 'Index format: monthname-dd-yyyy'
-
-    def namer(self, image_url, page_url):
-        name = page_url.rsplit('/', 3)[2]
-        if name == "onthefastrack.com":
-            import datetime
-            name = datetime.date.today().strftime("%B-%d-%Y")
-        # name.title ensures that the comics are named the same
-        # as in the previous scraper
-        return "%s.gif" % name.title()
-
-
 class OopsComicAdventure(_WordPressScraper):
    url = ('https://web.archive.org/web/20190102215141/'
        'http://oopscomicadventure.com/')
--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@@ -18,6 +18,7 @@ class Removed(Scraper):
        'brk': 'Comic navigation is broken.',
        'mov': 'Comic moved to a new hoster and no new module was written.',
        'mis': 'Pages are missing from the comic.',
+        'acc': 'Account is needed to access site.',
    }

    def __init__(self, name, reason='del'):
@@ -51,7 +52,10 @@ class Removed(Scraper):
            cls('BrightlyWound'),
            cls('Caggage'),
            cls('Carciphona', 'jsh'),
+            cls('Champ2010'),
            cls('CheckerboardNightmare'),
+            # Patreon & Pixiv (https://www.patreon.com/Collar6)
+            cls('Collar6', 'mov'),
            cls('ComicFury/30years'),
            cls('ComicFury/AAB'),
            cls('ComicFury/AdventuresofMaggie'),
@@ -207,6 +211,7 @@ class Removed(Scraper):
            cls('CtrlAltDel', 'block'),
            cls('CtrlAltDel/Sillies', 'block'),
            cls('DailyDose'),
+            cls('DamnLol'),
            cls('DeathToTheExtremist'),
            cls('DoctorCat', 'brk'),
            cls('DungeonsAndDenizens'),
@@ -217,6 +222,7 @@ class Removed(Scraper):
            cls('FeyWinds'),
            cls('FilibusterCartoons'),
            cls('FowlLanguage', 'block'),
+            cls('GlassHalfEmpty'),
            cls('GoComics/ABootsAndPupComic'),
            cls('GoComics/AdventuresofDaisy'),
            cls('GoComics/AdventuresofMartyandTurkey'),
@@ -407,8 +413,10 @@ class Removed(Scraper):
            cls('GoComics/ZacharyNixonJohnson'),
            cls('GunnerkrigCourt'),
            cls('HorribleVille'),
+            cls('JerkCity'),
            cls('KatzenfutterGeleespritzer'),
            cls('KeenSpot/Adventurers', 'mov'),
+            cls('KeenSpot/Landis'),
            cls('Key'),
            cls('KillerKomics'),
            cls('Kukuburi'),
@@ -420,7 +428,6 @@ class Removed(Scraper):
            cls('Nnewts'),
            cls('OddFish'),
            cls('OneQuestion'),
-            cls('OnTheFasttrack'),
            cls('OrnerBoy'),
            cls('PensAndTales/Evilish'),
            cls('PensAndTales/FireflyCross'),
@@ -431,7 +438,10 @@ class Removed(Scraper):
            cls('Pimpette'),
            cls('PunksAndNerds', 'mis'),
            cls('PunksAndNerdsOld'),
+            # Moved to tapas.io, which blocks us
+            cls('RadioactivePanda', 'block'),
            cls('RedsPlanet'),
+            cls('RedString'),
            cls('SmackJeeves/Aarrevaara'),
            cls('SmackJeeves/AchievementStuck'),
            cls('SmackJeeves/AGirlAndHerShadow'),
@@ -581,6 +591,7 @@ class Removed(Scraper):
            cls('SuburbanTribe'),
            cls('TheOuterQuarter'),
            cls('TheParkingLotIsFull'),
+            cls('TheThinHLine', 'acc'),
            cls('ThunderAndLightning'),
            cls('TinyKittenTeeth'),
            cls('TwoTwoOneFour'),
@@ -592,6 +603,7 @@ class Removed(Scraper):
            cls('WebcomicsNation/AgnesQuill'),
            cls('WebcomicsNation/MyMuse'),
            cls('WebcomicsNation/NekkoAndJoruba'),
+            cls('WeCanSleepTomorrow'),
            cls('WhiteNinja'),
            cls('WLP/ShadowChasers'),
            cls('WotNow'),
@@ -712,8 +724,10 @@ class Renamed(Scraper):
            cls('KeenSpot/Newshounds', 'Newshounds'),
            cls('KeenSpot/SinFest', 'SinFest'),
            cls('KeenSpot/TheGodChild', 'GodChild'),
+            cls('OnTheFasttrack', 'ComicsKingdom/OnTheFastrack'),
            cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
            cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
+            cls('ShermansLagoon', 'ComicsKingdom/ShermansLagoon'),
            cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
            cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
            cls('SmackJeeves/ForestHill', 'ForestHill'),
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@@ -14,14 +14,6 @@ from ..util import tagre
 from .common import _WordPressScraper


-class RadioactivePanda(_BasicScraper):
-    url = 'http://www.radioactivepanda.com/'
-    stripUrl = url + 'comic/%s'
-    imageSearch = compile(r'<img src="(/Assets/.*?)".+?"comicimg"')
-    prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
-    help = 'Index format: n (no padding)'
-
-
 class RalfTheDestroyer(_WordPressScraper):
    url = 'http://ralfthedestroyer.com/'

@@ -75,15 +67,6 @@ class RedMeat(_ParserScraper):
        return '_'.join(parts[1:3])


-class RedString(_BasicScraper):
-    url = 'http://www.redstring.strawberrycomics.com/'
-    stripUrl = url + 'index.php?id=%s'
-    firstStripUrl = stripUrl % '434'
-    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
-    help = 'Index format: nnn'
-
-
 class Replay(_ParserScraper):
    url = 'http://replaycomic.com/'
    stripUrl = url + 'comic/%s/'
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -7,7 +7,6 @@ from __future__ import absolute_import, division, print_function

 from re import compile, escape, IGNORECASE, sub
 from os.path import splitext
-import datetime

 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
@@ -198,24 +197,6 @@ class Sheldon(_BasicScraper):
    help = 'Index format: yymmdd'


-class ShermansLagoon(_BasicScraper):
-    url = 'http://shermanslagoon.com/'
-    stripUrl = url + 'comics/%s'
-    firstStripUrl = stripUrl % '/december-29-2003/'
-    imageSearch = compile(tagre("img", "src",
-        r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+?)'))
-    prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
-    help = 'Index format: monthname-day-year'
-
-    def namer(self, image_url, page_url):
-        name = page_url.rsplit('/', 3)[2]
-        if name == "shermanslagoon.com":
-            name = datetime.date.today().strftime("%B-%d-%Y").lower()
-        # name is monthname-day-year
-        month, day, year = name.split('-')
-        return "%s-%s-%s" % (year, month, day)
-
-
 class ShipInABottle(_WPNavi):
    url = 'http://shipinbottle.pepsaga.com/'
    stripUrl = url + '?p=%s'
@@ -248,12 +229,6 @@ class SinFest(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'


-class Sithrah(_ParserScraper):
-    url = 'http://sithrah.com/'
-    imageSearch = '//div[@class="webcomic-image"]/img'
-    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
-
-
 class SixPackOfOtters(_ParserScraper):
    url = 'http://sixpackofotters.com/'
    stripUrl = url + 'pages/%s/'
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -14,7 +14,7 @@ except ImportError:
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter, xpath_class
 from ..util import tagre
-from .common import _ComicControlScraper, _TumblrScraper, _WordPressScraper, _WPNavi, _WPNaviIn
+from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn


 class TailsAndTactics(_ParserScraper):
@@ -182,23 +182,6 @@ class TheRockCocks(_BasicScraper):
    adult = True


-class TheThinHLine(_TumblrScraper):
-    url = 'http://thinhline.tumblr.com/'
-    firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the'
-    imageSearch = '//img[@id="content-image"]/@data-src'
-    prevSearch = '//div[@id="pagination"]/a[text()=">"]'
-    latestSearch = '//a[@class="timestamp"]'
-    adult = True
-
-    indirectImageSearch = '//div[@id="post"]//a[not(@rel) and img]'
-
-    def getComicStrip(self, url, data):
-        """The comic strip image is in a separate page."""
-        subPage = self.fetchUrl(url, data, self.indirectImageSearch)
-        pageData = self.getPage(subPage)
-        return super(TheThinHLine, self).getComicStrip(subPage, pageData)
-
-
 class TheWhiteboard(_ParserScraper):
    BROKEN_PAGE_MIDDLE = compile(r'</body></html>\n<')
    url = 'http://www.the-whiteboard.com/'
--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -48,15 +48,6 @@ class WebDesignerCOTW(_ParserScraper):
        return "%s-%s" % (week, imagename)


-class WeCanSleepTomorrow(_BasicScraper):
-    url = 'http://wecansleeptomorrow.com/'
-    rurl = escape(url)
-    stripUrl = url + '%s/'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
-    help = 'Index format: yyyy/mm/dd/stripname'
-
-
 class Weregeek(_ParserScraper):
    url = 'http://www.weregeek.com/'
    stripUrl = url + '%s/'
@@ -209,10 +200,6 @@ class WorldOfMrToast(_BasicScraper):
            return None


-class WorldOfWarcraftEh(_WordPressScraper):
-    url = 'http://woweh.com/'
-
-
 class WormWorldSaga(_BasicScraper):
    url = 'http://www.wormworldsaga.com/'
    stripUrl = url + 'chapters/%s/index.php'