Remove some comics which disappeared

This commit is contained in:
Tobias Gruetzmacher 2020-01-09 18:04:09 +01:00
parent ff3b7f2cbe
commit 3c930167c6
11 changed files with 21 additions and 155 deletions

View file

@ -10,7 +10,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import bounceStarter, indirectStarter
from ..util import tagre from ..util import tagre
from .common import _TumblrScraper, _WordPressScraper, _WPNavi from .common import _WordPressScraper, _WPNavi
class CampComic(_BasicScraper): class CampComic(_BasicScraper):
@ -210,19 +210,6 @@ class ChainsawSuit(_WordPressScraper):
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class Champ2010(_BasicScraper):
    # All strip pages live directly below this base address.
    baseUrl = 'http://jedcollins.com/champ2010/'
    # Regex-escaped copy of the base address, embedded in the patterns below.
    rurl = escape(baseUrl)
    help = 'Index format: yy-dd-mm'

    # the latest URL is hard coded since the comic is discontinued
    url = baseUrl + 'champ-12-30-10.html'
    stripUrl = baseUrl + '%s.html'
    firstStripUrl = stripUrl % 'champ1-1-10-fuck'

    # Image: an <img> whose src lies below "<baseUrl>comics/".
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Previous strip: an <a> below baseUrl, matched with after="Previous".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous"))
class ChannelAte(_WPNavi): class ChannelAte(_WPNavi):
url = 'http://www.channelate.com/' url = 'http://www.channelate.com/'
@ -294,15 +281,6 @@ class Cloudscratcher(_ParserScraper):
starter = indirectStarter starter = indirectStarter
class Collar6(_TumblrScraper):
    # Flagged as adult content.
    adult = True
    url = 'http://collar6.tumblr.com/'
    firstStripUrl = url + (
        'post/138117470810/'
        'the-very-first-strip-from-when-i-thought-it-was')

    # XPath expressions for the strip image and the navigation links.
    imageSearch = '//figure[@class="photo-hires-item"]//img'
    prevSearch = '//a[@class="previous-button"]'
    latestSearch = '//li[@class="timestamp"]/a'
class CollegeCatastrophe(_ParserScraper): class CollegeCatastrophe(_ParserScraper):
url = 'https://www.tigerknight.com/cc' url = 'https://www.tigerknight.com/cc'
stripUrl = url + '/%s' stripUrl = url + '/%s'

View file

@ -13,21 +13,6 @@ from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
class DamnLol(_ParserScraper):
    url = 'http://www.damnlol.com/'
    starter = bounceStarter
    imageSearch = '//img[@id="post-image"]'
    # Classes for next and previous seem to be swapped...
    prevSearch = '//a[%s]' % xpath_class("next")
    nextSearch = '//a[%s]' % xpath_class("previous")

    def namer(self, image_url, page_url):
        """Return '<number>-<stripname>.<ext>' derived from both URLs.

        The extension is whatever follows the last '.' of image_url. The
        page name is the last path component of page_url without its final
        five characters; it is split at its last '-' into strip name and
        number.
        """
        extension = image_url.rsplit('.', 1)[1]
        last_component = page_url.rsplit('/', 1)[1]
        page_name = last_component[:-5]
        stripname, number = page_name.rsplit('-', 1)
        return '%s-%s.%s' % (number, stripname, extension)
class Damonk(_BasicScraper): class Damonk(_BasicScraper):
url = 'http://www.damonk.com/' url = 'http://www.damonk.com/'
stripUrl = url + 'd/%s.html' stripUrl = url + 'd/%s.html'

View file

@ -98,17 +98,6 @@ class GirlsWithSlingshots(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class GlassHalfEmpty(_BasicScraper):
    help = 'Index format: nnn'
    url = 'http://www.defectivity.com/ghe/index.php'
    # Strips are addressed through the strip_id query parameter.
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'

    # Image: any src attribute pointing below "comics/".
    imageSearch = compile(r'src="(comics/.+?)"')
    # Previous strip: a strip_id link directly followed by the
    # "onback.jpg" arrow button image.
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)')
        + tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg')
    )
class GleefulNihilism(_WordPressScraper): class GleefulNihilism(_WordPressScraper):
url = ('https://web.archive.org/web/20170911203122/' url = ('https://web.archive.org/web/20170911203122/'
'http://gleefulnihilism.com/') 'http://gleefulnihilism.com/')

View file

@ -1,13 +1,13 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher # Copyright (C) 2015-2020 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
from ..helpers import indirectStarter, xpath_class from ..helpers import indirectStarter, xpath_class
from .common import _ComicControlScraper from .common import _ComicControlScraper
@ -23,15 +23,6 @@ class JackCannon(_BasicScraper):
help = 'Index format: yyyy/mm/dd/page-nnn' help = 'Index format: yyyy/mm/dd/page-nnn'
class JerkCity(_ParserScraper):
    help = 'Index format: n'
    url = 'http://www.jerkcity.com/'
    # Strip pages are named jerkcity<n>.html below the site root.
    stripUrl = url + 'jerkcity%s.html'
    firstStripUrl = stripUrl % '1'

    # Both the image and the previous link are looked up inside the same
    # <div> element.
    imageSearch = '//div[@id="rapist"]//img'
    prevSearch = '//div[@id="rapist"]/a'
class JimBenton(_BasicScraper): class JimBenton(_BasicScraper):
url = 'http://www.jimbenton.com/page14/page14.html' url = 'http://www.jimbenton.com/page14/page14.html'
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html' stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher # Copyright (C) 2015-2020 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
@ -77,7 +77,6 @@ class KeenSpot(_ParserScraper):
cls('InHere', 'inhere'), cls('InHere', 'inhere'),
cls('JadeWarriors', 'jadewarriors'), cls('JadeWarriors', 'jadewarriors'),
cls('Katrina', 'katrina'), cls('Katrina', 'katrina'),
cls('Landis', 'landis'),
cls('LutherStrode', 'lutherstrode'), cls('LutherStrode', 'lutherstrode'),
cls('MakeshiftMiracle', 'makeshiftmiracle'), cls('MakeshiftMiracle', 'makeshiftmiracle'),
cls('Marksmen', 'marksmen'), cls('Marksmen', 'marksmen'),

View file

@ -106,24 +106,6 @@ class OnTheEdge(_WordPressScraper):
firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/' firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/'
class OnTheFastrack(_BasicScraper):
    url = 'http://onthefastrack.com/'
    stripUrl = url + 'comics/%s'
    # NOTE(review): prevSearch only captures URLs ending in '/', while this
    # URL has no trailing slash -- confirm the two are still recognised as
    # the same page by the crawler.
    firstStripUrl = stripUrl % 'november-13-2000'
    imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
    prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
    help = 'Index format: monthname-dd-yyyy'

    def namer(self, image_url, page_url):
        """Return the file name '<Monthname-dd-yyyy>.gif' for a strip.

        The name is the last path component of page_url. Trailing slashes
        are stripped first, so '.../comics/november-13-2000' and
        '.../comics/november-13-2000/' yield the same name (previously the
        slash-less form produced 'Comics.gif'). If page_url is the bare
        site address, today's date is used instead.
        """
        name = page_url.rstrip('/').rsplit('/', 1)[-1]
        if name == "onthefastrack.com":
            import datetime
            name = datetime.date.today().strftime("%B-%d-%Y")
        # name.title ensures that the comics are named the same
        # as in the previous scraper
        return "%s.gif" % name.title()
class OopsComicAdventure(_WordPressScraper): class OopsComicAdventure(_WordPressScraper):
url = ('https://web.archive.org/web/20190102215141/' url = ('https://web.archive.org/web/20190102215141/'
'http://oopscomicadventure.com/') 'http://oopscomicadventure.com/')

View file

@ -18,6 +18,7 @@ class Removed(Scraper):
'brk': 'Comic navigation is broken.', 'brk': 'Comic navigation is broken.',
'mov': 'Comic moved to a new hoster and no new module was written.', 'mov': 'Comic moved to a new hoster and no new module was written.',
'mis': 'Pages are missing from the comic.', 'mis': 'Pages are missing from the comic.',
'acc': 'Account is needed to access site.',
} }
def __init__(self, name, reason='del'): def __init__(self, name, reason='del'):
@ -51,7 +52,10 @@ class Removed(Scraper):
cls('BrightlyWound'), cls('BrightlyWound'),
cls('Caggage'), cls('Caggage'),
cls('Carciphona', 'jsh'), cls('Carciphona', 'jsh'),
cls('Champ2010'),
cls('CheckerboardNightmare'), cls('CheckerboardNightmare'),
# Patreon & Pixiv (https://www.patreon.com/Collar6)
cls('Collar6', 'mov'),
cls('ComicFury/30years'), cls('ComicFury/30years'),
cls('ComicFury/AAB'), cls('ComicFury/AAB'),
cls('ComicFury/AdventuresofMaggie'), cls('ComicFury/AdventuresofMaggie'),
@ -207,6 +211,7 @@ class Removed(Scraper):
cls('CtrlAltDel', 'block'), cls('CtrlAltDel', 'block'),
cls('CtrlAltDel/Sillies', 'block'), cls('CtrlAltDel/Sillies', 'block'),
cls('DailyDose'), cls('DailyDose'),
cls('DamnLol'),
cls('DeathToTheExtremist'), cls('DeathToTheExtremist'),
cls('DoctorCat', 'brk'), cls('DoctorCat', 'brk'),
cls('DungeonsAndDenizens'), cls('DungeonsAndDenizens'),
@ -217,6 +222,7 @@ class Removed(Scraper):
cls('FeyWinds'), cls('FeyWinds'),
cls('FilibusterCartoons'), cls('FilibusterCartoons'),
cls('FowlLanguage', 'block'), cls('FowlLanguage', 'block'),
cls('GlassHalfEmpty'),
cls('GoComics/ABootsAndPupComic'), cls('GoComics/ABootsAndPupComic'),
cls('GoComics/AdventuresofDaisy'), cls('GoComics/AdventuresofDaisy'),
cls('GoComics/AdventuresofMartyandTurkey'), cls('GoComics/AdventuresofMartyandTurkey'),
@ -407,8 +413,10 @@ class Removed(Scraper):
cls('GoComics/ZacharyNixonJohnson'), cls('GoComics/ZacharyNixonJohnson'),
cls('GunnerkrigCourt'), cls('GunnerkrigCourt'),
cls('HorribleVille'), cls('HorribleVille'),
cls('JerkCity'),
cls('KatzenfutterGeleespritzer'), cls('KatzenfutterGeleespritzer'),
cls('KeenSpot/Adventurers', 'mov'), cls('KeenSpot/Adventurers', 'mov'),
cls('KeenSpot/Landis'),
cls('Key'), cls('Key'),
cls('KillerKomics'), cls('KillerKomics'),
cls('Kukuburi'), cls('Kukuburi'),
@ -420,7 +428,6 @@ class Removed(Scraper):
cls('Nnewts'), cls('Nnewts'),
cls('OddFish'), cls('OddFish'),
cls('OneQuestion'), cls('OneQuestion'),
cls('OnTheFasttrack'),
cls('OrnerBoy'), cls('OrnerBoy'),
cls('PensAndTales/Evilish'), cls('PensAndTales/Evilish'),
cls('PensAndTales/FireflyCross'), cls('PensAndTales/FireflyCross'),
@ -431,7 +438,10 @@ class Removed(Scraper):
cls('Pimpette'), cls('Pimpette'),
cls('PunksAndNerds', 'mis'), cls('PunksAndNerds', 'mis'),
cls('PunksAndNerdsOld'), cls('PunksAndNerdsOld'),
# Moved to tapas.io, which blocks us
cls('RadioactivePanda', 'block'),
cls('RedsPlanet'), cls('RedsPlanet'),
cls('RedString'),
cls('SmackJeeves/Aarrevaara'), cls('SmackJeeves/Aarrevaara'),
cls('SmackJeeves/AchievementStuck'), cls('SmackJeeves/AchievementStuck'),
cls('SmackJeeves/AGirlAndHerShadow'), cls('SmackJeeves/AGirlAndHerShadow'),
@ -581,6 +591,7 @@ class Removed(Scraper):
cls('SuburbanTribe'), cls('SuburbanTribe'),
cls('TheOuterQuarter'), cls('TheOuterQuarter'),
cls('TheParkingLotIsFull'), cls('TheParkingLotIsFull'),
cls('TheThinHLine', 'acc'),
cls('ThunderAndLightning'), cls('ThunderAndLightning'),
cls('TinyKittenTeeth'), cls('TinyKittenTeeth'),
cls('TwoTwoOneFour'), cls('TwoTwoOneFour'),
@ -592,6 +603,7 @@ class Removed(Scraper):
cls('WebcomicsNation/AgnesQuill'), cls('WebcomicsNation/AgnesQuill'),
cls('WebcomicsNation/MyMuse'), cls('WebcomicsNation/MyMuse'),
cls('WebcomicsNation/NekkoAndJoruba'), cls('WebcomicsNation/NekkoAndJoruba'),
cls('WeCanSleepTomorrow'),
cls('WhiteNinja'), cls('WhiteNinja'),
cls('WLP/ShadowChasers'), cls('WLP/ShadowChasers'),
cls('WotNow'), cls('WotNow'),
@ -712,8 +724,10 @@ class Renamed(Scraper):
cls('KeenSpot/Newshounds', 'Newshounds'), cls('KeenSpot/Newshounds', 'Newshounds'),
cls('KeenSpot/SinFest', 'SinFest'), cls('KeenSpot/SinFest', 'SinFest'),
cls('KeenSpot/TheGodChild', 'GodChild'), cls('KeenSpot/TheGodChild', 'GodChild'),
cls('OnTheFasttrack', 'ComicsKingdom/OnTheFastrack'),
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'), cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'), cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
cls('ShermansLagoon', 'ComicsKingdom/ShermansLagoon'),
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'), cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'), cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
cls('SmackJeeves/ForestHill', 'ForestHill'), cls('SmackJeeves/ForestHill', 'ForestHill'),

View file

@ -14,14 +14,6 @@ from ..util import tagre
from .common import _WordPressScraper from .common import _WordPressScraper
class RadioactivePanda(_BasicScraper):
    help = 'Index format: n (no padding)'
    url = 'http://www.radioactivepanda.com/'
    stripUrl = url + 'comic/%s'

    # Image: an <img> below /Assets/ followed later by "comicimg".
    imageSearch = compile(
        r'<img src="(/Assets/.*?)".+?"comicimg"')
    # Previous strip: a /comic/ link followed later by "previous_btn".
    prevSearch = compile(
        r'<a href="(/comic/.*?)".+?previous_btn')
class RalfTheDestroyer(_WordPressScraper): class RalfTheDestroyer(_WordPressScraper):
url = 'http://ralfthedestroyer.com/' url = 'http://ralfthedestroyer.com/'
@ -75,15 +67,6 @@ class RedMeat(_ParserScraper):
return '_'.join(parts[1:3]) return '_'.join(parts[1:3])
class RedString(_BasicScraper):
    help = 'Index format: nnn'
    url = 'http://www.redstring.strawberrycomics.com/'
    # Strips are addressed through the id query parameter of index.php.
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '434'

    # Image: an <img> whose src starts with "comics/".
    imageSearch = compile(
        tagre("img", "src", r'(comics/[^"]+)'))
    # Previous strip: an index.php?id=<digits> link, matched with
    # after="prev".
    prevSearch = compile(
        tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
class Replay(_ParserScraper): class Replay(_ParserScraper):
url = 'http://replaycomic.com/' url = 'http://replaycomic.com/'
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'

View file

@ -7,7 +7,6 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE, sub from re import compile, escape, IGNORECASE, sub
from os.path import splitext from os.path import splitext
import datetime
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
@ -198,24 +197,6 @@ class Sheldon(_BasicScraper):
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
class ShermansLagoon(_BasicScraper):
    url = 'http://shermanslagoon.com/'
    stripUrl = url + 'comics/%s'
    # No leading slash here: stripUrl already ends in 'comics/', so a leading
    # slash produced 'comics//december-29-2003/', which prevSearch (a single
    # slash after 'comics') can never yield.
    firstStripUrl = stripUrl % 'december-29-2003/'
    imageSearch = compile(tagre("img", "src",
        r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+?)'))
    prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
    help = 'Index format: monthname-day-year'

    def namer(self, image_url, page_url):
        """Return the strip name as 'year-monthname-day'.

        The name is the last path component of page_url. Trailing slashes
        are stripped first, so the result does not depend on whether the
        URL ends in '/'. If page_url is the bare site address, today's date
        is used instead.

        Raises ValueError if the component is not exactly three
        '-'-separated parts.
        """
        name = page_url.rstrip('/').rsplit('/', 1)[-1]
        if name == "shermanslagoon.com":
            # Imported locally so the class does not depend on a
            # module-level import.
            import datetime
            name = datetime.date.today().strftime("%B-%d-%Y").lower()
        # name is monthname-day-year
        month, day, year = name.split('-')
        return "%s-%s-%s" % (year, month, day)
class ShipInABottle(_WPNavi): class ShipInABottle(_WPNavi):
url = 'http://shipinbottle.pepsaga.com/' url = 'http://shipinbottle.pepsaga.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
@ -248,12 +229,6 @@ class SinFest(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class Sithrah(_ParserScraper):
    url = 'http://sithrah.com/'
    # Previous strip: an <a> carrying the "previous-webcomic-link" class.
    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
    # Image: an <img> directly inside the "webcomic-image" <div>.
    imageSearch = '//div[@class="webcomic-image"]/img'
class SixPackOfOtters(_ParserScraper): class SixPackOfOtters(_ParserScraper):
url = 'http://sixpackofotters.com/' url = 'http://sixpackofotters.com/'
stripUrl = url + 'pages/%s/' stripUrl = url + 'pages/%s/'

View file

@ -14,7 +14,7 @@ except ImportError:
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class from ..helpers import indirectStarter, xpath_class
from ..util import tagre from ..util import tagre
from .common import _ComicControlScraper, _TumblrScraper, _WordPressScraper, _WPNavi, _WPNaviIn from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn
class TailsAndTactics(_ParserScraper): class TailsAndTactics(_ParserScraper):
@ -182,23 +182,6 @@ class TheRockCocks(_BasicScraper):
adult = True adult = True
class TheThinHLine(_TumblrScraper):
    # Flagged as adult content.
    adult = True
    url = 'http://thinhline.tumblr.com/'
    firstStripUrl = url + (
        'post/4177372348/'
        'thl-1-a-cats-got-his-tongue-click-on-the')

    # XPath expressions for the image and the navigation links.
    imageSearch = '//img[@id="content-image"]/@data-src'
    prevSearch = '//div[@id="pagination"]/a[text()=">"]'
    latestSearch = '//a[@class="timestamp"]'
    # Link on the post page that leads to the page holding the image.
    indirectImageSearch = '//div[@id="post"]//a[not(@rel) and img]'

    def getComicStrip(self, url, data):
        """The comic strip image is in a separate page."""
        # Resolve the sub-page link, load that page, and let the parent
        # class extract the strip from it.
        image_page_url = self.fetchUrl(url, data, self.indirectImageSearch)
        image_page_data = self.getPage(image_page_url)
        parent = super(TheThinHLine, self)
        return parent.getComicStrip(image_page_url, image_page_data)
class TheWhiteboard(_ParserScraper): class TheWhiteboard(_ParserScraper):
BROKEN_PAGE_MIDDLE = compile(r'</body></html>\n<') BROKEN_PAGE_MIDDLE = compile(r'</body></html>\n<')
url = 'http://www.the-whiteboard.com/' url = 'http://www.the-whiteboard.com/'

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2019 Tobias Gruetzmacher # Copyright (C) 2015-2020 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
@ -48,15 +48,6 @@ class WebDesignerCOTW(_ParserScraper):
return "%s-%s" % (week, imagename) return "%s-%s" % (week, imagename)
class WeCanSleepTomorrow(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://wecansleeptomorrow.com/'
    # Regex-escaped copy of the site address, embedded in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'

    # Image: an <img> whose src lies below "<url>comics/".
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Previous strip: an <a> below the site address, matched with
    # after="prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
class Weregeek(_ParserScraper): class Weregeek(_ParserScraper):
url = 'http://www.weregeek.com/' url = 'http://www.weregeek.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -209,10 +200,6 @@ class WorldOfMrToast(_BasicScraper):
return None return None
class WorldOfWarcraftEh(_WordPressScraper):
    # Only the site address is set; everything else comes from the base class.
    url = 'http://woweh.com/'
class WormWorldSaga(_BasicScraper): class WormWorldSaga(_BasicScraper):
url = 'http://www.wormworldsaga.com/' url = 'http://www.wormworldsaga.com/'
stripUrl = url + 'chapters/%s/index.php' stripUrl = url + 'chapters/%s/index.php'