Remove some comics which disappeared
This commit is contained in:
parent
ff3b7f2cbe
commit
3c930167c6
11 changed files with 21 additions and 155 deletions
|
@ -10,7 +10,7 @@ from re import compile, escape
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import tagre
|
||||
from .common import _TumblrScraper, _WordPressScraper, _WPNavi
|
||||
from .common import _WordPressScraper, _WPNavi
|
||||
|
||||
|
||||
class CampComic(_BasicScraper):
|
||||
|
@ -210,19 +210,6 @@ class ChainsawSuit(_WordPressScraper):
|
|||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class Champ2010(_BasicScraper):
|
||||
baseUrl = 'http://jedcollins.com/champ2010/'
|
||||
rurl = escape(baseUrl)
|
||||
# the latest URL is hard coded since the comic is discontinued
|
||||
url = baseUrl + 'champ-12-30-10.html'
|
||||
stripUrl = baseUrl + '%s.html'
|
||||
firstStripUrl = stripUrl % 'champ1-1-10-fuck'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
|
||||
after="Previous"))
|
||||
help = 'Index format: yy-dd-mm'
|
||||
|
||||
|
||||
class ChannelAte(_WPNavi):
|
||||
url = 'http://www.channelate.com/'
|
||||
|
||||
|
@ -294,15 +281,6 @@ class Cloudscratcher(_ParserScraper):
|
|||
starter = indirectStarter
|
||||
|
||||
|
||||
class Collar6(_TumblrScraper):
|
||||
url = 'http://collar6.tumblr.com/'
|
||||
firstStripUrl = url + 'post/138117470810/the-very-first-strip-from-when-i-thought-it-was'
|
||||
imageSearch = '//figure[@class="photo-hires-item"]//img'
|
||||
prevSearch = '//a[@class="previous-button"]'
|
||||
latestSearch = '//li[@class="timestamp"]/a'
|
||||
adult = True
|
||||
|
||||
|
||||
class CollegeCatastrophe(_ParserScraper):
|
||||
url = 'https://www.tigerknight.com/cc'
|
||||
stripUrl = url + '/%s'
|
||||
|
|
|
@ -13,21 +13,6 @@ from ..util import tagre
|
|||
from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn
|
||||
|
||||
|
||||
class DamnLol(_ParserScraper):
|
||||
url = 'http://www.damnlol.com/'
|
||||
# Classes for next and previous seem to be swapped...
|
||||
prevSearch = '//a[%s]' % xpath_class("next")
|
||||
nextSearch = '//a[%s]' % xpath_class("previous")
|
||||
imageSearch = '//img[@id="post-image"]'
|
||||
starter = bounceStarter
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
ext = image_url.rsplit('.', 1)[1]
|
||||
path = page_url.rsplit('/', 1)[1][:-5]
|
||||
stripname, number = path.rsplit('-', 1)
|
||||
return '%s-%s.%s' % (number, stripname, ext)
|
||||
|
||||
|
||||
class Damonk(_BasicScraper):
|
||||
url = 'http://www.damonk.com/'
|
||||
stripUrl = url + 'd/%s.html'
|
||||
|
|
|
@ -98,17 +98,6 @@ class GirlsWithSlingshots(_BasicScraper):
|
|||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class GlassHalfEmpty(_BasicScraper):
|
||||
url = 'http://www.defectivity.com/ghe/index.php'
|
||||
stripUrl = url + '?strip_id=%s'
|
||||
firstStripUrl = stripUrl % '0'
|
||||
imageSearch = compile(r'src="(comics/.+?)"')
|
||||
prevSearch = compile(
|
||||
tagre("a", "href", r'(\?strip_id=\d+)') +
|
||||
tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class GleefulNihilism(_WordPressScraper):
|
||||
url = ('https://web.archive.org/web/20170911203122/'
|
||||
'http://gleefulnihilism.com/')
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile, escape
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import indirectStarter, xpath_class
|
||||
from .common import _ComicControlScraper
|
||||
|
@ -23,15 +23,6 @@ class JackCannon(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/page-nnn'
|
||||
|
||||
|
||||
class JerkCity(_ParserScraper):
|
||||
url = 'http://www.jerkcity.com/'
|
||||
stripUrl = url + 'jerkcity%s.html'
|
||||
firstStripUrl = stripUrl % '1'
|
||||
imageSearch = '//div[@id="rapist"]//img'
|
||||
prevSearch = '//div[@id="rapist"]/a'
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class JimBenton(_BasicScraper):
|
||||
url = 'http://www.jimbenton.com/page14/page14.html'
|
||||
stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -77,7 +77,6 @@ class KeenSpot(_ParserScraper):
|
|||
cls('InHere', 'inhere'),
|
||||
cls('JadeWarriors', 'jadewarriors'),
|
||||
cls('Katrina', 'katrina'),
|
||||
cls('Landis', 'landis'),
|
||||
cls('LutherStrode', 'lutherstrode'),
|
||||
cls('MakeshiftMiracle', 'makeshiftmiracle'),
|
||||
cls('Marksmen', 'marksmen'),
|
||||
|
|
|
@ -106,24 +106,6 @@ class OnTheEdge(_WordPressScraper):
|
|||
firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/'
|
||||
|
||||
|
||||
class OnTheFastrack(_BasicScraper):
|
||||
url = 'http://onthefastrack.com/'
|
||||
stripUrl = url + 'comics/%s'
|
||||
firstStripUrl = stripUrl % 'november-13-2000'
|
||||
imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||
help = 'Index format: monthname-dd-yyyy'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
name = page_url.rsplit('/', 3)[2]
|
||||
if name == "onthefastrack.com":
|
||||
import datetime
|
||||
name = datetime.date.today().strftime("%B-%d-%Y")
|
||||
# name.title ensures that the comics are named the same
|
||||
# as in the previous scraper
|
||||
return "%s.gif" % name.title()
|
||||
|
||||
|
||||
class OopsComicAdventure(_WordPressScraper):
|
||||
url = ('https://web.archive.org/web/20190102215141/'
|
||||
'http://oopscomicadventure.com/')
|
||||
|
|
|
@ -18,6 +18,7 @@ class Removed(Scraper):
|
|||
'brk': 'Comic navigation is broken.',
|
||||
'mov': 'Comic moved to a new hoster and no new module was written.',
|
||||
'mis': 'Pages are missing from the comic.',
|
||||
'acc': 'Account is needed to access site.',
|
||||
}
|
||||
|
||||
def __init__(self, name, reason='del'):
|
||||
|
@ -51,7 +52,10 @@ class Removed(Scraper):
|
|||
cls('BrightlyWound'),
|
||||
cls('Caggage'),
|
||||
cls('Carciphona', 'jsh'),
|
||||
cls('Champ2010'),
|
||||
cls('CheckerboardNightmare'),
|
||||
# Patreon & Pixiv (https://www.patreon.com/Collar6)
|
||||
cls('Collar6', 'mov'),
|
||||
cls('ComicFury/30years'),
|
||||
cls('ComicFury/AAB'),
|
||||
cls('ComicFury/AdventuresofMaggie'),
|
||||
|
@ -207,6 +211,7 @@ class Removed(Scraper):
|
|||
cls('CtrlAltDel', 'block'),
|
||||
cls('CtrlAltDel/Sillies', 'block'),
|
||||
cls('DailyDose'),
|
||||
cls('DamnLol'),
|
||||
cls('DeathToTheExtremist'),
|
||||
cls('DoctorCat', 'brk'),
|
||||
cls('DungeonsAndDenizens'),
|
||||
|
@ -217,6 +222,7 @@ class Removed(Scraper):
|
|||
cls('FeyWinds'),
|
||||
cls('FilibusterCartoons'),
|
||||
cls('FowlLanguage', 'block'),
|
||||
cls('GlassHalfEmpty'),
|
||||
cls('GoComics/ABootsAndPupComic'),
|
||||
cls('GoComics/AdventuresofDaisy'),
|
||||
cls('GoComics/AdventuresofMartyandTurkey'),
|
||||
|
@ -407,8 +413,10 @@ class Removed(Scraper):
|
|||
cls('GoComics/ZacharyNixonJohnson'),
|
||||
cls('GunnerkrigCourt'),
|
||||
cls('HorribleVille'),
|
||||
cls('JerkCity'),
|
||||
cls('KatzenfutterGeleespritzer'),
|
||||
cls('KeenSpot/Adventurers', 'mov'),
|
||||
cls('KeenSpot/Landis'),
|
||||
cls('Key'),
|
||||
cls('KillerKomics'),
|
||||
cls('Kukuburi'),
|
||||
|
@ -420,7 +428,6 @@ class Removed(Scraper):
|
|||
cls('Nnewts'),
|
||||
cls('OddFish'),
|
||||
cls('OneQuestion'),
|
||||
cls('OnTheFasttrack'),
|
||||
cls('OrnerBoy'),
|
||||
cls('PensAndTales/Evilish'),
|
||||
cls('PensAndTales/FireflyCross'),
|
||||
|
@ -431,7 +438,10 @@ class Removed(Scraper):
|
|||
cls('Pimpette'),
|
||||
cls('PunksAndNerds', 'mis'),
|
||||
cls('PunksAndNerdsOld'),
|
||||
# Moved to tapas.io, which blocks us
|
||||
cls('RadioactivePanda', 'block'),
|
||||
cls('RedsPlanet'),
|
||||
cls('RedString'),
|
||||
cls('SmackJeeves/Aarrevaara'),
|
||||
cls('SmackJeeves/AchievementStuck'),
|
||||
cls('SmackJeeves/AGirlAndHerShadow'),
|
||||
|
@ -581,6 +591,7 @@ class Removed(Scraper):
|
|||
cls('SuburbanTribe'),
|
||||
cls('TheOuterQuarter'),
|
||||
cls('TheParkingLotIsFull'),
|
||||
cls('TheThinHLine', 'acc'),
|
||||
cls('ThunderAndLightning'),
|
||||
cls('TinyKittenTeeth'),
|
||||
cls('TwoTwoOneFour'),
|
||||
|
@ -592,6 +603,7 @@ class Removed(Scraper):
|
|||
cls('WebcomicsNation/AgnesQuill'),
|
||||
cls('WebcomicsNation/MyMuse'),
|
||||
cls('WebcomicsNation/NekkoAndJoruba'),
|
||||
cls('WeCanSleepTomorrow'),
|
||||
cls('WhiteNinja'),
|
||||
cls('WLP/ShadowChasers'),
|
||||
cls('WotNow'),
|
||||
|
@ -712,8 +724,10 @@ class Renamed(Scraper):
|
|||
cls('KeenSpot/Newshounds', 'Newshounds'),
|
||||
cls('KeenSpot/SinFest', 'SinFest'),
|
||||
cls('KeenSpot/TheGodChild', 'GodChild'),
|
||||
cls('OnTheFasttrack', 'ComicsKingdom/OnTheFastrack'),
|
||||
cls('PetiteSymphony/Djandora', 'ComicsBreak/Djandora'),
|
||||
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
|
||||
cls('ShermansLagoon', 'ComicsKingdom/ShermansLagoon'),
|
||||
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
|
||||
cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
|
||||
cls('SmackJeeves/ForestHill', 'ForestHill'),
|
||||
|
|
|
@ -14,14 +14,6 @@ from ..util import tagre
|
|||
from .common import _WordPressScraper
|
||||
|
||||
|
||||
class RadioactivePanda(_BasicScraper):
|
||||
url = 'http://www.radioactivepanda.com/'
|
||||
stripUrl = url + 'comic/%s'
|
||||
imageSearch = compile(r'<img src="(/Assets/.*?)".+?"comicimg"')
|
||||
prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
|
||||
help = 'Index format: n (no padding)'
|
||||
|
||||
|
||||
class RalfTheDestroyer(_WordPressScraper):
|
||||
url = 'http://ralfthedestroyer.com/'
|
||||
|
||||
|
@ -75,15 +67,6 @@ class RedMeat(_ParserScraper):
|
|||
return '_'.join(parts[1:3])
|
||||
|
||||
|
||||
class RedString(_BasicScraper):
|
||||
url = 'http://www.redstring.strawberrycomics.com/'
|
||||
stripUrl = url + 'index.php?id=%s'
|
||||
firstStripUrl = stripUrl % '434'
|
||||
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Replay(_ParserScraper):
|
||||
url = 'http://replaycomic.com/'
|
||||
stripUrl = url + 'comic/%s/'
|
||||
|
|
|
@ -7,7 +7,6 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
from re import compile, escape, IGNORECASE, sub
|
||||
from os.path import splitext
|
||||
import datetime
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
|
||||
|
@ -198,24 +197,6 @@ class Sheldon(_BasicScraper):
|
|||
help = 'Index format: yymmdd'
|
||||
|
||||
|
||||
class ShermansLagoon(_BasicScraper):
|
||||
url = 'http://shermanslagoon.com/'
|
||||
stripUrl = url + 'comics/%s'
|
||||
firstStripUrl = stripUrl % '/december-29-2003/'
|
||||
imageSearch = compile(tagre("img", "src",
|
||||
r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+?)'))
|
||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||
help = 'Index format: monthname-day-year'
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
name = page_url.rsplit('/', 3)[2]
|
||||
if name == "shermanslagoon.com":
|
||||
name = datetime.date.today().strftime("%B-%d-%Y").lower()
|
||||
# name is monthname-day-year
|
||||
month, day, year = name.split('-')
|
||||
return "%s-%s-%s" % (year, month, day)
|
||||
|
||||
|
||||
class ShipInABottle(_WPNavi):
|
||||
url = 'http://shipinbottle.pepsaga.com/'
|
||||
stripUrl = url + '?p=%s'
|
||||
|
@ -248,12 +229,6 @@ class SinFest(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class Sithrah(_ParserScraper):
|
||||
url = 'http://sithrah.com/'
|
||||
imageSearch = '//div[@class="webcomic-image"]/img'
|
||||
prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')
|
||||
|
||||
|
||||
class SixPackOfOtters(_ParserScraper):
|
||||
url = 'http://sixpackofotters.com/'
|
||||
stripUrl = url + 'pages/%s/'
|
||||
|
|
|
@ -14,7 +14,7 @@ except ImportError:
|
|||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, xpath_class
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper, _TumblrScraper, _WordPressScraper, _WPNavi, _WPNaviIn
|
||||
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn
|
||||
|
||||
|
||||
class TailsAndTactics(_ParserScraper):
|
||||
|
@ -182,23 +182,6 @@ class TheRockCocks(_BasicScraper):
|
|||
adult = True
|
||||
|
||||
|
||||
class TheThinHLine(_TumblrScraper):
|
||||
url = 'http://thinhline.tumblr.com/'
|
||||
firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the'
|
||||
imageSearch = '//img[@id="content-image"]/@data-src'
|
||||
prevSearch = '//div[@id="pagination"]/a[text()=">"]'
|
||||
latestSearch = '//a[@class="timestamp"]'
|
||||
adult = True
|
||||
|
||||
indirectImageSearch = '//div[@id="post"]//a[not(@rel) and img]'
|
||||
|
||||
def getComicStrip(self, url, data):
|
||||
"""The comic strip image is in a separate page."""
|
||||
subPage = self.fetchUrl(url, data, self.indirectImageSearch)
|
||||
pageData = self.getPage(subPage)
|
||||
return super(TheThinHLine, self).getComicStrip(subPage, pageData)
|
||||
|
||||
|
||||
class TheWhiteboard(_ParserScraper):
|
||||
BROKEN_PAGE_MIDDLE = compile(r'</body></html>\n<')
|
||||
url = 'http://www.the-whiteboard.com/'
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -48,15 +48,6 @@ class WebDesignerCOTW(_ParserScraper):
|
|||
return "%s-%s" % (week, imagename)
|
||||
|
||||
|
||||
class WeCanSleepTomorrow(_BasicScraper):
|
||||
url = 'http://wecansleeptomorrow.com/'
|
||||
rurl = escape(url)
|
||||
stripUrl = url + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class Weregeek(_ParserScraper):
|
||||
url = 'http://www.weregeek.com/'
|
||||
stripUrl = url + '%s/'
|
||||
|
@ -209,10 +200,6 @@ class WorldOfMrToast(_BasicScraper):
|
|||
return None
|
||||
|
||||
|
||||
class WorldOfWarcraftEh(_WordPressScraper):
|
||||
url = 'http://woweh.com/'
|
||||
|
||||
|
||||
class WormWorldSaga(_BasicScraper):
|
||||
url = 'http://www.wormworldsaga.com/'
|
||||
stripUrl = url + 'chapters/%s/index.php'
|
||||
|
|
Loading…
Reference in a new issue