Remove some comics that are gone or block us.

This commit is contained in:
Tobias Gruetzmacher 2016-04-17 19:42:43 +02:00
parent 1fbc844077
commit 13a3409854
10 changed files with 1 addition and 302 deletions

View file

@ -14,14 +14,6 @@ from .common import (_ComicControlScraper, _WordPressScraper, WP_PREV_SEARCH,
xpath_class) xpath_class)
class BackwaterPlanet(_BasicScraper):
    # Scraper settings for backwaterplanet.com.
    url = 'http://www.backwaterplanet.com/current.htm'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = 'http://www.backwaterplanet.com/archive/bwp%s.htm'
    help = 'Index format: yymmdd'
    # Group 1 captures the site-relative path of the strip image.
    imageSearch = compile(r'<img src="(/images/comic/bwp.+?)">')
    # Group 1 captures the archive link wrapped around the "Previous" button;
    # the button image may be referenced with or without a leading slash.
    prevSearch = compile(
        r'<a href="(/archive/bwp.+?)">'
        r'<img src="(images/Previous.jpg|/images/Previous.jpg)"'
    )
class BadassMuthas(_BasicScraper): class BadassMuthas(_BasicScraper):
url = 'http://badassmuthas.com/pages/comic.php' url = 'http://badassmuthas.com/pages/comic.php'
stripUrl = url + '?%s' stripUrl = url + '?%s'
@ -111,15 +103,6 @@ class BetweenFailures(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class BigFatWhale(_BasicScraper):
    # Scraper settings for bigfatwhale.com.
    url = 'http://www.bigfatwhale.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + 'archives/bfw_%s.htm'
    help = 'Index format: nnn'
    # Image sources are accepted with or without the "archives/" prefix.
    imageSearch = compile(
        tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)'))
    # The previous-strip link is recognised by its upper-case attributes and
    # its "Previous Cartoon" title.
    prevSearch = compile(
        r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
class BiggerThanCheeses(_BasicScraper): class BiggerThanCheeses(_BasicScraper):
url = 'http://www.biggercheese.com/' url = 'http://www.biggercheese.com/'
stripUrl = url + 'index.php?comic=%s' stripUrl = url + 'index.php?comic=%s'
@ -159,20 +142,6 @@ class BlankIt(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class Blip(_BasicScraper):
    # Scraper settings for blipcomic.com.
    url = 'http://blipcomic.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + 'index.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n'
    # Group 1 captures the relative path of the strip image.
    imageSearch = compile(r'(istrip_files/strips/.+?)"')
    # Group 1 captures the quoted strip link found between the literal
    # texts "First" and "prev".
    prevSearch = compile(
        r'First.+?"(index.php\?strip_id=.+?)".+?prev')

    @classmethod
    def prevUrlModifier(cls, prevUrl):
        """Rewrite a previous-strip URL from the www host to the bare host.

        Returns None when `prevUrl` is empty or None.
        """
        if not prevUrl:
            return None
        return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
class BloodBound(_WordPressScraper): class BloodBound(_WordPressScraper):
url = 'http://bloodboundcomic.com/' url = 'http://bloodboundcomic.com/'
firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/' firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/'
@ -199,18 +168,6 @@ class BMovieComic(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class BobWhite(_BasicScraper):
    # Scraper settings for bobwhitecomics.com.
    url = 'http://www.bobwhitecomics.com/'
    # Regex-escaped form of `url`, for embedding in the patterns below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?webcomic_post=%s'
    firstStripUrl = stripUrl % '20110504'
    # The dot before "jpg" is escaped so it only matches a literal '.'.
    imageSearch = compile(
        tagre("img", "src",
              r"(%swp/wp-content/webcomic/untitled/\d+\.jpg)" % rurl))
    # Raw string: '\?' and '\d' are regex escapes, not string escapes, and
    # would trigger invalid-escape warnings in a normal string literal.
    prevSearch = compile(
        tagre("a", "href", r"(%s\?webcomic_post=\d+)" % rurl) +
        r'[^"]+Previous')
    help = 'Index format: yyyymmdd'
class BookOfBiff(_BasicScraper): class BookOfBiff(_BasicScraper):
url = 'http://thebookofbiff.com/' url = 'http://thebookofbiff.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -231,23 +188,6 @@ class BoredAndEvil(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class BoxerHockey(_BasicScraper):
    # Scraper settings for the Boxer Hockey comic hosted on fireball20xl.com.
    url = 'http://boxerhockey.fireball20xl.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '56'
    help = 'Index format: n (unpadded)'
    # Relative image path of an <img> tag that also mentions "comicimg".
    imageSearch = compile(
        tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg"))
    # The page links to www.boxerhockey.com; see prevUrlModifier below.
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)')
        + r'[^>]+Previous')

    @classmethod
    def prevUrlModifier(cls, prevUrl):
        """Point a previous-strip URL at the fireball20xl.com host.

        Returns None when `prevUrl` is empty or None.
        """
        if not prevUrl:
            return None
        return prevUrl.replace("www.boxerhockey.com",
                               "boxerhockey.fireball20xl.com")
class BoyOnAStickAndSlither(_BasicScraper): class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/' url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s' stripUrl = url + 'page/%s'
@ -266,54 +206,6 @@ class BratHalla(_WordPressScraper):
url = 'http://brat-halla.com/' url = 'http://brat-halla.com/'
class BrentalFloss(_BasicScraper):
    # Scraper settings for brentalflossthecomic.com (main comic).
    url = 'http://brentalflossthecomic.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n'
    # Any image whose source contains "/img/comic/".
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
    # An anchor tag immediately followed by the text "Prev".
    prevSearch = compile(
        tagre("a", "href", r'([^"]*)') + "Prev")

    @classmethod
    def prevUrlModifier(cls, prevUrl):
        """Strip every "www." occurrence from a previous-strip URL.

        Returns None when `prevUrl` is empty or None.
        """
        if not prevUrl:
            return None
        return prevUrl.replace("www.", "")
class BrentalFlossFit(BrentalFloss):
    # "Flossed In Time" sub-comic; search patterns come from BrentalFloss.
    name = 'BrentalFloss/FlossedInTime'
    url = 'http://brentalflossthecomic.com/fit/'
    # Rebuilt here because it is derived from the overridden `url`.
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'

    @classmethod
    def prevUrlModifier(cls, prevUrl):
        """Remove newline characters from a previous-strip URL.

        Returns None when `prevUrl` is empty or None.
        """
        if not prevUrl:
            return None
        return prevUrl.replace("\n", "")

    @classmethod
    def imageUrlModifier(cls, url, data):
        """Remove newline characters from an image URL.

        `data` is accepted but not used. Returns None when `url` is
        empty or None.
        """
        if not url:
            return None
        return url.replace("\n", "")
class BrentalFlossGuest(BrentalFloss):
    # Guest-comics section; everything not set here comes from BrentalFloss.
    name = 'BrentalFloss/GuestComics'
    url = 'http://brentalflossthecomic.com/guestcomics/'
    # Rebuilt here because it is derived from the overridden `url`.
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'
class BrightlyWound(_BasicScraper):
    # Scraper settings for brightlywound.com.
    baseUrl = 'http://www.brightlywound.com/'
    # Starting page is pinned to a specific comic number.
    url = baseUrl + '?comic=137'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = baseUrl + '?comic=%s'
    firstStripUrl = stripUrl % '0'
    help = 'Index format: nnn'
    # The site quotes attributes with single quotes, hence quote="'".
    imageSearch = compile(
        tagre("img", "src", r"(comic/[^']+)", quote="'"))
    # Link inside the 'navback' div that wraps the previous-button image.
    prevSearch = compile(
        r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'>'
        r'<img src=\'images/previous.png\''
    )
class Brink(_BasicScraper): class Brink(_BasicScraper):
url = 'http://paperfangs.com/brink/' url = 'http://paperfangs.com/brink/'
rurl = escape(url) rurl = escape(url)

View file

@ -176,15 +176,6 @@ class ChasingTheSunset(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class CheckerboardNightmare(_ParserScraper):
    # Scraper settings for checkerboardnightmare.com; the searches are
    # XPath expressions rather than compiled regexes.
    url = 'http://www.checkerboardnightmare.com/'
    # '%s' is replaced by a strip index; the first strip adds ".html".
    stripUrl = url + 'd/%s'
    firstStripUrl = stripUrl % '20001110.html'
    help = 'Index format: yyyymmdd'
    # Images inside the table cell that spans four columns.
    imageSearch = '//td[@colspan="4"]//img'
    # Link in a second table cell.
    prevSearch = '//td[2]/a'
class Chester5000XYV(_BasicScraper): class Chester5000XYV(_BasicScraper):
url = 'http://jessfink.com/Chester5000XYV/' url = 'http://jessfink.com/Chester5000XYV/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
@ -231,17 +222,6 @@ class Comedity(_BasicScraper):
help = 'Index format: n (no padding)' help = 'Index format: n (no padding)'
class Commissioned(_BasicScraper):
    # Scraper settings for commissionedcomic.com.
    url = 'http://www.commissionedcomic.com/'
    # Regex-escaped form of `url`, for embedding in the patterns below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '139'
    help = 'Index format: n'
    # Absolute image URLs under <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Absolute "?p=<n>" link in an anchor that also mentions "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
class CompanyY(_BasicScraper): class CompanyY(_BasicScraper):
url = 'http://company-y.com/' url = 'http://company-y.com/'
rurl = escape(url) rurl = escape(url)
@ -319,28 +299,6 @@ class CrimsonDark(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class CtrlAltDel(_BasicScraper):
    # Scraper settings for cad-comic.com (main comic).
    url = 'http://www.cad-comic.com/cad/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '%s'
    # Absolute image URLs served from the v.cdn.cad-comic.com host.
    imageSearch = compile(
        tagre("img", "src", r'(http://v\.cdn\.cad-comic\.com/comics/[^"]+)'))
    # Link of an anchor tag that also mentions "nav-back".
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="nav-back"))
    help = 'Index format: yyyymmdd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random junk from image names.

        The last path component of `imageUrl` is split into stem and
        extension, and everything from the final '-' of the stem onwards
        is dropped. `pageUrl` is not used.

        The extension is split off before the suffix is stripped, so a
        name without '-' keeps a single extension, and a name without
        '.' no longer raises IndexError.
        """
        imgname = imageUrl.split('/')[-1]
        stem, dot, imgext = imgname.rpartition('.')
        if not dot:
            # No extension present: only strip the trailing suffix.
            return imgname.rsplit('-', 1)[0]
        imgbase = stem.rsplit('-', 1)[0]
        return '%s.%s' % (imgbase, imgext)
class CtrlAltDelSillies(CtrlAltDel):
    # "Sillies" sub-comic; searches and namer come from CtrlAltDel.
    name = 'CtrlAltDel/Sillies'
    url = 'http://www.cad-comic.com/sillies/'
    # Rebuilt here because it is derived from the overridden `url`.
    stripUrl = url + '%s'
class CucumberQuest(_BasicScraper): class CucumberQuest(_BasicScraper):
url = 'http://cucumber.gigidigi.com/' url = 'http://cucumber.gigidigi.com/'
rurl = escape(url) rurl = escape(url)

View file

@ -49,10 +49,6 @@ class GirlsAndSports(_Creators):
path = 'girls-and-sports' path = 'girls-and-sports'
class GirlsandSportsSpanish(_CreatorsEs):
    # Only the comic's path is configured here; the rest comes from the
    # _CreatorsEs base class.
    path = 'girls-and-sports-spanish'
class HomeOffice(_Creators): class HomeOffice(_Creators):
path = 'stay-at-home-dad' path = 'stay-at-home-dad'
@ -81,10 +77,6 @@ class Rugrats(_Creators):
path = 'rugrats' path = 'rugrats'
class RugratsSpanish(_CreatorsEs):
    # Only the comic's path is configured here; the rest comes from the
    # _CreatorsEs base class.
    path = 'rugrats-spanish'
class TheQuigmans(_Creators): class TheQuigmans(_Creators):
path = 'the-quigmans' path = 'the-quigmans'

View file

@ -86,15 +86,6 @@ class DeadWinter(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
class DeathToTheExtremist(_BasicScraper):
    # Scraper settings for dtecomic.com.
    url = 'http://www.dtecomic.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?n=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: nnn'
    # Any double-quoted value starting with "comics/".
    imageSearch = compile(r'"(comics/.*?)"')
    # "?n=..." link whose anchor contains the aprev.gif button image.
    prevSearch = compile(
        r'</a> <a href="(\?n=.*?)"><.+?/aprev.gif"')
class DeepFried(_BasicScraper): class DeepFried(_BasicScraper):
url = 'http://www.whatisdeepfried.com/' url = 'http://www.whatisdeepfried.com/'
rurl = escape(url) rurl = escape(url)

View file

@ -151,22 +151,6 @@ class EmergencyExit(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class Eriadan(_BasicScraper):
    # Scraper settings for the Eriadan comic on shockdom.com.
    url = 'http://www.shockdom.com/webcomics/eriadan/'
    # Regex-escaped form of `url`, for embedding in the patterns below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '%s/'
    help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
    multipleImagesPerStrip = True
    # Images under <url>files/ whose tag also carries width 600 or 800.
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/[^"]+)' % rurl,
              after='width="[68]00"'))
    # Link of an anchor tag that also mentions "prev".
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))

    def shouldSkipUrl(self, url, data):
        """Return True when `url` is one of the known pages to skip.

        `data` is accepted but not used.
        """
        skipped = (
            self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso",  # video
        )
        return url in skipped
class ErrantStory(_BasicScraper): class ErrantStory(_BasicScraper):
url = 'http://www.errantstory.com/' url = 'http://www.errantstory.com/'
stripUrl = url + '%s' stripUrl = url + '%s'

View file

@ -9,7 +9,6 @@ from re import compile, escape, IGNORECASE
from ..util import tagre from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from .common import _WordPressScraper
class FalconTwin(_BasicScraper): class FalconTwin(_BasicScraper):
@ -40,17 +39,6 @@ class FauxPas(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
class FeyWinds(_BasicScraper):
    # Scraper settings for the Fey Winds comic on kitsune.rydia.net.
    baseUrl = 'http://kitsune.rydia.net/'
    url = baseUrl + 'index.html'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = baseUrl + 'comic/page.php?id=%s'
    help = 'Index format: n (unpadded)'
    # Uses the indirect starter together with `latestSearch` below.
    starter = indirectStarter
    # Group 1 captures a comic page link terminated by a double quote.
    latestSearch = compile(r'(comic/page.php\?id.+?)"')
    # Relative image path terminated by a single quote; the double slash
    # is part of the pattern.
    imageSearch = compile(r"(../comic/pages//.+?)'")
    # Page link that is followed by the navprevious.png button image.
    prevSearch = compile(
        r"(page.php\?id=.+?)'.+?navprevious.png")
class FilibusterCartoons(_BasicScraper): class FilibusterCartoons(_BasicScraper):
url = 'http://www.filibustercartoons.com/' url = 'http://www.filibustercartoons.com/'
rurl = escape(url) rurl = escape(url)
@ -130,10 +118,6 @@ class ForLackOfABetterComic(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
class FowlLanguage(_WordPressScraper):
    # Only the site URL is configured here; the rest comes from the
    # _WordPressScraper base class.
    url = 'http://www.fowllanguagecomics.com/'
class Fragile(_ParserScraper): class Fragile(_ParserScraper):
url = 'http://www.fragilestory.com/' url = 'http://www.fragilestory.com/'
imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img' imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img'
@ -141,13 +125,6 @@ class Fragile(_ParserScraper):
firstStripUrl = url + 'strips/chapter_01' firstStripUrl = url + 'strips/chapter_01'
class FragileSpanish(_ParserScraper):
    # Scraper settings for es.fragilestory.com; the searches are XPath
    # expressions.
    url = 'http://es.fragilestory.com/'
    lang = 'es'
    # Image inside the "nobg" link of the comics content div.
    imageSearch = '//div[@id="content_comics"]/a[@class="nobg"]/img'
    # Navigation link whose text contains "Anterior".
    prevSearch = '//a[@class="comicnav" and contains(text(),"Anterior")]'
class FredoAndPidjin(_BasicScraper): class FredoAndPidjin(_BasicScraper):
url = 'http://www.pidjin.net/' url = 'http://www.pidjin.net/'
stripUrl = url + '%s/' stripUrl = url + '%s/'

View file

@ -75,25 +75,6 @@ class LetsSpeakEnglish(_ComicControlScraper):
url = 'http://www.marycagle.com' url = 'http://www.marycagle.com'
class Lint(_BasicScraper):
    # Scraper settings for the Lint comic on purnicellin.com.
    url = 'http://www.purnicellin.com/lint/'
    # Regex-escaped form of `url`, for embedding in the pattern below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2004/01/10/01102004'
    help = 'Index format: yyyy/mm/dd/num-name'
    # Absolute image URLs under <url>comics/.
    imageSearch = compile(
        r'<img src="(%scomics/.+?)"' % rurl)
    # rel="prev" link that follows a literal "| " separator.
    prevSearch = compile(
        r'\| <a href="([^"]+)" rel="prev">')
class LinuxComFridayFunnies(_BasicScraper):
    # Scraper settings for the Friday Funnies section of linux.com.
    url = 'https://www.linux.com/news/friday-funnies/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'the-road-to-japan'
    help = 'Index format: stripname'
    # Episode image path carrying a "?format=image" query.
    imageSearch = compile(
        tagre("img", "src",
              r'(/news/friday-funnies/episode/[^"]+\?format=image[^"]+)'))
    # Section-relative link immediately followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(/news/friday-funnies/[^"]+)')
        + "Previous")
class LittleGamers(_BasicScraper): class LittleGamers(_BasicScraper):
url = 'http://www.little-gamers.com/' url = 'http://www.little-gamers.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'

View file

@ -7,7 +7,7 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..util import tagre from ..util import tagre
from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH, from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH,
@ -49,23 +49,6 @@ class Nedroid(_WordPressScraper):
prevSearch = '//a[@rel="prev"]' prevSearch = '//a[@rel="prev"]'
class NekkoAndJoruba(_BasicScraper):
    # Scraper settings for nekkoandjoruba.com.
    url = 'http://www.nekkoandjoruba.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '7'
    help = 'Index format: nnn'
    # Absolute image URLs under the site's comics/ directory.
    imageSearch = compile(
        r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
    # Link whose text is the &lsaquo; entity.
    prevSearch = compile(
        r'<a href="(.+?)">&lsaquo;</a>')
class NekoTheKitty(_ParserScraper):
    # Scraper settings for nekothekitty.net; the searches are XPath
    # expressions.
    url = 'http://www.nekothekitty.net/'
    # '%s' is replaced by a strip path such as the one in firstStripUrl.
    stripUrl = url + 'comics/%s'
    firstStripUrl = stripUrl % '936393/001-video-games'
    # Image inside the link with id "comic_image".
    imageSearch = '//a[@id="comic_image"]/img'
    # Link whose text is exactly "<-".
    prevSearch = '//a[text()="<-"]'
class NeoEarth(_BasicScraper): class NeoEarth(_BasicScraper):
url = 'http://www.neo-earth.com/NE/' url = 'http://www.neo-earth.com/NE/'
stripUrl = url + 'index.php?date=%s' stripUrl = url + 'index.php?date=%s'
@ -80,13 +63,6 @@ class NerfNow(_WordPressScraper):
prevSearch = '//li[@id="nav_previous"]/a' prevSearch = '//li[@id="nav_previous"]/a'
class NewAdventuresOfBobbin(_BasicScraper):
    # Scraper settings for bobbin-comic.com; no stripUrl or prevSearch is
    # defined, and several images are expected per page.
    url = 'http://www.bobbin-comic.com/bobbin_strips/'
    help = 'Index format: none'
    multipleImagesPerStrip = True
    # Matches anchor links (not <img> tags) to numbered .gif files.
    imageSearch = compile(
        tagre("a", "href", r'(\d+\.gif)'))
class NewWorld(_BasicScraper): class NewWorld(_BasicScraper):
url = 'http://www.tfsnewworld.com/' url = 'http://www.tfsnewworld.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'

View file

@ -65,16 +65,6 @@ class OmakeTheater(_ParserScraper):
help = 'Index format: number (unpadded)' help = 'Index format: number (unpadded)'
class OneQuestion(_BasicScraper):
    # Scraper settings for onequestioncomic.com.
    url = 'http://onequestioncomic.com/'
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + 'comic.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n (unpadded)'
    # Numbered strip file with a 3-4 character extension, optionally
    # prefixed by "../".
    imageSearch = compile(
        tagre("img", "src",
              r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
    # Strip link directly followed by the arrow_prev.jpg button image.
    prevSearch = compile(
        tagre("a", "href", r'(comic\.php\?strip_id=\d+)')
        + tagre("img", "src", r'img/arrow_prev\.jpg'))
class OnTheEdge(_WordPressScraper): class OnTheEdge(_WordPressScraper):
url = 'http://ontheedgecomics.com/' url = 'http://ontheedgecomics.com/'
firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/' firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/'
@ -106,18 +96,6 @@ class Optipess(_WordPressScraper):
textSearch = '//div[@id="comic"]//img/@alt' textSearch = '//div[@id="comic"]//img/@alt'
class OrnerBoy(_BasicScraper):
    # Scraper settings for orneryboy.com.
    url = 'http://www.orneryboy.com/'
    # Regex-escaped form of `url`, for embedding in the pattern below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + 'index.php?comicID=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: number'
    # Relative image path: comics/<digits>.<rest>.
    imageSearch = compile(
        tagre("img", "src", r'(comics/\d+\.[^"]+)'))
    # Absolute strip link directly followed by the prev_a.gif button image.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?comicID=\d+)' % rurl)
        + tagre("img", "src", r'images/prev_a\.gif'))
class OurHomePlanet(_BasicScraper): class OurHomePlanet(_BasicScraper):
url = 'http://gdk.gd-kun.net/' url = 'http://gdk.gd-kun.net/'
stripUrl = url + '%s.html' stripUrl = url + '%s.html'

View file

@ -90,16 +90,6 @@ class TheOrderOfTheStick(_BasicScraper):
return pageUrl.rsplit('/', 1)[-1][:-5] return pageUrl.rsplit('/', 1)[-1][:-5]
class TheOuterQuarter(_BasicScraper):
    # Scraper settings for theouterquarter.com.
    url = 'http://theouterquarter.com/'
    # Regex-escaped form of `url`, for embedding in the pattern below.
    rurl = escape(url)
    # '%s' is replaced by a strip path such as the one in firstStripUrl.
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'oq-the-first-take/4'
    help = 'Index format: nnn'
    # Absolute image URLs under <url>comics/.
    imageSearch = compile(
        r'<img src="(%scomics/.+?)"' % rurl)
    # rel="prev" link inside the "nav-previous" div.
    prevSearch = compile(
        r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
class TheParkingLotIsFull(_BasicScraper): class TheParkingLotIsFull(_BasicScraper):
baseUrl = 'http://plif.courageunfettered.com/' baseUrl = 'http://plif.courageunfettered.com/'
url = baseUrl + 'archive/arch2002.htm' url = baseUrl + 'archive/arch2002.htm'
@ -179,26 +169,6 @@ class ThreePanelSoul(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class ThunderAndLightning(_BasicScraper):
    # Scraper settings for talcomic.com.
    baseUrl = 'http://www.talcomic.com/wp/'
    url = baseUrl + '?latestcomic'
    # Regex-escaped form of `baseUrl`, for embedding in the patterns below.
    rurl = escape(baseUrl)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = baseUrl + '%s/'
    help = 'Index format: yyyy/mm/dd/page-nn'
    # Absolute image URLs under <baseUrl>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Absolute link under baseUrl in an anchor that also mentions "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
class TinyKittenTeeth(_BasicScraper):
    # Scraper settings for tinykittenteeth.com.
    url = 'http://www.tinykittenteeth.com/'
    # Regex-escaped form of `url`, for embedding in the pattern below.
    rurl = escape(url)
    # '%s' is replaced by the strip index described in `help`.
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/01/26/gene-kelly'
    help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
    # Absolute image URLs under <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Link of an anchor tag that also mentions "Previous".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after="Previous"))
class ToonHole(_BasicScraper): class ToonHole(_BasicScraper):
url = 'http://www.toonhole.com/' url = 'http://www.toonhole.com/'
rurl = escape(url) rurl = escape(url)