Sort comics.

This commit is contained in:
Bastian Kleineidam 2013-03-06 20:21:10 +01:00
parent bae2a96d8b
commit d7925ba4a2
16 changed files with 436 additions and 437 deletions

View file

@ -8,24 +8,6 @@ from ..scraper import _BasicScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter from ..helpers import regexNamer, bounceStarter, indirectStarter
class ALessonIsLearned(_BasicScraper):
url = 'http://www.alessonislearned.com/'
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s'
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
help = 'Index format: nnn'
class ASofterWorld(_BasicScraper):
url = 'http://www.asofterworld.com/'
stripUrl = url + 'index.php?id=%s'
imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' +
tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back')
help = 'Index format: n (unpadded)'
class AbleAndBaker(_BasicScraper): class AbleAndBaker(_BasicScraper):
url = 'http://www.jimburgessdesign.com/comics/index.php' url = 'http://www.jimburgessdesign.com/comics/index.php'
stripUrl = url + '?comic=%s' stripUrl = url + '?comic=%s'
@ -67,6 +49,15 @@ class AcademyVale(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper definition for http://www.alessonislearned.com/.
class ALessonIsLearned(_BasicScraper):
    url = 'http://www.alessonislearned.com/'
    # Defined before `starter`, which receives this pattern as an argument.
    prevSearch = compile(
        tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")
        + r"[^>]+previous"
    )
    starter = indirectStarter(url, prevSearch)
    stripUrl = url + 'index.php?comic=%s'
    imageSearch = compile(
        tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")
    )
    help = 'Index format: nnn'
class Alice(_BasicScraper): class Alice(_BasicScraper):
url = 'http://alice.alicecomics.com/' url = 'http://alice.alicecomics.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -152,6 +143,15 @@ class Achewood(_BasicScraper):
namer = regexNamer(compile(r'date=(\d+)')) namer = regexNamer(compile(r'date=(\d+)'))
# Scraper definition for http://www.asofterworld.com/.
class ASofterWorld(_BasicScraper):
    url = 'http://www.asofterworld.com/'
    stripUrl = url + 'index.php?id=%s'
    # Pattern: the <p id="thecomic"> tag, optional whitespace, then the <img>.
    imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' +
        tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
    # Raw string: "\.", "\?" and "\d" are regex escapes, not Python string
    # escapes; a non-raw literal triggers invalid-escape-sequence warnings.
    prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)") + '< back')
    help = 'Index format: n (unpadded)'
class AstronomyPOTD(_BasicScraper): class AstronomyPOTD(_BasicScraper):
url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html' url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html'
starter = bounceStarter(url, starter = bounceStarter(url,

View file

@ -9,6 +9,22 @@ from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
# Scraper definition for http://www.backwaterplanet.com/.
class BackwaterPlanet(_BasicScraper):
    url = 'http://www.backwaterplanet.com/current.htm'
    stripUrl = 'http://www.backwaterplanet.com/archive/bwp%s.htm'
    imageSearch = compile(
        r'<img src="(/images/comic/bwp.+?)">'
    )
    # The "previous" button image is accepted with or without a leading slash.
    prevSearch = compile(
        r'<a href="(/archive/bwp.+?)">'
        r'<img src="(images/Previous.jpg|/images/Previous.jpg)"'
    )
    help = 'Index format: yymmdd'
# Scraper definition for http://badassmuthas.com/.
class BadassMuthas(_BasicScraper):
    url = 'http://badassmuthas.com/pages/comic.php'
    stripUrl = url + '?%s'
    imageSearch = compile(
        tagre("img", "src", r'(/images/comicsissue[^"]+)')
    )
    # Link pattern concatenated with the back-button image pattern.
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)')
        + tagre("img", "src", r'/images/comicsbuttonBack\.gif')
    )
    help = 'Index format: nnn'
class BadMachinery(_BasicScraper): class BadMachinery(_BasicScraper):
url = 'http://scarygoround.com/' url = 'http://scarygoround.com/'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
@ -25,6 +41,14 @@ class Bardsworth(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper definition for http://www.baroquencomics.com/.
class Baroquen(_BasicScraper):
    url = 'http://www.baroquencomics.com/'
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.baroquencomics\.com/Comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://www\.baroquencomics\.com/[^"]+)',
              after='prev')
    )
    help = 'Index format: yyyy/mm/dd/strip-name'
class Bearmageddon(_BasicScraper): class Bearmageddon(_BasicScraper):
url = 'http://bearmageddon.com/' url = 'http://bearmageddon.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -42,6 +66,22 @@ class BetterDays(_BasicScraper):
help = 'Index format: yyyy/mm/<your guess>' help = 'Index format: yyyy/mm/<your guess>'
# Scraper definition for http://betweenfailures.com/.
class BetweenFailures(_BasicScraper):
    url = 'http://betweenfailures.com/'
    stripUrl = url + 'archives/archive/%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://betweenfailures\.com/wp-content/webcomic/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://betweenfailures\.com/archives/archive/[^"]+)',
              after="previous")
    )
    help = 'Index format: stripnum-strip-name'
# Scraper definition for http://www.bigfatwhale.com/.
class BigFatWhale(_BasicScraper):
    url = 'http://www.bigfatwhale.com/'
    stripUrl = url + 'archives/bfw_%s.htm'
    # Image path may or may not carry the "archives/" prefix.
    imageSearch = compile(
        tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)')
    )
    prevSearch = compile(
        r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"'
    )
    help = 'Index format: nnn'
class BiggerThanCheeses(_BasicScraper): class BiggerThanCheeses(_BasicScraper):
url = 'http://www.biggercheese.com/' url = 'http://www.biggercheese.com/'
stripUrl = url + 'index.php?comic=%s' stripUrl = url + 'index.php?comic=%s'
@ -50,6 +90,14 @@ class BiggerThanCheeses(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# Scraper definition for http://www.duncepress.com/.
class BillyTheDunce(_BasicScraper):
    url = 'http://www.duncepress.com/'
    stripUrl = url + '%s/'
    imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)'))
    # Dots in the host name are escaped so they match literally, consistent
    # with imageSearch above.
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www\.duncepress\.com/[^"]+)" rel="prev">')
    help = 'Index format: yyyy/mm/strip-name'
class BizarreUprising(_BasicScraper): class BizarreUprising(_BasicScraper):
url = 'http://www.bizarreuprising.com/' url = 'http://www.bizarreuprising.com/'
stripUrl = url + 'view/%s' stripUrl = url + 'view/%s'
@ -58,6 +106,14 @@ class BizarreUprising(_BasicScraper):
help = 'Index format: n/name' help = 'Index format: n/name'
# Scraper definition for http://blankitcomics.com/.
class BlankIt(_BasicScraper):
    url = 'http://blankitcomics.com/'
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://blankitcomics\.com/bicomics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after='rel="prev"')
    )
    help = 'Index format: yyyy/mm/dd/name'
class Blip(_BasicScraper): class Blip(_BasicScraper):
url = 'http://blipcomic.com/' url = 'http://blipcomic.com/'
stripUrl = url + 'index.php?strip_id=%s' stripUrl = url + 'index.php?strip_id=%s'
@ -71,6 +127,14 @@ class Blip(_BasicScraper):
return prevUrl.replace("www.blipcomic.com", "blipcomic.com") return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
# Scraper definition for http://bloodboundcomic.com/.
class BloodBound(_BasicScraper):
    url = 'http://bloodboundcomic.com/'
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://bloodboundcomic\.com/comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://bloodboundcomic\.com/[^"]+)',
              after="prev")
    )
    help = 'Index format: yyyy/mm/name'
class BlueCrashKit(_BasicScraper): class BlueCrashKit(_BasicScraper):
url = 'http://robhamm.com/bluecrashkit/' url = 'http://robhamm.com/bluecrashkit/'
stripUrl = url + 'comics/blue-crash-kit/%s' stripUrl = url + 'comics/blue-crash-kit/%s'
@ -87,6 +151,14 @@ class BMovieComic(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
# Scraper definition for http://www.thebookofbiff.com/.
class BookOfBiff(_BasicScraper):
    url = 'http://www.thebookofbiff.com/'
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src", r'([^"]+/comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after="Previous")
    )
    help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
### With BratHalla there is no 'previous' link at comic 360 ### With BratHalla there is no 'previous' link at comic 360
### You will need to use ### You will need to use
### mainline -c BratHalla:360-backup-dad-unstable-plans/ ### mainline -c BratHalla:360-backup-dad-unstable-plans/
@ -132,7 +204,7 @@ class BrentalFlossGuest(BrentalFloss):
class _BringBackRoomies(_BasicScraper): class _BringBackRoomies(_BasicScraper):
url = "http://www.bringbackroomies.com/" url = "http://www.bringbackroomies.com/"
stripUrl = url + "comic/%s" stripUrl = url + "comic/%s"
imageSearch = compile(tagre("img", "src", r'(http://www\.bringbackroomies\.com/wp-content/uploads/\d+/\d+/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.bringbackroomies\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)'))
prevSearch = compile(tagre("span", "class", "mininav-prev") + prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(http://www\.bringbackroomies\.com/comic/[^"]+)')) tagre("a", "href", r'(http://www\.bringbackroomies\.com/comic/[^"]+)'))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -146,6 +218,14 @@ class Brink(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
# Scraper definition for http://www.bobwhitecomics.com/.
class BobWhite(_BasicScraper):
    url = 'http://www.bobwhitecomics.com/'
    stripUrl = url + '?webcomic_post=%s'
    # The dot before "jpg" is escaped so it only matches a literal dot.
    imageSearch = compile(tagre("img", "src", r"(http://www\.bobwhitecomics\.com/wp/wp-content/webcomic/untitled/\d+\.jpg)"))
    # Raw string: "\.", "\?" and "\d" are regex escapes, not Python string
    # escapes; a non-raw literal triggers invalid-escape-sequence warnings.
    prevSearch = compile(tagre("a", "href", r"(http://www\.bobwhitecomics\.com/\?webcomic_post=\d+)") + r'[^"]+Previous')
    help = 'Index format: yyyymmdd'
class BoredAndEvil(_BasicScraper): class BoredAndEvil(_BasicScraper):
url = 'http://www.boredandevil.com/' url = 'http://www.boredandevil.com/'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
@ -155,18 +235,6 @@ class BoredAndEvil(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.rsplit('/')[-1]
class BoxerHockey(_BasicScraper): class BoxerHockey(_BasicScraper):
url = 'http://boxerhockey.fireball20xl.com/' url = 'http://boxerhockey.fireball20xl.com/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
@ -181,6 +249,27 @@ class BoxerHockey(_BasicScraper):
return prevUrl.replace("www.boxerhockey.com", "boxerhockey.fireball20xl.com") return prevUrl.replace("www.boxerhockey.com", "boxerhockey.fireball20xl.com")
# Scraper definition for http://www.boasas.com/.
class BoyOnAStickAndSlither(_BasicScraper):
    url = 'http://www.boasas.com/'
    stripUrl = url + 'page/%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')
    )
    # NOTE(review): the "previous" pattern keys on the "Next page" link text;
    # presumably the site pages backwards in time -- confirm.
    prevSearch = compile(
        tagre("a", "href", r'(/page/\d+)') + "<span>Next page"
    )
    help = 'Index format: n (unpadded)'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name the image after the last path component of the page URL.
        return pageUrl.rsplit('/', 1)[-1]
# Scraper definition for http://www.brightlywound.com/.
class BrightlyWound(_BasicScraper):
    baseUrl = 'http://www.brightlywound.com/'
    # Start URL is pinned to comic 137 rather than the bare base URL.
    url = baseUrl + '?comic=137'
    stripUrl = baseUrl + '?comic=%s'
    # The page uses single-quoted attributes, hence quote="'".
    imageSearch = compile(
        tagre("img", "src", r"(comic/[^']+)", quote="'")
    )
    prevSearch = compile(
        r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'>'
        r'<img src=\'images/previous.png\''
    )
    help = 'Index format: nnn'
class BroodHollow(_BasicScraper): class BroodHollow(_BasicScraper):
url = 'http://broodhollow.chainsawsuit.com/' url = 'http://broodhollow.chainsawsuit.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -189,14 +278,6 @@ class BroodHollow(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class ButterSafe(_BasicScraper):
url = 'http://buttersafe.com/'
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://buttersafe\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://buttersafe\.com/\d+\d+/\d+/\d+/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
# XXX disallowed by robots.txt # XXX disallowed by robots.txt
class _ButtercupFestival(_BasicScraper): class _ButtercupFestival(_BasicScraper):
url = 'http://www.buttercupfestival.com/' url = 'http://www.buttercupfestival.com/'
@ -206,98 +287,17 @@ class _ButtercupFestival(_BasicScraper):
help = 'Index format: number-number' help = 'Index format: number-number'
# Scraper definition for http://buttersafe.com/.
class ButterSafe(_BasicScraper):
    url = 'http://buttersafe.com/'
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(http://buttersafe\.com/comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://buttersafe\.com/\d+\d+/\d+/\d+/[^"]+)',
              after="prev")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
class ButternutSquash(_BasicScraper): class ButternutSquash(_BasicScraper):
url = 'http://www.butternutsquash.net/' url = 'http://www.butternutsquash.net/'
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name-author-name' help = 'Index format: yyyy/mm/dd/strip-name-author-name'
class BlankIt(_BasicScraper):
url = 'http://blankitcomics.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: yyyy/mm/dd/name'
class BobWhite(_BasicScraper):
url = 'http://www.bobwhitecomics.com/'
stripUrl = url + '?webcomic_post=%s'
imageSearch = compile(tagre("img", "src", r"(http://www\.bobwhitecomics\.com/wp/wp-content/webcomic/untitled/\d+.jpg)"))
prevSearch = compile(tagre("a", "href", "(http://www\.bobwhitecomics\.com/\?webcomic_post=\d+)")+r'[^"]+Previous')
help = 'Index format: yyyymmdd'
class BigFatWhale(_BasicScraper):
url = 'http://www.bigfatwhale.com/'
stripUrl = url + 'archives/bfw_%s.htm'
imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)'))
prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
help = 'Index format: nnn'
class BadassMuthas(_BasicScraper):
url = 'http://badassmuthas.com/pages/comic.php'
stripUrl = url + '?%s'
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
help = 'Index format: nnn'
class BrightlyWound(_BasicScraper):
baseUrl = 'http://www.brightlywound.com/'
url = baseUrl + '?comic=137'
stripUrl = baseUrl + '?comic=%s'
imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'"))
prevSearch = compile(r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'')
help = 'Index format: nnn'
class BloodBound(_BasicScraper):
url = 'http://bloodboundcomic.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://bloodboundcomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://bloodboundcomic\.com/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/name'
class BookOfBiff(_BasicScraper):
url = 'http://www.thebookofbiff.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: yyyy/mm/dd/stripnum-strip-name'
class BillyTheDunce(_BasicScraper):
url = 'http://www.duncepress.com/'
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)'))
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.duncepress.com/[^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/strip-name'
class BackwaterPlanet(_BasicScraper):
url = 'http://www.backwaterplanet.com/current.htm'
stripUrl = 'http://www.backwaterplanet.com/archive/bwp%s.htm'
imageSearch = compile(r'<img src="(/images/comic/bwp.+?)">')
prevSearch = compile(r'<a href="(/archive/bwp.+?)"><img src="(images/Previous.jpg|/images/Previous.jpg)"')
help = 'Index format: yymmdd'
class Baroquen(_BasicScraper):
url = 'http://www.baroquencomics.com/'
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.baroquencomics\.com/Comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.baroquencomics\.com/[^"]+)', after='prev'))
help = 'Index format: yyyy/mm/dd/strip-name'
class BetweenFailures(_BasicScraper):
url = 'http://betweenfailures.com/'
stripUrl = url + 'archives/archive/%s'
imageSearch = compile(tagre("img", "src", r'(http://betweenfailures\.com/wp-content/webcomic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous"))
help = 'Index format: stripnum-strip-name'

View file

@ -79,6 +79,29 @@ class DeepFried(_BasicScraper):
help = 'Index format: non' help = 'Index format: non'
# Scraper definition for http://www.dieselsweeties.com/.
class DieselSweeties(_BasicScraper):
    url = 'http://www.dieselsweeties.com/'
    stripUrl = url + 'archive/%s'
    imageSearch = compile(
        tagre("img", "src", r'(/hstrips/[^"]+)')
    )
    # Two alternative "previous" button images are accepted.
    prevSearch = compile(
        tagre("a", "href", r'(/archive/\d+)')
        + tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)')
    )
    help = 'Index format: n (unpadded)'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # File name part before the first dot is parsed as the strip number
        # and zero-padded to at least two digits.
        filename = imageUrl.split('/')[-1]
        stem = filename.split('.')[0]
        return 'sw%02d' % int(stem)
# Scraper definition for http://dilbert.com/.
class Dilbert(_BasicScraper):
    url = 'http://dilbert.com/'
    stripUrl = url + '%s/'
    prevSearch = compile(
        tagre("a", "href", r'(/\d+-\d+-\d+/)', after="STR_Prev")
    )
    imageSearch = compile(
        tagre("img", "src",
              r'(/dyn/str_strip/[^"]+\.strip\.zoom\.gif)')
    )
    help = 'Index format: yyyy-mm-dd'
    # XXX namer
class DMFA(_BasicScraper): class DMFA(_BasicScraper):
url = 'http://www.missmab.com/' url = 'http://www.missmab.com/'
stripUrl = url + 'Comics/Vol_%s.php' stripUrl = url + 'Comics/Vol_%s.php'
@ -97,6 +120,22 @@ class DoemainOfOurOwn(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# Scraper definition for http://www.dominic-deegan.com/.
class DominicDeegan(_BasicScraper):
    url = 'http://www.dominic-deegan.com/'
    stripUrl = url + 'view.php?date=%s'
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
    # The dot in "view.php" is escaped so it only matches a literal dot.
    prevSearch = compile(r'"(view\.php\?date=[^"]+)".+?prev21')
    help = 'Index format: yyyy-mm-dd'
# Scraper definition for http://www.dorktower.com/.
class DorkTower(_BasicScraper):
    url = 'http://www.dorktower.com/'
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(http://www\.dorktower\.com/files/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.dorktower\.com/[^"]+)')
        + "Previous"
    )
    help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
class DrFun(_BasicScraper): class DrFun(_BasicScraper):
url = 'http://www.ibiblio.org/Dave/ar00502.htm' url = 'http://www.ibiblio.org/Dave/ar00502.htm'
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm' stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
@ -130,44 +169,6 @@ class DreamKeepersPrelude(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class Drowtales(_BasicScraper):
url = 'http://www.drowtales.com/mainarchive.php'
stripUrl = url + '?sid=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.drowtales\.com/mainarchive/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
help = 'Index format: number'
class DieselSweeties(_BasicScraper):
url = 'http://www.dieselsweeties.com/'
stripUrl = url + 'archive/%s'
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') +
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
index = int(imageUrl.split('/')[-1].split('.')[0])
return 'sw%02d' % (index,)
class DominicDeegan(_BasicScraper):
url = 'http://www.dominic-deegan.com/'
stripUrl = url + 'view.php?date=%s'
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
prevSearch = compile(r'"(view.php\?date=[^"]+)".+?prev21')
help = 'Index format: yyyy-mm-dd'
class DorkTower(_BasicScraper):
url = 'http://www.dorktower.com/'
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.dorktower\.com/files/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.dorktower\.com/[^"]+)')+"Previous")
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
class DresdenCodak(_BasicScraper): class DresdenCodak(_BasicScraper):
url = 'http://dresdencodak.com/' url = 'http://dresdencodak.com/'
stripUrl = None stripUrl = None
@ -176,13 +177,12 @@ class DresdenCodak(_BasicScraper):
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">')) starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
class Dilbert(_BasicScraper): class Drowtales(_BasicScraper):
url = 'http://dilbert.com/' url = 'http://www.drowtales.com/mainarchive.php'
stripUrl = url + '%s/' stripUrl = url + '?sid=%s'
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+/)', after="STR_Prev")) imageSearch = compile(tagre("img", "src", r'(http://www\.drowtales\.com/mainarchive/[^"]+)'))
imageSearch = compile(tagre("img", "src", r'(/dyn/str_strip/[^"]+\.strip\.zoom\.gif)')) prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
help = 'Index format: yyyy-mm-dd' help = 'Index format: number'
# XXX namer
# XXX disallowed by robots.txt # XXX disallowed by robots.txt

View file

@ -9,6 +9,19 @@ from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
# Scraper definition for http://www.earthsongsaga.com/.
class EarthsongSaga(_BasicScraper):
    url = 'http://www.earthsongsaga.com/'
    # Start page is found via the link wrapping the "current.jpg" image.
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'([^"]+)')
                + tagre("img", "src", r'[^"]+current\.jpg')),
    )
    stripUrl = None
    imageSearch = compile(
        tagre("img", "src", r'((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)')
    )
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after="Previous")
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Build "volNN_chNN_NN" from the volume, chapter and page numbers
        # embedded in the image path.
        pattern = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE)
        found = pattern.search(imageUrl)
        volume, chapter, page = (int(part) for part in found.groups())
        return 'vol%02d_ch%02d_%02d' % (volume, chapter, page)
class EdibleDirt(_BasicScraper): class EdibleDirt(_BasicScraper):
url = 'http://eddirt.frozenreality.co.uk/' url = 'http://eddirt.frozenreality.co.uk/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
@ -60,6 +73,14 @@ class ElGoonishShiveNP(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# Scraper definition for http://www.ellerbisms.com/.
class Ellerbisms(_BasicScraper):
    url = 'http://www.ellerbisms.com/'
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.ellerbisms\.com/comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://www\.ellerbisms\.com/[^"]+)',
              after="prev")
    )
    help = 'Index format: nnn'
class EmergencyExit(_BasicScraper): class EmergencyExit(_BasicScraper):
url = 'http://www.eecomics.net/' url = 'http://www.eecomics.net/'
stripUrl = url + "?strip_id=%s" stripUrl = url + "?strip_id=%s"
@ -124,6 +145,14 @@ class ExiernDarkReflections(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
# Scraper definition for http://www.exploitationnow.com/.
class ExploitationNow(_BasicScraper):
    url = 'http://www.exploitationnow.com/'
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.exploitationnow\.com/comics/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href",
              r'(http://www\.exploitationnow\.com/[^"]+)',
              after="navi-prev")
    )
    help = 'Index format: yyyy-mm-dd/num'
class ExtraLife(_BasicScraper): class ExtraLife(_BasicScraper):
url = 'http://www.myextralife.com/' url = 'http://www.myextralife.com/'
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
@ -138,32 +167,3 @@ class EyeOfRamalach(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(http://theeye\.katbox\.net/wp-content/uploads/[^"]+final[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://theeye\.katbox\.net/wp-content/uploads/[^"]+final[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://theeye\.katbox\.net/comic/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(http://theeye\.katbox\.net/comic/[^"]+)', after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'
class EarthsongSaga(_BasicScraper):
url = 'http://www.earthsongsaga.com/'
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+current\.jpg')))
stripUrl = None
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
@classmethod
def namer(cls, imageUrl, pageUrl):
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE).search(imageUrl)
return 'vol%02d_ch%02d_%02d' % (int(imgmatch.group(1)), int(imgmatch.group(2)), int(imgmatch.group(3)))
class ExploitationNow(_BasicScraper):
url = 'http://www.exploitationnow.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.exploitationnow\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.exploitationnow\.com/[^"]+)', after="navi-prev"))
help = 'Index format: yyyy-mm-dd/num'
class Ellerbisms(_BasicScraper):
url = 'http://www.ellerbisms.com/'
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
help = 'Index format: nnn'

View file

@ -17,6 +17,36 @@ class FalconTwin(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper definition for http://www.fallencomic.com/.
class Fallen(_BasicScraper):
    url = 'http://www.fallencomic.com/fal-page.htm'
    # Three placeholders: part, comic number, part (see getIndexStripUrl).
    stripUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
    imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
    prevSearch = compile(
        r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE
    )
    help = 'Index format: nn-m (comicNumber-partNumber)'
    # Start page is the link that follows a "(NEW dd/dd/dd)" marker.
    starter = indirectStarter(
        url,
        compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE),
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # Name is "<part>-<num>", both taken from the page URL.
        lastSegment = pageUrl.split('/')[-1]
        num = lastSegment.split('-')[0]
        part = pageUrl.split('-')[-1].split('.')[0]
        return '-'.join((part, num))

    def getIndexStripUrl(self, index):
        # The index has the form "<comicNumber>-<partNumber>".
        comicNum, partNum = index.split('-')
        return self.stripUrl % (partNum, comicNum, partNum)
# Scraper definition for http://www.fantasyrealmsonline.com/.
class FantasyRealms(_BasicScraper):
    url = 'http://www.fantasyrealmsonline.com/'
    stripUrl = url + 'manga/%s.php'
    # Dots in the file names and relative paths below are escaped so they
    # only match literal dots instead of any character.
    imageSearch = compile(r'<img src="(\d{1,4}\.\w{3,4})" width="540"', IGNORECASE)
    prevSearch = compile(r'<a href="(.+?)"><img src="\.\./images/nav-back\.gif"', IGNORECASE)
    help = 'Index format: nnn'
    # Start page is the link wrapping the "preview.jpg" image.
    starter = indirectStarter(url,
        compile(r'<a href="(manga/.+?)"><img src="preview\.jpg"', IGNORECASE))
class FauxPas(_BasicScraper): class FauxPas(_BasicScraper):
url = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi' url = 'http://www.ozfoxes.net/cgi/pl-fp1.cgi'
stripUrl = url + '?%s' stripUrl = url + '?%s'
@ -93,16 +123,6 @@ class Freefall(_BasicScraper):
help = 'Index format: nnnn/nnnnn' help = 'Index format: nnnn/nnnnn'
class FantasyRealms(_BasicScraper):
url = 'http://www.fantasyrealmsonline.com/'
stripUrl = url + 'manga/%s.php'
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter(url,
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
class FunInJammies(_BasicScraper): class FunInJammies(_BasicScraper):
url = 'http://www.funinjammies.com/' url = 'http://www.funinjammies.com/'
stripUrl = url + 'comic.php?issue=%s' stripUrl = url + 'comic.php?issue=%s'
@ -111,26 +131,6 @@ class FunInJammies(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class Fallen(_BasicScraper):
url = 'http://www.fallencomic.com/fal-page.htm'
stripUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
prevSearch = compile(r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE)
help = 'Index format: nn-m (comicNumber-partNumber)'
starter = indirectStarter(url,
compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE))
@classmethod
def namer(cls, imageUrl, pageUrl):
num = pageUrl.split('/')[-1].split('-')[0]
part = pageUrl.split('-')[-1].split('.')[0]
return '%s-%s' % (part, num)
def getIndexStripUrl(self, index):
index, part = index.split('-')
return self.stripUrl % (part, index, part)
class FredoAndPidjin(_BasicScraper): class FredoAndPidjin(_BasicScraper):
url = 'http://www.pidjin.net/' url = 'http://www.pidjin.net/'
stripUrl = None stripUrl = None

View file

@ -27,13 +27,20 @@ class Garanos(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class GUComics(_BasicScraper): class GastroPhobia(_BasicScraper):
url = 'http://www.gucomics.com/comic/' url = 'http://www.gastrophobia.com/'
stripUrl = url + '?cdate=%s' stripUrl = url + 'index.php?date=%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)')) imageSearch = compile(r'<img src="(http://gastrophobia.com/comix/[^"]+)"[^>]*>(?!<br>)')
prevSearch = compile(tagre("a", "href", r'(/comic/\?cdate=\d+)') + prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev.gif" ')
tagre("img", "src", r'/images/nav/prev\.png')) help = 'Index format: yyyy-mm-dd'
help = 'Index format: yyyymmdd'
# Scraper definition for http://sevenfloorsdown.com/geeks/.
class Geeks(_BasicScraper):
    url = 'http://sevenfloorsdown.com/geeks/'
    stripUrl = url + 'archives/%s'
    # The dot in the host name is escaped so it only matches a literal dot.
    imageSearch = compile(r'<img src=\'(http://sevenfloorsdown\.com/geeks/comics/.+?)\'')
    prevSearch = compile(r'<a href="(.+?)">&laquo; Previous')
    help = 'Index format: nnn'
class GirlGenius(_BasicScraper): class GirlGenius(_BasicScraper):
@ -52,6 +59,14 @@ class GirlsWithSlingshots(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper definition for http://www.defectivity.com/ghe/.
class GlassHalfEmpty(_BasicScraper):
    url = 'http://www.defectivity.com/ghe/index.php'
    stripUrl = url + '?strip_id=%s'
    imageSearch = compile(
        r'src="(comics/.+?)"'
    )
    # Link pattern concatenated with the "onback.jpg" button image pattern.
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)')
        + tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg')
    )
    help = 'Index format: nnn'
class GleefulNihilism(_BasicScraper): class GleefulNihilism(_BasicScraper):
url = 'http://gleefulnihilism.com/' url = 'http://gleefulnihilism.com/'
stripUrl = url + 'comics/%s/' stripUrl = url + 'comics/%s/'
@ -82,6 +97,14 @@ class GoneWithTheBlastwave(_BasicScraper):
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1)) return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
# Scraper definition for http://www.greystoneinn.net/.
class GreystoneInn(_BasicScraper):
    url = 'http://www.greystoneinn.net/'
    stripUrl = url + 'd/%s.html'
    # Matches "/comics..." or "/comic/..."; inside a character class "|" is
    # a literal pipe, not alternation, so it is left out of the class.
    imageSearch = compile(tagre("img", "src", r'(/comic[s/][^"]+)'))
    # Two alternative "previous" button images are accepted.
    prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)') + r"[^>]+/images/(?:nav_02|previous_day)\.gif")
    help = 'Index format: yyyymmdd'
class GrrlPower(_BasicScraper): class GrrlPower(_BasicScraper):
url = 'http://www.grrlpowercomic.com/' url = 'http://www.grrlpowercomic.com/'
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
@ -107,33 +130,10 @@ class Gunshow(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class GastroPhobia(_BasicScraper): class GUComics(_BasicScraper):
url = 'http://www.gastrophobia.com/' url = 'http://www.gucomics.com/comic/'
stripUrl = url + 'index.php?date=%s' stripUrl = url + '?cdate=%s'
imageSearch = compile(r'<img src="(http://gastrophobia.com/comix/[^"]+)"[^>]*>(?!<br>)') imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev.gif" ') prevSearch = compile(tagre("a", "href", r'(/comic/\?cdate=\d+)') +
help = 'Index format: yyyy-mm-dd' tagre("img", "src", r'/images/nav/prev\.png'))
# Scraper settings for the comic at sevenfloorsdown.com/geeks.
class Geeks(_BasicScraper):
    url = 'http://sevenfloorsdown.com/geeks/'
    stripUrl = url + 'archives/%s'
    # The image tag uses single quotes around src.  The dot in the host name
    # is escaped so that it only matches a literal "." instead of any
    # character.
    imageSearch = compile(r'<img src=\'(http://sevenfloorsdown\.com/geeks/comics/.+?)\'')
    # Captures the href of the anchor whose text starts with "&laquo; Previous".
    prevSearch = compile(r'<a href="(.+?)">&laquo; Previous')
    help = 'Index format: nnn'
# Scraper settings for the comic served from defectivity.com/ghe.
class GlassHalfEmpty(_BasicScraper):
    url = 'http://www.defectivity.com/ghe/index.php'
    help = 'Index format: nnn'
    # A single strip is addressed through the strip_id query parameter.
    stripUrl = url + '?strip_id=%s'
    # Captures the relative "comics/..." path found in a src attribute.
    imageSearch = compile(r'src="(comics/.+?)"')
    # Captures the "?strip_id=<digits>" link that is immediately followed by
    # the "onback" arrow button image.
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg')
    )
# Scraper settings for the comic at greystoneinn.net.
class GreystoneInn(_BasicScraper):
    url = 'http://www.greystoneinn.net/'
    stripUrl = url + 'd/%s.html'
    # Image paths start with "/comics" or "/comic/".  The character class is
    # "[s/]" and not "[s|/]": inside brackets "|" is a literal character, not
    # an alternation, so the latter would also accept "/comic|...".
    imageSearch = compile(tagre("img", "src", r'(/comic[s/][^"]+)'))
    # Captures the "/d/<digits>.html" (or ".shtml") link whose tag is followed
    # by one of the two known "previous" navigation images.
    prevSearch = compile(
        tagre("a", "href", r'[^"]*(/d/\d+\.s?html)') +
        r"[^>]+/images/(?:nav_02|previous_day)\.gif"
    )
    help = 'Index format: yyyymmdd'

View file

@ -32,14 +32,6 @@ class IDreamOfAJeanieBottle(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# Scraper settings for the comic at irregularwebcomic.net.
class IrregularWebcomic(_BasicScraper):
    url = 'http://www.irregularwebcomic.net/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Group 1 is the whole src value (contains "comics/" and ends in png, jpg
    # or gif); group 2 is only the file extension.
    imageSearch = compile(
        r'<img .*src="(.*comics/.*(png|jpg|gif))".*>'
    )
    # The anchor labelled "Previous " points either at /<digits>.html or at
    # the comic.pl CGI script.
    prevSearch = compile(
        r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous '
    )
class InsideOut(_BasicScraper): class InsideOut(_BasicScraper):
url = 'http://www.insideoutcomic.com/' url = 'http://www.insideoutcomic.com/'
stripUrl = url + 'html/%s.html' stripUrl = url + 'html/%s.html'
@ -48,6 +40,14 @@ class InsideOut(_BasicScraper):
help = 'Index format: n_comic_name' help = 'Index format: n_comic_name'
# Scraper settings for the comic at irregularwebcomic.net.
class IrregularWebcomic(_BasicScraper):
    url = 'http://www.irregularwebcomic.net/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Group 1 is the whole src value (contains "comics/" and ends in png, jpg
    # or gif); group 2 is only the file extension.
    imageSearch = compile(
        r'<img .*src="(.*comics/.*(png|jpg|gif))".*>'
    )
    # The anchor labelled "Previous " points either at /<digits>.html or at
    # the comic.pl CGI script.
    prevSearch = compile(
        r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous '
    )
class ItsWalky(_BasicScraper): class ItsWalky(_BasicScraper):
url = 'http://www.itswalky.com/' url = 'http://www.itswalky.com/'
stripUrl = url + 'd/%s.html' stripUrl = url + 'd/%s.html'

View file

@ -6,30 +6,6 @@ from re import compile, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
# Scraper settings for the comic at key.shadilyn.com.
class Key(_BasicScraper):
    # The entry point is the "latestpage" document, not the site root.
    url = 'http://key.shadilyn.com/latestpage.html'
    help = 'Index format: nnn'
    stripUrl = 'http://key.shadilyn.com/pages/%s.html'
    # Captures a double-quoted path under images/ or pages/images/.
    imageSearch = compile(
        r'"((?:images/.+?)|(?:pages/images/.+?))"'
    )
    # Captures the href (ending in "html") of an anchor that directly follows
    # a closing </a> and is itself followed by the text "prev".
    prevSearch = compile(
        r'</a><a href="(.+?html)".+?prev'
    )
# Scraper settings for the comic at krakow.krakowstudios.com.
class Krakow(_BasicScraper):
    url = 'http://www.krakow.krakowstudios.com/'
    help = 'Index format: yyyymmdd'
    # Strips are addressed through the "date" parameter of archive.php.
    stripUrl = url + 'archive.php?date=%s'
    # Captures the relative "comics/..." image path.
    imageSearch = compile(
        r'<img src="(comics/.+?)"'
    )
    # Captures the archive link wrapping the image named "previous_day".
    prevSearch = compile(
        r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day'
    )
# Scraper settings for the comic at kukuburi.com.
class Kukuburi(_BasicScraper):
    url = 'http://www.kukuburi.com/current/'
    help = 'Index format: yyyy/mm/dd/stripname'
    # Individual strips live under /v2/, unlike the /current/ start page.
    stripUrl = 'http://www.kukuburi.com/v2/%s/'
    # Captures the absolute image URL below /v2/comics/; the "after" pattern
    # additionally asks for an alt attribute with at least one character.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.kukuburi\.com/v2/comics/[^"]+)',
              after='alt="[^"]')
    )
    # Captures the first quoted http URL that follows "nav-previous".
    prevSearch = compile(
        r'nav-previous.+?"(http.+?)"'
    )
class KevinAndKell(_BasicScraper): class KevinAndKell(_BasicScraper):
url = 'http://www.kevinandkell.com/' url = 'http://www.kevinandkell.com/'
stripUrl = url + '%s/kk%s%s.html' stripUrl = url + '%s/kk%s%s.html'
@ -41,6 +17,14 @@ class KevinAndKell(_BasicScraper):
return self.stripUrl % tuple(map(int, index.split('-'))) return self.stripUrl % tuple(map(int, index.split('-')))
# Scraper settings for the comic at key.shadilyn.com.
class Key(_BasicScraper):
    # The entry point is the "latestpage" document, not the site root.
    url = 'http://key.shadilyn.com/latestpage.html'
    help = 'Index format: nnn'
    stripUrl = 'http://key.shadilyn.com/pages/%s.html'
    # Captures a double-quoted path under images/ or pages/images/.
    imageSearch = compile(
        r'"((?:images/.+?)|(?:pages/images/.+?))"'
    )
    # Captures the href (ending in "html") of an anchor that directly follows
    # a closing </a> and is itself followed by the text "prev".
    prevSearch = compile(
        r'</a><a href="(.+?html)".+?prev'
    )
class KhaosKomix(_BasicScraper): class KhaosKomix(_BasicScraper):
adult = True adult = True
url = 'http://www.khaoskomix.com/' url = 'http://www.khaoskomix.com/'
@ -68,6 +52,22 @@ class _Kofightclub(_BasicScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
# Scraper settings for the comic at krakow.krakowstudios.com.
class Krakow(_BasicScraper):
    url = 'http://www.krakow.krakowstudios.com/'
    help = 'Index format: yyyymmdd'
    # Strips are addressed through the "date" parameter of archive.php.
    stripUrl = url + 'archive.php?date=%s'
    # Captures the relative "comics/..." image path.
    imageSearch = compile(
        r'<img src="(comics/.+?)"'
    )
    # Captures the archive link wrapping the image named "previous_day".
    prevSearch = compile(
        r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day'
    )
# Scraper settings for the comic at kukuburi.com.
class Kukuburi(_BasicScraper):
    url = 'http://www.kukuburi.com/current/'
    help = 'Index format: yyyy/mm/dd/stripname'
    # Individual strips live under /v2/, unlike the /current/ start page.
    stripUrl = 'http://www.kukuburi.com/v2/%s/'
    # Captures the absolute image URL below /v2/comics/; the "after" pattern
    # additionally asks for an alt attribute with at least one character.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.kukuburi\.com/v2/comics/[^"]+)',
              after='alt="[^"]')
    )
    # Captures the first quoted http URL that follows "nav-previous".
    prevSearch = compile(
        r'nav-previous.+?"(http.+?)"'
    )
class KuroShouri(_BasicScraper): class KuroShouri(_BasicScraper):
url = 'http://kuroshouri.com/' url = 'http://kuroshouri.com/'
stripUrl = url + '?webcomic_post=%s' stripUrl = url + '?webcomic_post=%s'

View file

@ -16,6 +16,16 @@ class LasLindas(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
# Scraper settings for the comic at leasticoulddo.com.
class LeastICouldDo(_BasicScraper):
    url = 'http://www.leasticoulddo.com/'
    help = 'Index format: yyyymmdd'
    stripUrl = url + 'comic/%s'
    # Captures the CDN upload URL whose file name is eight digits plus a
    # one to four character extension.
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.leasticoulddo\.com/wp-content/uploads/\d+/\d+/\d{8}\.\w{1,4})')
    )
    # Captures the /comic/<digits>/ link tagged with "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="Previous")
    )
    # The start page is searched for the /comic/<digits>/ link tagged with
    # "feature-comic"; that pattern is handed to indirectStarter.
    starter = indirectStarter(
        url,
        compile(
            tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="feature-comic")
        )
    )
class Lint(_BasicScraper): class Lint(_BasicScraper):
url = 'http://www.purnicellin.com/lint/' url = 'http://www.purnicellin.com/lint/'
stripUrl = url + '%s' stripUrl = url + '%s'
@ -24,6 +34,14 @@ class Lint(_BasicScraper):
help = 'Index format: yyyy/mm/dd/num-name' help = 'Index format: yyyy/mm/dd/num-name'
# Scraper settings for the comic at little-gamers.com.
class LittleGamers(_BasicScraper):
    url = 'http://www.little-gamers.com/'
    stripUrl = url + '%s/'
    # Images are matched on the host name without the "www." prefix.
    imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
    # Captures the link tagged with "comic-nav-prev-link".  Both dots of the
    # host name are escaped so that ".com" only matches a literal dot.
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)',
              before="comic-nav-prev-link")
    )
    help = 'Index format: yyyy/mm/dd/name'
class LoadingArtist(_BasicScraper): class LoadingArtist(_BasicScraper):
url = 'http://www.loadingartist.com/' url = 'http://www.loadingartist.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -45,21 +63,3 @@ class LookingForGroup(_BasicScraper):
@classmethod @classmethod
def namer(self, imageUrl, pageUrl): def namer(self, imageUrl, pageUrl):
return self.nameSearch.search(pageUrl).group(1) return self.nameSearch.search(pageUrl).group(1)
# Scraper settings for the comic at little-gamers.com.
class LittleGamers(_BasicScraper):
    url = 'http://www.little-gamers.com/'
    stripUrl = url + '%s/'
    # Images are matched on the host name without the "www." prefix.
    imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
    # Captures the link tagged with "comic-nav-prev-link".  Both dots of the
    # host name are escaped so that ".com" only matches a literal dot.
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)',
              before="comic-nav-prev-link")
    )
    help = 'Index format: yyyy/mm/dd/name'
# Scraper settings for the comic at leasticoulddo.com.
class LeastICouldDo(_BasicScraper):
    url = 'http://www.leasticoulddo.com/'
    help = 'Index format: yyyymmdd'
    stripUrl = url + 'comic/%s'
    # Captures the CDN upload URL whose file name is eight digits plus a
    # one to four character extension.
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.leasticoulddo\.com/wp-content/uploads/\d+/\d+/\d{8}\.\w{1,4})')
    )
    # Captures the /comic/<digits>/ link tagged with "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="Previous")
    )
    # The start page is searched for the /comic/<digits>/ link tagged with
    # "feature-comic"; that pattern is handed to indirectStarter.
    starter = indirectStarter(
        url,
        compile(
            tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="feature-comic")
        )
    )

View file

@ -111,4 +111,3 @@ class MysteriesOfTheArcana(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)')) imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php[^"]+)', after="navprevious")) prevSearch = compile(tagre("a", "href", r'(index\.php[^"]+)', after="navprevious"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'

View file

@ -69,29 +69,12 @@ class Nicky510(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class Nnewts(_BasicScraper): class NekkoAndJoruba(_BasicScraper):
url = 'http://nnewts.com/' url = 'http://www.nekkoandjoruba.com/'
stripUrl = url + '%s/' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % 'nnewts-page-1' imageSearch = compile(r'<img src="(http://www.nekkoandjoruba.com/comics/.+?)"')
imageSearch = compile(tagre("img", "src", r'(http://nnewts\.com/newty/comics/[^"]+)')) prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
prevSearch = compile(tagre("a", "href", r'(http://nnewts\.com/(?:nnewts-)?page-\d+/)', after="navi-prev")) help = 'Index format: nnn'
help = 'Index format: page-number'
# Scraper settings for the comic at noneedforbushido.com.
class NoNeedForBushido(_BasicScraper):
    # The entry point is the /latest/ page.
    url = 'http://noneedforbushido.com/latest/'
    help = 'Index format: yyyy/comic/nnn'
    stripUrl = 'http://noneedforbushido.com/%s/'
    # Captures the absolute image URL below /comics/comic/.
    imageSearch = compile(
        tagre("img", "src", r'(http://noneedforbushido\.com/comics/comic/[^"]+)')
    )
    # Captures the same-site link tagged with "previous-comic-link".
    prevSearch = compile(
        tagre("a", "href", r'(http://noneedforbushido\.com/[^"]+)',
              after="previous-comic-link")
    )
# Scraper settings for the comic at nukees.com.
class Nukees(_BasicScraper):
    url = 'http://www.nukees.com/'
    # The index includes the ".html" suffix, which stripUrl does not add.
    help = 'Index format: yyyymmdd.html'
    stripUrl = url + 'd/%s'
    # Captures the first quoted "/comics/..." path after the text "comic"
    # in quotes.
    imageSearch = compile(
        r'"comic".+?"(/comics/.+?)"'
    )
    # Captures a quoted "/d/..." path that is followed by "previous".
    prevSearch = compile(
        r'"(/d/.+?)".+?previous'
    )
class NekoTheKitty(_BasicScraper): class NekoTheKitty(_BasicScraper):
@ -115,6 +98,15 @@ class NichtLustig(_BasicScraper):
compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))) compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
# Scraper settings for the comic at nnewts.com.
class Nnewts(_BasicScraper):
    url = 'http://nnewts.com/'
    help = 'Index format: page-number'
    stripUrl = url + '%s/'
    # The first strip uses the "nnewts-" prefixed page name.
    firstStripUrl = stripUrl % 'nnewts-page-1'
    # Captures the absolute image URL below /newty/comics/.
    imageSearch = compile(
        tagre("img", "src", r'(http://nnewts\.com/newty/comics/[^"]+)')
    )
    # Captures the "page-<digits>" link (with or without the "nnewts-"
    # prefix) tagged with "navi-prev".
    prevSearch = compile(
        tagre("a", "href", r'(http://nnewts\.com/(?:nnewts-)?page-\d+/)',
              after="navi-prev")
    )
class Nodwick(_BasicScraper): class Nodwick(_BasicScraper):
url = 'http://comic.nodwick.com/' url = 'http://comic.nodwick.com/'
stripUrl = url + "?p=%s" stripUrl = url + "?p=%s"
@ -123,14 +115,6 @@ class Nodwick(_BasicScraper):
help = 'Index format: stripnumber' help = 'Index format: stripnumber'
# Scraper settings for the comic at nekkoandjoruba.com.
class NekkoAndJoruba(_BasicScraper):
    url = 'http://www.nekkoandjoruba.com/'
    stripUrl = url + '?p=%s'
    # Captures the absolute image URL below /comics/.  The dots of the host
    # name are escaped so that they only match a literal "." instead of any
    # character.
    imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
    # Captures the href of the anchor whose text is the "&lsaquo;" entity.
    prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
    help = 'Index format: nnn'
class NobodyScores(_BasicScraper): class NobodyScores(_BasicScraper):
url = 'http://nobodyscores.loosenutstudio.com/' url = 'http://nobodyscores.loosenutstudio.com/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
@ -138,3 +122,19 @@ class NobodyScores(_BasicScraper):
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(r'<a href="(http://nobodyscores\.loosenutstudio\.com/index.php.+?)">the one before </a>') prevSearch = compile(r'<a href="(http://nobodyscores\.loosenutstudio\.com/index.php.+?)">the one before </a>')
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper settings for the comic at noneedforbushido.com.
class NoNeedForBushido(_BasicScraper):
    # The entry point is the /latest/ page.
    url = 'http://noneedforbushido.com/latest/'
    help = 'Index format: yyyy/comic/nnn'
    stripUrl = 'http://noneedforbushido.com/%s/'
    # Captures the absolute image URL below /comics/comic/.
    imageSearch = compile(
        tagre("img", "src", r'(http://noneedforbushido\.com/comics/comic/[^"]+)')
    )
    # Captures the same-site link tagged with "previous-comic-link".
    prevSearch = compile(
        tagre("a", "href", r'(http://noneedforbushido\.com/[^"]+)',
              after="previous-comic-link")
    )
# Scraper settings for the comic at nukees.com.
class Nukees(_BasicScraper):
    url = 'http://www.nukees.com/'
    # The index includes the ".html" suffix, which stripUrl does not add.
    help = 'Index format: yyyymmdd.html'
    stripUrl = url + 'd/%s'
    # Captures the first quoted "/comics/..." path after the text "comic"
    # in quotes.
    imageSearch = compile(
        r'"comic".+?"(/comics/.+?)"'
    )
    # Captures a quoted "/d/..." path that is followed by "previous".
    prevSearch = compile(
        r'"(/d/.+?)".+?previous'
    )

View file

@ -27,6 +27,23 @@ class OddFish(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
# Scraper settings for the comic at oglaf.com.
class Oglaf(_BasicScraper):
    url = 'http://oglaf.com/'
    help = 'Index format: stripname/nn'
    stripUrl = url + '%s/'
    # Captures the "/media/comic/..." image path; "strip" is passed to tagre
    # as the "before" pattern.
    imageSearch = compile(
        tagre("img", "src", r'(/media/comic/[^"]+)', before="strip")
    )
    # Captures the href of the anchor that is directly followed by the div
    # with id "pvs".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') +
        tagre("div", "id", "pvs")
    )
# Scraper settings for the comic at okcancel.com.
class OkCancel(_BasicScraper):
    url = 'http://okcancel.com/'
    help = 'Index format: yyyymmdd'
    stripUrl = url + 'comic/%s.html'
    # Captures the strip GIF whose file name is "okcancel" plus eight digits.
    imageSearch = compile(
        tagre("img", "src", r'(http://okcancel\.com/strips/okcancel\d{8}\.gif)')
    )
    # Captures the /comic/<1-4 digits>.html link that directly follows the
    # div with class "previous".
    prevSearch = compile(
        tagre("div", "class", "previous") +
        tagre("a", "href", r'(http://okcancel\.com/comic/\d{1,4}\.html)')
    )
    # The same pattern is handed to indirectStarter together with the start
    # page.
    starter = indirectStarter(url, prevSearch)
class OmakeTheater(_BasicScraper): class OmakeTheater(_BasicScraper):
url = 'http://omaketheater.com/' url = 'http://omaketheater.com/'
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
@ -61,23 +78,6 @@ class OurHomePlanet(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# Scraper settings for the comic at okcancel.com.
class OkCancel(_BasicScraper):
    url = 'http://okcancel.com/'
    help = 'Index format: yyyymmdd'
    stripUrl = url + 'comic/%s.html'
    # Captures the strip GIF whose file name is "okcancel" plus eight digits.
    imageSearch = compile(
        tagre("img", "src", r'(http://okcancel\.com/strips/okcancel\d{8}\.gif)')
    )
    # Captures the /comic/<1-4 digits>.html link that directly follows the
    # div with class "previous".
    prevSearch = compile(
        tagre("div", "class", "previous") +
        tagre("a", "href", r'(http://okcancel\.com/comic/\d{1,4}\.html)')
    )
    # The same pattern is handed to indirectStarter together with the start
    # page.
    starter = indirectStarter(url, prevSearch)
# Scraper settings for the comic at oglaf.com.
class Oglaf(_BasicScraper):
    url = 'http://oglaf.com/'
    help = 'Index format: stripname/nn'
    stripUrl = url + '%s/'
    # Captures the "/media/comic/..." image path; "strip" is passed to tagre
    # as the "before" pattern.
    imageSearch = compile(
        tagre("img", "src", r'(/media/comic/[^"]+)', before="strip")
    )
    # Captures the href of the anchor that is directly followed by the div
    # with id "pvs".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') +
        tagre("div", "id", "pvs")
    )
class OverCompensating(_BasicScraper): class OverCompensating(_BasicScraper):
url = 'http://www.overcompensating.com/' url = 'http://www.overcompensating.com/'
stripUrl = url + 'posts/%s.html' stripUrl = url + 'posts/%s.html'

View file

@ -128,6 +128,15 @@ class Pimpette(_BasicScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
# Broken navigation: prev link at http://planescapecomic.com/201.html points to same URL.
# NOTE(review): the leading underscore in the class name presumably keeps this
# scraper out of the active list - confirm against the loader.
class _PlanescapeSurvival(_BasicScraper):
    url = 'http://planescapecomic.com/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Captures the relative "comics/..." path found in a src attribute.
    imageSearch = compile(
        r'src="(comics/.+?)"'
    )
    # Captures the href of the anchor wrapping the image with alt "Previous".
    prevSearch = compile(
        r'<a href="(.+?)"><img alt="Previous" '
    )
class PokeyThePenguin(_BasicScraper): class PokeyThePenguin(_BasicScraper):
baseurl = 'http://www.yellow5.com/pokey/archive/' baseurl = 'http://www.yellow5.com/pokey/archive/'
url = baseurl + 'index558.html' url = baseurl + 'index558.html'
@ -186,12 +195,3 @@ class PunksAndNerdsOld(_BasicScraper):
imageSearch = compile(r' src="(/comics/.+?)"') imageSearch = compile(r' src="(/comics/.+?)"')
prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">') prevSearch = compile(r'><strong><a href="(.+?)"[^>]+?><img[^>]+?src="/previouscomic.gif">')
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
# Broken navigation: prev link at http://planescapecomic.com/201.html points to same URL.
# NOTE(review): the leading underscore in the class name presumably keeps this
# scraper out of the active list - confirm against the loader.
class _PlanescapeSurvival(_BasicScraper):
    url = 'http://planescapecomic.com/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Captures the relative "comics/..." path found in a src attribute.
    imageSearch = compile(
        r'src="(comics/.+?)"'
    )
    # Captures the href of the anchor wrapping the image with alt "Previous".
    prevSearch = compile(
        r'<a href="(.+?)"><img alt="Previous" '
    )

View file

@ -24,6 +24,19 @@ class RealLife(_BasicScraper):
help = 'Index format: yymmdd)' help = 'Index format: yymmdd)'
# Scraper settings for the comic at redmeat.com.
class RedMeat(_BasicScraper):
    url = 'http://www.redmeat.com/redmeat/current/index.html'
    help = 'Index format: yyyy-mm-dd'
    stripUrl = 'http://www.redmeat.com/redmeat/%s/index.html'
    # bounceStarter receives the start page and the pattern for the "next"
    # link (a relative ../yyyy-mm-dd/index.html path).
    starter = bounceStarter(
        url,
        compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>')
    )
    # Every strip image is named index-1.gif inside its dated directory.
    imageSearch = compile(
        r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>'
    )
    # Same relative path shape as the "next" link, labelled "previous".
    prevSearch = compile(
        r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>'
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the second-to-last '/'-separated part of imageUrl."""
        # Only the last two separators matter for picking that component.
        return imageUrl.rsplit('/', 2)[-2]
class RedString(_BasicScraper): class RedString(_BasicScraper):
url = 'http://www.redstring.strawberrycomics.com/' url = 'http://www.redstring.strawberrycomics.com/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
@ -38,16 +51,3 @@ class Roza(_BasicScraper):
imageSearch = compile(r'<img src="(pages/.+?)"') imageSearch = compile(r'<img src="(pages/.+?)"')
prevSearch = compile(r'<a href="(index.php\?date=.+?)">[^>].+?navtable_01.gif') prevSearch = compile(r'<a href="(index.php\?date=.+?)">[^>].+?navtable_01.gif')
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# Scraper settings for the comic at redmeat.com.
class RedMeat(_BasicScraper):
    url = 'http://www.redmeat.com/redmeat/current/index.html'
    help = 'Index format: yyyy-mm-dd'
    stripUrl = 'http://www.redmeat.com/redmeat/%s/index.html'
    # bounceStarter receives the start page and the pattern for the "next"
    # link (a relative ../yyyy-mm-dd/index.html path).
    starter = bounceStarter(
        url,
        compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>')
    )
    # Every strip image is named index-1.gif inside its dated directory.
    imageSearch = compile(
        r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>'
    )
    # Same relative path shape as the "next" link, labelled "previous".
    prevSearch = compile(
        r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>'
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the second-to-last '/'-separated part of imageUrl."""
        # Only the last two separators matter for picking that component.
        return imageUrl.rsplit('/', 2)[-2]

View file

@ -83,6 +83,23 @@ class SequentialArt(_BasicScraper):
help = 'Index format: name' help = 'Index format: name'
# Scraper settings for the comic at sexylosers.com (flagged as adult).
class SexyLosers(_BasicScraper):
    adult = True
    url = 'http://www.sexylosers.com/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Captures the relative "comics/..." path; whitespace around "=" and
    # after the opening quote is tolerated, case is ignored.
    imageSearch = compile(
        r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"',
        IGNORECASE)
    # Captures the "/<3 digits>.<ext>" link wrapping the "<<" font element.
    prevSearch = compile(
        r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<',
        IGNORECASE)
    # The start page is searched for the "Latest SL Comic (#n)" link; that
    # pattern is handed to indirectStarter.
    starter = indirectStarter(
        url,
        compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>',
                IGNORECASE))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<page stem>-<image stem>'.

        A stem is the last '/'-separated part of the URL, cut at its first
        dot.
        """
        pageStem = pageUrl.rsplit('/', 1)[-1].split('.', 1)[0]
        imageStem = imageUrl.rsplit('/', 1)[-1].split('.', 1)[0]
        return '%s-%s' % (pageStem, imageStem)
class ShadowGirls(_BasicScraper): class ShadowGirls(_BasicScraper):
url = 'http://www.shadowgirlscomic.com/' url = 'http://www.shadowgirlscomic.com/'
stripUrl = url + 'comics/%s' stripUrl = url + 'comics/%s'
@ -150,6 +167,14 @@ class SluggyFreelance(_BasicScraper):
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
# Scraper settings for the comic at smbc-comics.com.
class SMBC(_BasicScraper):
    url = 'http://www.smbc-comics.com/'
    stripUrl = url + 'index.php?db=comics&id=%s'
    # The image tag uses single quotes; the file name ends in eight digits
    # plus an extension.  The dot before the extension is escaped so that it
    # only matches a literal "." instead of any character.
    imageSearch = compile(r'<img src=\'(.+?\d{8}\.\w{1,4})\'>')
    # The previous link is located through the coordinates "131,13,216,84"
    # and spans several source lines; the "#comic" fragment is left out of
    # the captured URL.
    prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
    help = 'Index format: nnnn'
class SnowFlame(_BasicScraper): class SnowFlame(_BasicScraper):
url = 'http://www.snowflamecomic.com/' url = 'http://www.snowflamecomic.com/'
stripUrl = url + '?comic=snowflame-%s-%s' stripUrl = url + '?comic=snowflame-%s-%s'
@ -275,23 +300,6 @@ class SomethingPositive(_BasicScraper):
help = 'Index format: mmddyyyy' help = 'Index format: mmddyyyy'
# Scraper settings for the comic at sexylosers.com (flagged as adult).
class SexyLosers(_BasicScraper):
    adult = True
    url = 'http://www.sexylosers.com/'
    help = 'Index format: nnn'
    stripUrl = url + '%s.html'
    # Captures the relative "comics/..." path; whitespace around "=" and
    # after the opening quote is tolerated, case is ignored.
    imageSearch = compile(
        r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"',
        IGNORECASE)
    # Captures the "/<3 digits>.<ext>" link wrapping the "<<" font element.
    prevSearch = compile(
        r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<',
        IGNORECASE)
    # The start page is searched for the "Latest SL Comic (#n)" link; that
    # pattern is handed to indirectStarter.
    starter = indirectStarter(
        url,
        compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>',
                IGNORECASE))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<page stem>-<image stem>'.

        A stem is the last '/'-separated part of the URL, cut at its first
        dot.
        """
        pageStem = pageUrl.rsplit('/', 1)[-1].split('.', 1)[0]
        imageStem = imageUrl.rsplit('/', 1)[-1].split('.', 1)[0]
        return '%s-%s' % (pageStem, imageStem)
class StarCrossdDestiny(_BasicScraper): class StarCrossdDestiny(_BasicScraper):
url = 'http://www.starcrossd.net/comic.html' url = 'http://www.starcrossd.net/comic.html'
stripUrl = 'http://www.starcrossd.net/archives/%s.html' stripUrl = 'http://www.starcrossd.net/archives/%s.html'
@ -329,14 +337,6 @@ class _StrangeCandy(_BasicScraper):
help = 'Index format: yyyyddmm' help = 'Index format: yyyyddmm'
# Scraper settings for the comic at smbc-comics.com.
class SMBC(_BasicScraper):
    url = 'http://www.smbc-comics.com/'
    stripUrl = url + 'index.php?db=comics&id=%s'
    # The image tag uses single quotes; the file name ends in eight digits
    # plus an extension.  The dot before the extension is escaped so that it
    # only matches a literal "." instead of any character.
    imageSearch = compile(r'<img src=\'(.+?\d{8}\.\w{1,4})\'>')
    # The previous link is located through the coordinates "131,13,216,84"
    # and spans several source lines; the "#comic" fragment is left out of
    # the captured URL.
    prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
    help = 'Index format: nnnn'
class SupernormalStep(_BasicScraper): class SupernormalStep(_BasicScraper):
url = 'http://supernormalstep.com/' url = 'http://supernormalstep.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'

View file

@ -8,12 +8,12 @@ from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
class WayfarersMoon(_BasicScraper): class WapsiSquare(_BasicScraper):
url = 'http://www.wayfarersmoon.com/' url = 'http://wapsisquare.com/'
stripUrl = url + 'index.php?page=%s' stripUrl = url + 'comic/%s'
imageSearch = compile(r'<img src="(/admin.+?)"') imageSearch = compile(r'<img src="(http://wapsisquare.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif') prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: nn' help = 'Index format: strip-name'
class WastedTalent(_BasicScraper): class WastedTalent(_BasicScraper):
@ -24,6 +24,22 @@ class WastedTalent(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
# Scraper settings for the comic at wayfarersmoon.com.
class WayfarersMoon(_BasicScraper):
    url = 'http://www.wayfarersmoon.com/'
    stripUrl = url + 'index.php?page=%s'
    # Captures the "/admin..." image path.
    imageSearch = compile(r'<img src="(/admin.+?)"')
    # Captures the href of the anchor that is followed by the back button
    # image.  The dot in "btn_back.gif" is escaped so that it only matches a
    # literal "." instead of any character.
    prevSearch = compile(r'<a href="(.+?)".+?btn_back\.gif')
    help = 'Index format: nn'
# Scraper settings for the comic at wecansleeptomorrow.com.
class WeCanSleepTomorrow(_BasicScraper):
    url = 'http://wecansleeptomorrow.com/'
    help = 'Index format: yyyy/mm/dd/stripname'
    stripUrl = url + '%s/'
    # Captures the absolute image URL below /comics/.
    imageSearch = compile(
        tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)')
    )
    # Captures the same-site link tagged with "prev".
    prevSearch = compile(
        tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)',
              after="prev")
    )
class WhiteNinja(_BasicScraper): class WhiteNinja(_BasicScraper):
url = 'http://www.whiteninjacomics.com/comics.shtml' url = 'http://www.whiteninjacomics.com/comics.shtml'
stripUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml' stripUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml'
@ -123,22 +139,6 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
# Scraper settings for the comic at wapsisquare.com.
class WapsiSquare(_BasicScraper):
    url = 'http://wapsisquare.com/'
    stripUrl = url + 'comic/%s'
    # Captures the absolute image URL below /comics/.  The dot of the host
    # name is escaped so that it only matches a literal "." instead of any
    # character.
    imageSearch = compile(r'<img src="(http://wapsisquare\.com/comics/.+?)"')
    # Captures the href of the anchor whose text is "Previous".
    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
    help = 'Index format: strip-name'
# Scraper settings for the comic at wecansleeptomorrow.com.
class WeCanSleepTomorrow(_BasicScraper):
    url = 'http://wecansleeptomorrow.com/'
    help = 'Index format: yyyy/mm/dd/stripname'
    stripUrl = url + '%s/'
    # Captures the absolute image URL below /comics/.
    imageSearch = compile(
        tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)')
    )
    # Captures the same-site link tagged with "prev".
    prevSearch = compile(
        tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)',
              after="prev")
    )
class Wondermark(_BasicScraper): class Wondermark(_BasicScraper):
url = 'http://wondermark.com/' url = 'http://wondermark.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'