Use re.escape and add some firstStripUrl.

This commit is contained in:
Bastian Kleineidam 2013-04-10 18:19:11 +02:00
parent fec6d92d8c
commit 5127d4c895
23 changed files with 582 additions and 369 deletions

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, MULTILINE
from re import compile, escape, MULTILINE
from ..util import tagre
from ..scraper import _BasicScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter
@ -11,14 +11,17 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter
class AbleAndBaker(_BasicScraper):
url = 'http://www.jimburgessdesign.com/comics/index.php'
stripUrl = url + '?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(comics/.+)'))
prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif')
help = 'Index format: nnn'
class AbsurdNotions(_BasicScraper):
url = 'http://www.absurdnotions.org/page129.html'
stripUrl = 'http://www.absurdnotions.org/page%s.html'
baseurl = 'http://www.absurdnotions.org/'
url = baseurl + 'page129.html'
stripUrl = baseurl + 'page%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
multipleImagesPerStrip = True
prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif'))
@ -27,11 +30,12 @@ class AbsurdNotions(_BasicScraper):
class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/'
starter = bounceStarter(url,
compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »"))
rurl = escape(url)
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
stripUrl = url + '%s'
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'&laquo; Previous</a>')
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(%sstrips/[^<>"]+)' % rurl))
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous</a>')
help = 'Index format: n (unpadded)'
@classmethod
@ -44,6 +48,7 @@ class AbstruseGoose(_BasicScraper):
class AcademyVale(_BasicScraper):
url = 'http://www.imagerie.com/vale/'
stripUrl = url + 'avarch.cgi?%s'
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif'))
help = 'Index format: nnn'
@ -52,7 +57,7 @@ class AcademyVale(_BasicScraper):
class AhoiPolloi(_BasicScraper):
url = 'http://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s'
firstStripUrl = stripUrl % '20060305'
firstStripUrl = stripUrl % '20060306'
multipleImagesPerStrip = True
lang = 'de'
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
@ -69,29 +74,33 @@ class ALessonIsLearned(_BasicScraper):
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
help = 'Index format: nnn'
class Alice(_BasicScraper):
url = 'http://alice.alicecomics.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/alicecomics/[^"]+)', after="previous"))
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%salicecomics/[^"]+)' % rurl, after="previous"))
help = 'Index format: name'
class AlienLovesPredator(_BasicScraper):
url = 'http://alienlovespredator.com/'
stripUrl = url + '%s'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/name/'
help = 'Index format: yyyy/mm/dd/name'
class AlphaLuna(_BasicScraper):
url = 'http://www.alphaluna.net/'
stripUrl = url + 'issue-%s/'
firstStripUrl = stripUrl % '1/cover'
imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev"))
help = 'Index format: issue/page (e.g. 4/05)'
@ -102,11 +111,13 @@ class AlphaLunaSpanish(AlphaLuna):
lang = 'es'
url = 'http://alphaluna.net/spanish/'
stripUrl = url + 'issue-%s/'
firstStripUrl = stripUrl % '1/portada'
class Altermeta(_BasicScraper):
url = 'http://altermeta.net/'
stripUrl = url + 'archive.php?comic=%s'
firstStripUrl = stripUrl % '0'
imageSearch = compile(r'<img src="(comics/[^"]+)" />')
prevSearch = compile(r'<a href="([^"]+)"><img src="http://altermeta\.net/template/default/images/sasha/back\.png')
help = 'Index format: n (unpadded)'
@ -115,14 +126,17 @@ class Altermeta(_BasicScraper):
class AltermetaOld(Altermeta):
url = 'http://altermeta.net/oldarchive/index.php'
stripUrl = 'http://altermeta.net/oldarchive/archive.php?comic=%s'
firstStripUrl = stripUrl % '0'
prevSearch = compile(r'<a href="([^"]+)">Back')
class AmazingSuperPowers(_BasicScraper):
url = 'http://www.amazingsuperpowers.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.amazingsuperpowers\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.amazingsuperpowers\.com/[^"]+)', after="prev"))
firstStripUrl = stripUrl % '2007/09/heredity'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name'
@ -136,15 +150,18 @@ class Angels2200(_BasicScraper):
class Antics(_BasicScraper):
url = 'http://www.anticscomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.anticscomic\.com/comics/\d+-\d+-\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.anticscomic\.com/\?p=\d+)', after='prev'))
firstStripUrl = stripUrl % '3'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev'))
help = 'Index format: number'
class AppleGeeks(_BasicScraper):
url = 'http://www.applegeeks.com/'
stripUrl = url + 'comics/viewcomic.php?issue=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)'))
prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE)
help = 'Index format: n (unpadded)'
@ -153,6 +170,7 @@ class AppleGeeks(_BasicScraper):
class Achewood(_BasicScraper):
url = 'http://www.achewood.com/'
stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '00000000'
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
help = 'Index format: mmddyyyy'
@ -162,6 +180,7 @@ class Achewood(_BasicScraper):
class ASofterWorld(_BasicScraper):
url = 'http://www.asofterworld.com/'
stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' +
tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back')
@ -169,10 +188,12 @@ class ASofterWorld(_BasicScraper):
class AstronomyPOTD(_BasicScraper):
url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html'
baseurl = 'http://antwrp.gsfc.nasa.gov/apod/'
url = baseurl + 'astropix.html'
starter = bounceStarter(url,
compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
stripUrl = baseurl + 'ap%s.html'
firstStripUrl = stripUrl % '061012'
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>")
@ -202,7 +223,8 @@ class AfterStrife(_BasicScraper):
class ALLCAPS(_BasicScraper):
url = 'http://www.allcapscomix.com/'
stripUrl = url + '%s'
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2008/08/welcome-to-all-caps'
imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous</a>")
help = 'Index format: yyyy/mm/strip-name'
@ -211,6 +233,7 @@ class ALLCAPS(_BasicScraper):
class ASkeweredParadise(_BasicScraper):
url = 'http://aspcomics.net/'
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous")
help = 'Index format: nnn'
@ -221,6 +244,7 @@ class AGirlAndHerFed(_BasicScraper):
starter = bounceStarter(url,
compile(r'<a href="([^"]+)">[^>]+Back'))
stripUrl = url + '1.%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
prevSearch = compile(r'<a href="([^"]+)">[^>]+Back')
help = 'Index format: nnn'
@ -229,6 +253,7 @@ class AGirlAndHerFed(_BasicScraper):
class AetheriaEpics(_BasicScraper):
url = 'http://aetheria-epics.schala.net/'
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '00001'
imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous")
help = 'Index format: nnn'
@ -236,10 +261,11 @@ class AetheriaEpics(_BasicScraper):
class AirForceBlues(_BasicScraper):
url = 'http://www.afblues.com/'
stripUrl = url + 'wordpress/%s'
stripUrl = url + 'wordpress/%s/'
firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya'
imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous'))
help = 'Index format: yyyy/mm/dd/name/'
help = 'Index format: yyyy/mm/dd/stripname'
class AlienShores(_BasicScraper):
@ -252,23 +278,27 @@ class AlienShores(_BasicScraper):
class AllTheGrowingThings(_BasicScraper):
url = 'http://growingthings.typodmary.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://growingthings\.typodmary\.com/files/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://growingthings\.typodmary\.com/[^"]+)', after="prev"))
firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things'
imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name'
class Amya(_BasicScraper):
url = 'http://www.amyachronicles.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.amyachronicles\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.amyachronicles\.com/archives/\d+)', after="Previous"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="Previous"))
help = 'Index format: n'
class Angband(_BasicScraper):
url = 'http://angband.calamarain.net/'
stripUrl = url + 'view.php?date=%s'
firstStripUrl = stripUrl % '2005-12-30'
imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous")
help = 'Index format: yyyy-mm-dd'
@ -276,9 +306,10 @@ class Angband(_BasicScraper):
class AlsoBagels(_BasicScraper):
url = 'http://alsobagels.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comic/%s/'
imageSearch = compile(tagre("img", "src", r'(http://alsobagels\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://alsobagels\.com/index\.php/comic/[^"]+)', after="Previous"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous"))
help = 'Index format: strip-name'
@ -292,10 +323,12 @@ class Annyseed(_BasicScraper):
class AxeCop(_BasicScraper):
    # Site root; every search pattern below is anchored on it.
    url = 'http://axecop.com/'
    # Regex-escaped site root for interpolation into the patterns.
    rurl = escape(url)
    # Start from the first episode link found on the front page.
    starter = indirectStarter(url,
        compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/episode_\d+/)' % rurl)))
    stripUrl = url + 'index.php/acepisodes/read/%s/'
    firstStripUrl = stripUrl % 'episode_0'
    imageSearch = compile(tagre("img", "src", r'(%simages/uploads/(?:axecop|AXE-COP|acmarried|nightmonster)[^"]+)' % rurl))
    # The "page left" button image is matched via rurl as well, so the site
    # root is spelled out in exactly one place (the url attribute).
    prevSearch = compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/[^"]+)' % rurl) +
        tagre("img", "src", r'%sacimages/buttons/page_left\.png' % rurl))
    help = 'Index format: stripname'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..util import tagre
from ..scraper import _BasicScraper
@ -20,6 +20,7 @@ class BackwaterPlanet(_BasicScraper):
class BadassMuthas(_BasicScraper):
url = 'http://badassmuthas.com/pages/comic.php'
stripUrl = url + '?%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
help = 'Index format: nnn'
@ -28,6 +29,7 @@ class BadassMuthas(_BasicScraper):
class BadMachinery(_BasicScraper):
url = 'http://scarygoround.com/'
stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20090918'
imageSearch = compile(tagre("img", "src", r'(strips/\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + 'Previous')
help = 'Index format: yyyymmdd'
@ -35,32 +37,38 @@ class BadMachinery(_BasicScraper):
class Bardsworth(_BasicScraper):
url = 'http://www.bardsworth.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.bardsworth\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.bardsworth\.com/[^"]+)', after="prev"))
firstStripUrl = stripUrl % '750'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: nnn'
class Baroquen(_BasicScraper):
url = 'http://www.baroquencomics.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.baroquencomics\.com/Comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.baroquencomics\.com/[^"]+)', after='prev'))
firstStripUrl = stripUrl % '2008/11/05/raise-the-curtains'
imageSearch = compile(tagre("img", "src", r'(%sComics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='prev'))
help = 'Index format: yyyy/mm/dd/strip-name'
class Bearmageddon(_BasicScraper):
url = 'http://bearmageddon.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/08/01/page-1'
imageSearch = compile(tagre("img", "src", r'(http://bearmageddon\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://bearmageddon\.com/\d+/\d+/\d+/[^"]+)', after='navi-prev'))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev'))
help = 'Index format: yyyy/mm/dd/stripname'
class BetterDays(_BasicScraper):
url = 'http://jaynaylor.com/betterdays/'
stripUrl = url + 'archives/%s.html'
firstStripUrl = stripUrl % '2003/04/post-2'
imageSearch = compile(tagre("img", "src", r'(/betterdays/comic/[^>]+)', quote=""))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&laquo; Previous')
help = 'Index format: yyyy/mm/<your guess>'
@ -68,9 +76,10 @@ class BetterDays(_BasicScraper):
class BetweenFailures(_BasicScraper):
url = 'http://betweenfailures.com/'
rurl = escape(url)
stripUrl = url + 'archives/archive/%s'
imageSearch = compile(tagre("img", "src", r'(http://betweenfailures\.com/wp-content/webcomic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous"))
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchives/archive/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripnum-strip-name'
@ -85,6 +94,7 @@ class BigFatWhale(_BasicScraper):
class BiggerThanCheeses(_BasicScraper):
url = 'http://www.biggercheese.com/'
stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'src="(comics/.+?)" alt')
prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back')
help = 'Index format: n (unpadded)'
@ -92,15 +102,18 @@ class BiggerThanCheeses(_BasicScraper):
class BillyTheDunce(_BasicScraper):
url = 'http://www.duncepress.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)'))
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.duncepress.com/[^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/strip-name'
firstStripUrl = stripUrl % '2009/06/an-introduction-of-sorts'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(r'<div class="nav-previous"><a href="(%s[^"]+)" rel="prev">' % rurl)
help = 'Index format: yyyy/mm/stripname'
class BizarreUprising(_BasicScraper):
url = 'http://www.bizarreuprising.com/'
stripUrl = url + 'view/%s'
firstStripUrl = stripUrl % '1/awakening-splash'
imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif'))
help = 'Index format: n/name'
@ -108,15 +121,17 @@ class BizarreUprising(_BasicScraper):
class BlankIt(_BasicScraper):
url = 'http://blankitcomics.com/'
stripUrl = url + '%s'
stripUrl = url + 'blankit-%s'
firstStripUrl = stripUrl % '0001'
imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: yyyy/mm/dd/name'
help = 'Index format: number'
class Blip(_BasicScraper):
url = 'http://blipcomic.com/'
stripUrl = url + 'index.php?strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev')
help = 'Index format: n'
@ -129,9 +144,11 @@ class Blip(_BasicScraper):
class BloodBound(_BasicScraper):
url = 'http://bloodboundcomic.com/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://bloodboundcomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://bloodboundcomic\.com/[^"]+)', after="prev"))
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/06/06112006'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name'
@ -146,6 +163,7 @@ class BlueCrashKit(_BasicScraper):
class BMovieComic(_BasicScraper):
url = 'http://www.bmoviecomic.com/'
stripUrl = url + '?cid=%s'
firstStripUrl = stripUrl % '8'
imageSearch = compile(r'"(comics/.+?)"')
prevSearch = compile(r'(\?cid=.+?)".+?Prev')
help = 'Index format: n'
@ -171,6 +189,7 @@ class BratHalla(_BasicScraper):
class BrentalFloss(_BasicScraper):
    url = 'http://brentalflossthecomic.com/'
    stripUrl = url + '?id=%s'
    # Spelled firstStripUrl (was misspelled "fristStripUrl"), matching the
    # attribute name used by the other scraper classes.
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
    # The previous-strip link is the anchor directly followed by the text "Prev".
    prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev")
    help = 'Index format: n'
@ -185,6 +204,7 @@ class BrentalFlossFit(BrentalFloss):
name = 'BrentalFloss/FlossedInTime'
url = 'http://brentalflossthecomic.com/fit/'
stripUrl = url + '?id=%s'
# Spelled firstStripUrl (was misspelled "fristStripUrl"), matching the
# attribute name used by the other scraper classes.
firstStripUrl = stripUrl % '1'
@classmethod
def prevUrlModifier(cls, prevUrl):
@ -201,37 +221,43 @@ class BrentalFlossGuest(BrentalFloss):
name = 'BrentalFloss/GuestComics'
url = 'http://brentalflossthecomic.com/guestcomics/'
stripUrl = url + '?id=%s'
# Spelled firstStripUrl (was misspelled "fristStripUrl"), matching the
# attribute name used by the other scraper classes.
firstStripUrl = stripUrl % '1'
# XXX disallowed by robots.txt
class _BringBackRoomies(_BasicScraper):
url = "http://www.bringbackroomies.com/"
rurl = escape(url)
stripUrl = url + "comic/%s"
imageSearch = compile(tagre("img", "src", r'(http://www\.bringbackroomies\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)'))
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(http://www\.bringbackroomies\.com/comic/[^"]+)'))
tagre("a", "href", r'(%scomic/[^"]+)' % rurl))
help = 'Index format: stripname'
class Brink(_BasicScraper):
url = 'http://paperfangs.com/brink/'
rurl = escape(url)
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://paperfangs\.com/brink/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://paperfangs\.com/brink/[^"]+)', after="prev"))
help = 'Index format: n'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: number'
class BobWhite(_BasicScraper):
    url = 'http://www.bobwhitecomics.com/'
    # Regex-escaped site root for interpolation into the patterns.
    rurl = escape(url)
    stripUrl = url + '?webcomic_post=%s'
    firstStripUrl = stripUrl % '20110504'
    # The dot before "jpg" is escaped so it only matches a literal '.'.
    imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+\.jpg)" % rurl))
    # Raw string: '\?' and '\d' are regex escapes, not string escapes, and a
    # non-raw literal triggers invalid-escape warnings on newer Pythons.
    prevSearch = compile(tagre("a", "href", r"(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous')
    help = 'Index format: yyyymmdd'
class BoredAndEvil(_BasicScraper):
url = 'http://www.boredandevil.com/'
stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '2004-06-07'
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
starter = indirectStarter(url, prevSearch)
@ -241,6 +267,7 @@ class BoredAndEvil(_BasicScraper):
class BoxerHockey(_BasicScraper):
url = 'http://boxerhockey.fireball20xl.com/'
stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '56'
imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg"))
prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') +
r'[^>]+Previous')
@ -255,6 +282,7 @@ class BoxerHockey(_BasicScraper):
class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
help = 'Index format: n (unpadded)'
@ -268,6 +296,7 @@ class BrightlyWound(_BasicScraper):
baseUrl = 'http://www.brightlywound.com/'
# Entry page is pinned to a fixed comic id rather than the site root.
url = baseUrl + '?comic=137'
stripUrl = baseUrl + '?comic=%s'
# Spelled firstStripUrl (was misspelled "fristStripUrl"), matching the
# attribute name used by the other scraper classes.
firstStripUrl = stripUrl % '0'
imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'"))
prevSearch = compile(r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'')
help = 'Index format: nnn'
@ -275,9 +304,11 @@ class BrightlyWound(_BasicScraper):
class BroodHollow(_BasicScraper):
url = 'http://broodhollow.chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://broodhollow\.chainsawsuit\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://broodhollow\.chainsawsuit\.com/\d+/\d+/\d+/[^"]+)', after="prev"))
firstStripUrl = stripUrl % '2012/10/08/broodhollow'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
@ -292,15 +323,19 @@ class _ButtercupFestival(_BasicScraper):
class ButterSafe(_BasicScraper):
    url = 'http://buttersafe.com/'
    # Regex-escaped site root for interpolation into the patterns.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Date path is yyyy/mm/dd/name; the year part was written '\d+\d+',
    # a doubled quantifier, and is now the plain '\d+' used for the other parts.
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
class ButternutSquash(_BasicScraper):
url = 'http://www.butternutsquash.net/'
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev"))
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2003/04/16/meet-da-punks'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name-author-name'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter
@ -11,17 +11,20 @@ from ..util import tagre
class Caggage(_BasicScraper):
url = 'http://caggagecomic.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s'
imageSearch = compile(tagre("img", "src", r'(http://caggagecomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://caggagecomic\.com/archives/\d+)', after="prev"))
firstStripUrl = stripUrl % '77'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
help = 'Index format: number'
class CaptainSNES(_BasicScraper):
url = 'http://www.captainsnes.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r"(http://www\.captainsnes\.com/comics/[^']+)", quote="'"))
prevSearch = compile(tagre("a", "href", r'(http://www\.captainsnes\.com/[^"]+)') + tagre("span", "class", "prev"))
imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'"))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("span", "class", "prev"))
multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/dd/nnn-stripname'
@ -29,6 +32,7 @@ class CaptainSNES(_BasicScraper):
class CaseyAndAndy(_BasicScraper):
url = 'http://www.galactanet.com/comic/'
stripUrl = url + 'view.php?strip=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)')
+ tagre("img", "src", r'previous\.gif'))
@ -37,10 +41,12 @@ class CaseyAndAndy(_BasicScraper):
class Catalyst(_BasicScraper):
baseUrl = "http://catalyst.spiderforest.com/"
rurl = escape(baseUrl)
url = baseUrl + "comic.php?comic_id=415"
stripUrl = baseUrl + "comic.php?comic_id=%s"
imageSearch = compile(tagre("img", "src", r'((?:http://catalyst\.spiderforest\.com/)?comics/[^"]+)'))
prevSearch = compile("<center>" + tagre("a", "href", r'(http://catalyst\.spiderforest\.com/comic\.php\?comic_id=\d+)'))
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
prevSearch = compile("<center>" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl))
help = 'Index format: number'
@ -54,17 +60,19 @@ class Catena(_BasicScraper):
class ChainsawSuit(_BasicScraper):
url = 'http://chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://chainsawsuit\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://chainsawsuit\.com/\d+/\d+/\d+/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class ChannelAte(_BasicScraper):
    url = 'http://www.channelate.com/'
    # Regex-escaped site root for interpolation into the patterns.
    rurl = escape(url)
    stripUrl = url + '%s/'
    # The '%s' placeholder has to be filled with rurl; without the
    # interpolation the regex looks for the literal text "%s" and can never
    # match an image or link URL.
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/name'
@ -126,25 +134,28 @@ class Comedity(_BasicScraper):
class Commissioned(_BasicScraper):
url = 'http://www.commissionedcomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.commissionedcomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.commissionedcomic\.com/\?p=\d+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: n'
class Concession(_BasicScraper):
url = 'http://concessioncomic.com/'
rurl = escape(url)
stripUrl = url + 'index.php?pid=%s'
imageSearch = compile(tagre("img", "src", r'(http://concessioncomic\.com/comics/[^"]+)', after="Comic"))
prevSearch = compile(tagre("a", "href", r'(http://concessioncomic\.com/index\.php\?pid=\d+)', after="nav-prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl, after="Comic"))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?pid=\d+)' % rurl, after="nav-prev"))
help = 'Index format: number'
class CoolCatStudio(_BasicScraper):
url = 'http://www.coolcatstudio.com/'
rurl = escape(url)
stripUrl = url + 'strips-cat/ccs%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
help = 'Index format: yyyymmdd'
@ -233,23 +244,27 @@ class CrimsonDark(_BasicScraper):
class CatsAndCameras(_BasicScraper):
    url = 'http://catsncameras.com/cnc/'
    # Regex-escaped form of url. It already ends in 'cnc/', so the patterns
    # below must not append that path segment a second time.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Previous link: the anchor that follows the "nav-previous" div.
    prevSearch = compile(tagre("div", "class", r'nav-previous') +
        tagre("a", "href", r'(%s[^"]+)' % rurl))
    help = 'Index format: nnn'
class CowboyJedi(_BasicScraper):
url = 'http://www.cowboyjedi.com/'
rurl = escape(url)
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.cowboyjedi.\com/comics/[^"]+)'))
prevSearch = compile(r'<a href="(http://www.cowboyjedi.com/.+?)" class="navi navi-prev"')
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/strip-name'
class CasuallyKayla(_BasicScraper):
    # Scraper settings for the Casually Kayla site.
    help = 'Index format: nnn'
    url = 'http://casuallykayla.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '89'
    imageSearch = compile(
        tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)'))
    # The previous-strip link follows the "nav-previous" div.
    prevSearch = compile(
        tagre("div", "class", r'nav-previous') +
        tagre("a", "href", r'([^"]+)'))
@ -257,9 +272,10 @@ class CasuallyKayla(_BasicScraper):
class Collar6(_BasicScraper):
    # Scraper settings for the Collar 6 site.
    help = 'Index format: <name>'
    url = 'http://collar6.com/'
    stripUrl = url + 'archive/%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src",
              r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
@ -272,11 +288,13 @@ class Chester5000XYV(_BasicScraper):
class Champ2010(_BasicScraper):
    # Scraper settings for Champ 2010; all URLs derive from baseurl.
    help = 'Index format: yy-dd-mm'
    baseurl = 'http://jedcollins.com/champ2010/'
    stripUrl = baseurl + '%s.html'
    # the latest URL is hard coded since the comic is discontinued
    url = baseurl + 'champ-12-30-10.html'
    # Regex-escaped base URL, interpolated into the patterns below.
    rurl = escape(baseurl)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous"))
@ -294,9 +312,11 @@ class Chucklebrain(_BasicScraper):
class CompanyY(_BasicScraper):
    # Scraper settings for the Company Y site.
    help = 'Index format: yyyy/mm/dd/strip-name'
    url = 'http://company-y.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # The previous-strip link follows the "nav-previous" div.
    prevSearch = compile(
        tagre("div", "class", r"nav-previous") +
        tagre("a", "href", r'(%s[^"]+)' % rurl))
@ -324,10 +344,11 @@ class CraftedFables(_BasicScraper):
class CucumberQuest(_BasicScraper):
    # Scraper settings for the Cucumber Quest site.
    help = 'Index format: stripname'
    url = 'http://cucumber.gigidigi.com/'
    stripUrl = url + 'archive/%s/'
    firstStripUrl = stripUrl % 'page-1'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    # Start lookup: search recent.html for its window.location target.
    starter = indirectStarter(
        url + 'recent.html',
        compile(r'window\.location="(/archive/[^"]+/)"'))
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/webcomic/cq/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/[^"]+/)' % rurl, after="previous"))

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter
@ -48,19 +48,21 @@ class DangerouslyChloe(_BasicScraper):
class DarkWings(_BasicScraper):
    # Scraper settings for Dark Wings (hosted under flowerlarkstudios.com).
    help = 'Index format: yyyy/mm/dd/page-nn-mm'
    url = 'http://www.flowerlarkstudios.com/dark-wings/'
    stripUrl = url + '%s'
    # Regex-escaped base URL, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
class DasLebenIstKeinPonyhof(_BasicScraper):
    # Scraper settings for the German-language comic on sarahburrini.com.
    lang = 'de'
    help = 'Index format: stripname'
    url = 'http://sarahburrini.com/wordpress/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'mein-erster-webcomic'
    # Regex-escaped base URL, interpolated into the patterns below.
    rurl = escape(url)
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre("img", "src",
              r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
@ -84,9 +86,10 @@ class DeathToTheExtremist(_BasicScraper):
class DeepFried(_BasicScraper):
    # Scraper settings for the Deep Fried site.
    help = 'Index format: non'
    url = 'http://www.whatisdeepfried.com/'
    stripUrl = url + '%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
@ -168,9 +171,10 @@ class DoemainOfOurOwn(_BasicScraper):
class DogHouseDiaries(_BasicScraper):
    # Scraper settings for the Doghouse Diaries site.
    help = 'Index format: number'
    url = 'http://thedoghousediaries.com/'
    stripUrl = url + '%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+)' % rurl, after="previous-comic"))
@ -184,15 +188,17 @@ class DominicDeegan(_BasicScraper):
class DorkTower(_BasicScraper):
    # Scraper settings for the Dork Tower site.
    help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
    url = 'http://www.dorktower.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/\d+/\d+/DorkTower[^"]+)' % rurl))
    # The link tag must be directly followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
class DrFun(_BasicScraper):
url = 'http://www.ibiblio.org/Dave/ar00502.htm'
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
baseurl = 'http://www.ibiblio.org/Dave/'
url = baseurl + 'ar00502.htm'
stripUrl = baseurl + 'ar%s.htm'
imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d+/df[^"]+)">')
multipleImagesPerStrip = True
prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
@ -217,18 +223,21 @@ class DreamKeepersPrelude(_BasicScraper):
class DresdenCodak(_BasicScraper):
    # Scraper settings for the Dresden Codak site; no strip URL template.
    url = 'http://dresdencodak.com/'
    stripUrl = None
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # NOTE(review): the pattern this replaced matched an unquoted
    # src= attribute on the m_prev.png button; confirm that tagre's
    # default quoting still matches the page markup.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl) +
        tagre("img", "src", r"%sm_prev\.png" % rurl))
    # Start lookup: the link that follows the "preview" div on the front page.
    starter = indirectStarter(
        url,
        compile(tagre("div", "id", "preview") +
                tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
class DrMcNinja(_BasicScraper):
    # Scraper settings for the Dr. McNinja site.
    help = 'Index format: episode number and page'
    url = 'http://drmcninja.com/'
    stripUrl = url + 'archives/comic/%s/'
    firstStripUrl = stripUrl % '0p1'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchives/comic/[^"]+)' % rurl, after="prev"))
@ -243,7 +252,8 @@ class Drowtales(_BasicScraper):
# XXX disallowed by robots.txt
class _DumbingOfAge(_BasicScraper):
    # Scraper settings for the Dumbing of Age site.
    help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
    url = 'http://www.dumbingofage.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/[^"]+)' % rurl, after="prev"))

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile, escape, IGNORECASE
from ..helpers import indirectStarter
from ..scraper import _BasicScraper
@ -76,9 +76,10 @@ class ElGoonishShiveNP(_BasicScraper):
class Ellerbisms(_BasicScraper):
    # Scraper settings for the Ellerbisms site.
    help = 'Index format: nnn'
    url = 'http://www.ellerbisms.com/'
    stripUrl = url + '?p=%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
@ -132,17 +133,19 @@ class EvilInc(_BasicScraper):
class Exiern(_BasicScraper):
    # Scraper settings for the Exiern site.
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://www.exiern.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
class ExploitationNow(_BasicScraper):
    # Scraper settings for the Exploitation Now site.
    help = 'Index format: yyyy-mm-dd/num'
    url = 'http://www.exploitationnow.com/'
    stripUrl = url + '%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
@ -156,16 +159,18 @@ class ExtraLife(_BasicScraper):
class ExtraOrdinary(_BasicScraper):
    # Scraper settings for the Extra Ordinary site (exocomics.com).
    url = 'http://exocomics.com/'
    # url has no "www." prefix, but the patterns this class used before
    # matched links on www.exocomics.com. Accept both host forms so page
    # links keep matching regardless of which one the site emits.
    rurl = r'http://(?:www\.)?exocomics\.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '01'
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+)' % rurl, before="prev"))
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl))
    help = 'Index format: number'
class EyeOfRamalach(_BasicScraper):
    # Scraper settings for The Eye of Ramalach (theeye.katbox.net).
    help = 'Index format: stripname'
    url = 'http://theeye.katbox.net/'
    stripUrl = url + 'comic/%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/uploads/[^"]+)' % rurl,
              after="data-webcomic-parent"))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="previous"))

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE
from re import compile, escape, IGNORECASE, MULTILINE
from ..util import tagre
from ..scraper import _BasicScraper
@ -18,8 +18,9 @@ class FalconTwin(_BasicScraper):
class Fallen(_BasicScraper):
    # Scraper settings for the Fallen comic; all URLs derive from baseurl.
    help = 'Index format: nn-m (comicNumber-partNumber)'
    baseurl = 'http://www.fallencomic.com/'
    url = baseurl + 'fal-page.htm'
    # Template with three placeholders (part, then two more fields).
    stripUrl = baseurl + 'pages/part%s/%s-p%s.htm'
    # Both patterns are matched case-insensitively.
    imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
    prevSearch = compile(
        r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE)
@ -56,8 +57,9 @@ class FauxPas(_BasicScraper):
class FeyWinds(_BasicScraper):
    # Scraper settings for Fey Winds; all URLs derive from baseurl.
    help = 'Index format: n (unpadded)'
    baseurl = 'http://kitsune.rydia.net/'
    url = baseurl + 'index.html'
    stripUrl = baseurl + 'comic/page.php?id=%s'
    # Both patterns capture relative paths terminated by a single quote.
    imageSearch = compile(r"(../comic/pages//.+?)'")
    prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
@ -66,9 +68,10 @@ class FeyWinds(_BasicScraper):
class FilibusterCartoons(_BasicScraper):
    # Scraper settings for the Filibuster Cartoons site.
    help = 'Index format: yyyy/mm/dd/name'
    url = 'http://www.filibustercartoons.com/'
    stripUrl = url + 'index.php/%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
@ -83,8 +86,9 @@ class FirstWorldProblems(_BasicScraper):
class FlakyPastry(_BasicScraper):
    # Scraper settings for Flaky Pastry; all URLs derive from baseurl.
    help = 'Index format: nnnn'
    baseurl = 'http://flakypastry.runningwithpencils.com/'
    url = baseurl + 'index.php'
    stripUrl = baseurl + 'comic.php?strip_id=%s'
    # Image paths are captured relative to the site (comics/...).
    imageSearch = compile(r'<img src="(comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)".+?btn_back')
@ -101,27 +105,29 @@ class Flemcomics(_BasicScraper):
class Flipside(_BasicScraper):
    # Scraper settings for the Flipside comic on Keenspot.
    help = 'Index format: nnnn'
    url = 'http://flipside.keenspot.com/comic.php'
    stripUrl = url + '?i=%s'
    # Regex-escaped page URL, used as prefix of the previous-link pattern.
    rurl = escape(url)
    # Images are matched on the cdn. host, so this pattern does not use rurl.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?i=\d+)' % rurl, after="prev"))
class FonFlatter(_BasicScraper):
    # Scraper settings for the German-language fonflatter.de comic.
    lang = 'de'
    help = 'Index format: yyyy/mm/dd/number-stripname'
    url = 'http://www.fonflatter.de/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        r'src="(%s\d+/fred_\d+-\d+-\d+[^"]+)' % rurl)
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))

    def shouldSkipUrl(self, url):
        # Return whether url is one of the known pages listed below.
        skipped_indexes = (
            "2006/11/30/adventskalender",
            "2006/09/21/danke",
            "2006/08/23/zgf-zuweilen-gestellte-fragen",
            "2005/10/19/naq-never-asked-questions",
        )
        skipped_urls = [self.stripUrl % index for index in skipped_indexes]
        return url in skipped_urls
@ -154,10 +160,11 @@ class FredoAndPidjin(_BasicScraper):
class FullFrontalNerdity(_BasicScraper):
    # Scraper settings for Full Frontal Nerdity (ffn.nodwick.com).
    help = 'Index format: number'
    url = 'http://ffn.nodwick.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '6'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%sffnstrips/\d+-\d+-\d+\.[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
@ -11,19 +11,22 @@ from ..util import tagre
class Galaxion(_BasicScraper):
    # Scraper settings for the Galaxion site.
    help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
    url = 'http://galaxioncomics.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
class Garanos(_BasicScraper):
    # Scraper settings for Garanos; all URLs derive from baseurl.
    help = 'Index format: n (unpadded)'
    baseurl = 'http://garanos.alexheberling.com/'
    url = baseurl + 'pages/page-1/'
    stripUrl = baseurl + 'pages/page-%s'
    # Regex-escaped base URL, interpolated into the patterns below.
    rurl = escape(baseurl)
    # Start lookup: the "navi-last" link found on the first page.
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
                      after="navi-last")))
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%spages/[^"]+)' % rurl, after="prev"))
@ -70,9 +73,10 @@ class GlassHalfEmpty(_BasicScraper):
class GleefulNihilism(_BasicScraper):
    # Scraper settings for the Gleeful Nihilism site.
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://gleefulnihilism.com/'
    stripUrl = url + 'comics/%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # The link tag must be directly followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(%scomics/[^"]+)' % rurl) + 'Previous')
@ -86,10 +90,11 @@ class Goats(_BasicScraper):
class GoblinsComic(_BasicScraper):
    # Scraper settings for the Goblins comic site.
    help = 'Index format: ddmmyyyy'
    url = 'http://www.goblinscomic.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '06252005'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/)' % rurl, after="prev"))
@ -109,9 +114,10 @@ class GoneWithTheBlastwave(_BasicScraper):
class GrrlPower(_BasicScraper):
    # Scraper settings for the Grrl Power site.
    help = 'Index format: number'
    url = 'http://www.grrlpowercomic.com/'
    stripUrl = url + 'archives/%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev"))

View file

@ -1,6 +1,6 @@
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre, getPageContent, fetchUrls
from ..helpers import bounceStarter
@ -29,13 +29,14 @@ class HagarTheHorrible(_BasicScraper):
class HarkAVagrant(_BasicScraper):
url = 'http://www.harkavagrant.com/'
rurl = escape(url)
starter = bounceStarter(url,
compile(tagre("a", "href", r'(http://www\.harkavagrant\.com/index\.php\?id=\d+)') +
compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonnext.png")))
stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www.harkavagrant.com/[^"]+)', after='BORDER'))
prevSearch = compile(tagre("a", "href", r'(http://www\.harkavagrant\.com/index\.php\?id=\d+)') +
imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER'))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonprevious.png"))
help = 'Index format: number'
@ -48,18 +49,20 @@ class HarkAVagrant(_BasicScraper):
class HijinksEnsue(_BasicScraper):
    # Scraper settings for the HijiNKS Ensue site.
    help = 'Index format: yyyy/mm/dd/name'
    url = 'http://hijinksensue.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
              after="navi-prev"))
class Hipsters(_BasicScraper):
    # Scraper settings for the Hipsters comic site.
    help = 'Index format: yyyy/dd/stripname'
    url = 'http://www.hipsters-comic.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/08/hip01'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))

View file

@ -2,25 +2,27 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
class IAmArg(_BasicScraper):
    # Scraper settings for the I Am Arg site.
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://iamarg.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2011/05/08/05082011'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
class IanJay(_BasicScraper):
    # Scraper settings for the Ian Jay site.
    url = 'http://ianjay.net/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="Previous"))
    # User-facing help text; spelling of "format" corrected.
    help = 'Index format: nnn'

View file

@ -2,16 +2,17 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
class JackCannon(_BasicScraper):
    # Scraper settings for Jack Cannon (fancyadventures.com).
    help = 'Index format: yyyy/mm/dd/page-nnn'
    url = 'http://fancyadventures.com/'
    stripUrl = url + '%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
@ -41,8 +42,9 @@ class JohnnyWander(_BasicScraper):
class JustAnotherEscape(_BasicScraper):
    # Scraper settings for the Just Another Escape site.
    url = 'http://www.justanotherescape.com/'
    # Regex-escaped site root (ends with a slash).
    rurl = escape(url)
    stripUrl = url + 'index.cgi?date=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # The pattern used before required a doubled slash ("//index.cgi")
    # in the previous link. Allow an optional extra slash after the site
    # root so both the doubled and the single-slash form match.
    prevSearch = compile(
        tagre("a", "href", r'(%s/?index\.cgi\?date=\d+)' % rurl)
        + tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyymmdd'

View file

@ -2,19 +2,20 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
class KatzenfutterGeleespritzer(_BasicScraper):
    # Scraper settings for the German-language katzenfuttergeleespritzer.de.
    lang = 'de'
    help = 'Index format: stripname'
    url = 'http://www.katzenfuttergeleespritzer.de/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'dont-drink-and-drive'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    # Two alternative image patterns: dated uploads and "mmai_404" uploads.
    imageSearch = (
        compile(tagre(
            "img", "src",
            r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(%swp-content/uploads/\d+/\d+/mmai_404[^"]+)' % rurl)),
    )
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
@ -32,8 +33,9 @@ class KevinAndKell(_BasicScraper):
class Key(_BasicScraper):
    # Scraper settings for Key (key.shadilyn.com); URLs derive from baseurl.
    help = 'Index format: nnn'
    baseurl = 'http://key.shadilyn.com/'
    url = baseurl + 'latestpage.html'
    stripUrl = baseurl + 'pages/%s.html'
    # Images are quoted relative paths under images/ or pages/images/.
    imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
    prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
@ -41,16 +43,18 @@ class Key(_BasicScraper):
class KickInTheHead(_BasicScraper):
    # Scraper settings for the Kick in the Head site.
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://www.kickinthehead.org/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2003/03/20/ipod-envy'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src",
              r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
              after="navi-prev"))
class KillerKomics(_BasicScraper):
    # Scraper settings for Killer Komics; page URLs derive from baseurl.
    help = 'Index format: strip-name'
    baseurl = 'http://www.killerkomics.com/web-comics/'
    url = baseurl + 'index_ang.cfm'
    stripUrl = baseurl + '%s.cfm'
    # Images live under FichiersUpload/Comics/, outside baseurl.
    imageSearch = compile(
        r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
    prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
@ -75,8 +79,9 @@ class Krakow(_BasicScraper):
class Kukuburi(_BasicScraper):
    # Scraper settings for Kukuburi; all URLs derive from baseurl.
    help = 'Index format: yyyy/mm/dd/stripname'
    baseurl = 'http://www.kukuburi.com/'
    url = baseurl + 'current/'
    stripUrl = baseurl + 'v2/%s/'
    imageSearch = compile(
        tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)',
              after='alt="[^"]'))
    # First quoted http URL that follows the text "nav-previous".
    prevSearch = compile(r'nav-previous.+?"(http.+?)"')
@ -84,7 +89,8 @@ class Kukuburi(_BasicScraper):
class KuroShouri(_BasicScraper):
    # Scraper settings for the Kuro Shouri site.
    help = 'Index format: chapter-n-page-m'
    url = 'http://kuroshouri.com/'
    stripUrl = url + '?webcomic_post=%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    # The image attribute may use either single or double quotes.
    imageSearch = compile(
        tagre("img", "src",
              r"(%swp-content/webcomic/kuroshouri/[^'\"]+)" % rurl,
              quote="['\"]"))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?webcomic_post=[^"]+)' % rurl,
              after="previous"))

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
@ -10,27 +10,30 @@ from ..util import tagre
class LasLindas(_BasicScraper):
    # Scraper settings for Las Lindas (laslindas.katbox.net).
    help = 'Index format: stripname'
    url = 'http://laslindas.katbox.net/'
    stripUrl = url + 'comic/%s/'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/uploads/[^"]+)' % rurl,
              after="attachment-full"))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="previous"))
class LeastICouldDo(_BasicScraper):
    # Scraper settings for the Least I Could Do site.
    help = 'Index format: yyyymmdd'
    url = 'http://www.leasticoulddo.com/'
    stripUrl = url + 'comic/%s'
    # Regex-escaped site root, used by the link patterns below.
    rurl = escape(url)
    # Images are matched on the cdn. host, so this pattern does not use rurl.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://cdn\.leasticoulddo\.com/wp-content/uploads/\d+/\d+/\d{8}\.\w{1,4})'))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="Previous"))
    # Start lookup: the "feature-comic" link on the front page.
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
                      after="feature-comic")))
class Lint(_BasicScraper):
    # Scraper settings for Lint (hosted under purnicellin.com).
    help = 'Index format: yyyy/mm/dd/num-name'
    url = 'http://www.purnicellin.com/lint/'
    stripUrl = url + '%s'
    # Regex-escaped base URL, interpolated into the image pattern.
    rurl = escape(url)
    imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
    # Matches the rel="prev" link that follows a "| " separator.
    prevSearch = compile(r'\| <a href="([^"]+)" rel="prev">')
@ -45,18 +48,20 @@ class LittleGamers(_BasicScraper):
class LoadingArtist(_BasicScraper):
    # Scraper settings for the Loading Artist site.
    help = 'Index format: yyyy/mm/dd/stripname'
    url = 'http://www.loadingartist.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2011/01/04/born'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
class LookingForGroup(_BasicScraper):
    # Scraper settings for Looking For Group (lfgcomic.com).
    help = 'Index format: nnn'
    url = 'http://www.lfgcomic.com/'
    stripUrl = url + 'page/%s/'
    # Regex-escaped site root, used by the link patterns below.
    rurl = escape(url)
    # Images are matched on the cdn. host, so this pattern does not use rurl.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://cdn\.lfgcomic\.com/wp-content/uploads/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(%spage/\d+/)' % rurl, after="navtop-prev"))
    # Start lookup: the "feature-previous" link on the front page.
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%spage/\d+/)' % rurl,
                      after="feature-previous")))
    # Captures the page number from a /page/<n>/ path.
    nameSearch = compile(r'/page/(\d+)/')

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
@ -26,9 +26,10 @@ class MagickChicks(_BasicScraper):
class ManlyGuysDoingManlyThings(_BasicScraper):
    # Scraper settings for thepunchlineismachismo.com.
    help = 'Index format: ddmmyyyy'
    url = 'http://thepunchlineismachismo.com/'
    stripUrl = url + 'archives/comic/%s'
    # Regex-escaped site root, interpolated into the patterns below.
    rurl = escape(url)
    imageSearch = compile(
        tagre("img", "src",
              r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchives/comic/[^"]+)' % rurl,
              after="previous"))
@ -66,9 +67,10 @@ class MegaTokyo(_BasicScraper):
class Meiosis(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://meiosiswebcomic.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
    help = 'Index format: yyyy/mm/ddmmyyyy'
@ -91,9 +93,10 @@ class MenageA3(_BasicScraper):
class Melonpool(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.melonpool.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: n'
@ -107,12 +110,13 @@ class Misfile(_BasicScraper):
class MyCartoons(_BasicScraper):
    # Site root and its regex-escaped form shared by all patterns below.
    url = 'http://mycartoons.de/'
    rurl = escape(url)
    stripUrl = url + 'page/%s'
    # Two alternative image locations are given as a tuple of patterns.
    imageSearch = (
        compile(tagre(
            "img", "src",
            r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
    )
    prevSearch = compile(
        tagre("a", "href", r'(%spage/[^"]+)' % rurl) + "&laquo;")
    help = 'Index format: number'
    lang = 'de'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre
@ -23,17 +23,19 @@ class Namesake(_BasicScraper):
class NamirDeiter(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.namirdeiter.com/'
    rurl = escape(url)
    stripUrl = url + 'comics/index.php?date=%s'
    # The image pattern carries its own optional single quotes, so tagre
    # is called with an empty quote argument.
    imageSearch = compile(tagre(
        "img", "src",
        r"'?(%scomics/\d+\.jpg)'?" % rurl,
        quote=""))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%scomics/index\.php\?date=\d+)' % rurl,
        quote="'")+"Previous")
    help = 'Index format: yyyymmdd'
class Nedroid(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://nedroid.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/name'
@ -62,28 +64,30 @@ class NewWorld(_BasicScraper):
class Nicky510(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.nickyitis.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="Previous"))
    help = 'Index format: stripname'
class NekkoAndJoruba(_BasicScraper):
    url = 'http://www.nekkoandjoruba.com/'
    stripUrl = url + '?p=%s'
    # Hand-written patterns (no tagre); dots in the host name are escaped.
    imageSearch = compile(
        r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
    prevSearch = compile(
        r'<a href="(.+?)">&lsaquo;</a>')
    help = 'Index format: nnn'
class NekoTheKitty(_BasicScraper):
    # Site root and its regex-escaped form used by the navigation patterns.
    url = 'http://www.nekothekitty.net/'
    rurl = escape(url)
    stripUrl = url + 'comics/%s'
    # Next-link pattern: a comics link followed by the smallnext.png image.
    starter = bounceStarter(
        url,
        compile(
            tagre("a", "href", r'(%scomics/[^"]+)' % rurl) +
            tagre("img", "src", r'%sfiles/smallnext\.png' % rurl)))
    # Images are matched on smackjeeves hosts, not on the site root.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://(?:img\d+|www)\.smackjeeves\.com/images/uploaded/comics/[^"]+)'))
    # Previous-link pattern: a comics link followed by the smallprev.png image.
    prevSearch = compile(
        tagre("a", "href", r'(%scomics/[^"]+)' % rurl) +
        tagre("img", "src", r'%sfiles/smallprev\.png' % rurl))
    help = 'Index format: n/n-name'
@ -100,35 +104,40 @@ class NichtLustig(_BasicScraper):
class Nnewts(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://nnewts.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'nnewts-page-1'
    imageSearch = compile(
        tagre("img", "src", r'(%snewty/comics/[^"]+)' % rurl))
    # The "nnewts-" prefix of a page link is optional in the pattern.
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s(?:nnewts-)?page-\d+/)' % rurl,
        after="navi-prev"))
    help = 'Index format: page-number'
class Nodwick(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://comic.nodwick.com/'
    rurl = escape(url)
    stripUrl = url + "?p=%s"
    imageSearch = compile(
        tagre("img", "src", r'(%snodwickstrips/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: stripnumber'
class NobodyScores(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://nobodyscores.loosenutstudio.com/'
    rurl = escape(url)
    stripUrl = url + 'index.php?id=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomix/[^"]+)' % rurl))
    multipleImagesPerStrip = True
    # The dot of "index.php" is escaped so it only matches a literal dot,
    # in line with the escaped host part supplied by rurl.
    prevSearch = compile(
        r'<a href="(%sindex\.php.+?)">the one before </a>' % rurl)
    help = 'Index format: nnn'
class NoNeedForBushido(_BasicScraper):
    # baseurl is the site root; url (the start page) and stripUrl derive
    # from it, and rurl is its regex-escaped form.
    baseurl = 'http://noneedforbushido.com/'
    rurl = escape(baseurl)
    url = baseurl + 'latest/'
    stripUrl = baseurl + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/comic/[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s[^"]+)' % rurl,
        after="previous-comic-link"))
    help = 'Index format: yyyy/comic/nnn'

View file

@ -11,7 +11,8 @@ from ..scraper import _BasicScraper
class NineteenNinetySeven(_BasicScraper):
    # NOTE(review): name is '1997' while the site URL says 1977 - confirm
    # which one is intended before changing either.
    name = '1997'
    url = 'http://www.1977thecomic.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1977-comics/from-the-beginning-part-1'
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://www\.1977thecomic\.com/comics-1977/[^"]+)'))
    # Any link immediately followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)')+"Previous")
    help = 'Index format: yyyy/mm/dd/strip-name'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre, urlopen
@ -10,20 +10,22 @@ from ..util import tagre, urlopen
class OctopusPie(_BasicScraper):
    # Site root and its regex-escaped form shared by all patterns below.
    url = 'http://www.octopuspie.com/'
    rurl = escape(url)
    # Starter pattern: a site link followed by the junk/latest.png image.
    starter = indirectStarter(
        url,
        compile(
            tagre("a", "href", r'(%s[^"]+)' % rurl) +
            tagre("img", "src", r'%sjunk/latest\.png' % rurl)))
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src", r'(%sstrippy/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class OddFish(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.odd-fish.net/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
    help = 'Index format: stripname'
@ -45,27 +47,30 @@ class Oglaf(_BasicScraper):
class OkCancel(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://okcancel.com/'
    rurl = escape(url)
    stripUrl = url + 'comic/%s.html'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
    # A div of class "previous" followed by the link to the earlier comic.
    prevSearch = compile(
        tagre("div", "class", "previous") +
        tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
    # The starter reuses the previous-link pattern on the site root.
    starter = indirectStarter(url, prevSearch)
    help = 'Index format: yyyymmdd'
class OmakeTheater(_BasicScraper):
    # Site root and its regex-escaped form used by the link patterns.
    url = 'http://omaketheater.com/'
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    # Images are matched on the media host, not on the site root.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://media\.omaketheater\.com/4koma/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="prev"))
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl)))
    help = 'Index format: number (unpadded)'
class OnTheEdge(_BasicScraper):
    # Site root and its regex-escaped form used by the image pattern.
    url = 'http://ontheedgecomics.com/'
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    # Hand-written patterns (no tagre).
    imageSearch = compile(
        r'<img src="(%scomics/.+?)"' % rurl)
    prevSearch = compile(
        r'<a href="([^"]+)" rel="prev">')
    help = 'Index format: nnn (unpadded)'
@ -80,10 +85,11 @@ class OneQuestion(_BasicScraper):
class OrnerBoy(_BasicScraper):
    # Site root and its regex-escaped form used by the previous-link pattern.
    url = 'http://www.orneryboy.com/'
    rurl = escape(url)
    stripUrl = url + 'index.php?comicID=%s'
    firstStripUrl = stripUrl % '1'
    # The image source is matched as a relative comics/ path.
    imageSearch = compile(
        tagre("img", "src", r'(comics/\d+\.[^"]+)'))
    # A comicID link followed by the images/prev_a.gif button.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?comicID=\d+)' % rurl) +
        tagre("img", "src", r'images/prev_a\.gif'))
    help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
@ -10,9 +10,10 @@ from ..util import tagre
class PandyLand(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://pandyland.net/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/)' % rurl, after="prev"))
    help = 'Index format: number'
@ -27,10 +28,11 @@ class ParadigmShift(_BasicScraper):
class ParallelUniversum(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.paralleluniversum.net/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '001-der-comic-ist-tot'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    # A site link followed by a span of class "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+/)' % rurl) +
        tagre("span", "class", "prev"))
    help = 'Index format: number-stripname'
    lang = 'de'
@ -38,15 +40,17 @@ class ParallelUniversum(_BasicScraper):
class PartiallyClips(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://partiallyclips.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
class PastelDefender(_BasicScraper):
    # baseurl is the site root; url (the start page) and stripUrl are
    # both built from it.
    baseurl = 'http://www.pasteldefender.com/'
    url = baseurl + 'coverbackcover.html'
    stripUrl = baseurl + '%s.html'
    # Hand-written upper-case tag patterns with relative image paths.
    imageSearch = compile(
        r'<IMG SRC="(images/.+?)" WIDTH="742"')
    prevSearch = compile(
        r'<A HREF="([^"]+)"><IMG SRC="images/back\.gif"')
    help = 'Index format: nnn'
@ -62,22 +66,24 @@ class PebbleVersion(_BasicScraper):
class PennyAndAggie(_BasicScraper):
    # Site root and its regex-escaped form used by the image pattern.
    url = 'http://www.pennyandaggie.com/'
    rurl = escape(url)
    stripUrl = url + 'index.php?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # The previous-day button used to be matched on the bare host (without
    # "www."); building it from rurl would require "www." and drop that
    # match, so the host prefix is optional here and both forms are accepted.
    prevSearch = compile(
        tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
        tagre("img", "src",
              r'http://(?:www\.)?pennyandaggie\.com/images/previous_day\.gif',
              quote=""))
    # The starter reuses the previous-link pattern on the site root.
    starter = indirectStarter(url, prevSearch)
    help = 'Index format: n (unpadded)'
class PennyArcade(_BasicScraper):
url = 'http://penny-arcade.com/comic/'
rurl = escape(url)
starter = bounceStarter(url,
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnNext"))
compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext"))
)
stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnPrev"))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev"))
help = 'Index format: yyyy/mm/dd'
@classmethod
@ -88,9 +94,10 @@ class PennyArcade(_BasicScraper):
class PeppermintSaga(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.pepsaga.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: number'
@ -107,17 +114,19 @@ class PHDComics(_BasicScraper):
class PicPakDog(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.picpak.net/'
    rurl = escape(url)
    stripUrl = url + 'comic/%s/'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%scomic/[^"]+)' % rurl,
        after="navi-prev"))
    help = 'Index format: stripname'
class Pixel(_BasicScraper):
    # Site root and its regex-escaped form used by the previous-link pattern.
    url = 'http://pixelcomic.net/'
    rurl = escape(url)
    stripUrl = url + '%s.php'
    # The image source is matched as a bare numbered .png file name.
    imageSearch = compile(
        tagre("img", "src", r'(\d+\.png)'))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\.php)' % rurl, before="prev"))
    help = 'Index format: nnn'
@ -195,9 +204,10 @@ class ProperBarn(_BasicScraper):
class PunksAndNerds(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.punksandnerds.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
    help = 'Index format: nnn'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
@ -16,8 +16,10 @@ class QuestionableContent(_BasicScraper):
class Qwantz(_BasicScraper):
    # baseurl is the site root; url is the index page below it and rurl is
    # the regex-escaped site root used inside the patterns.
    baseurl = 'http://www.qwantz.com/'
    url = baseurl + 'index.php'
    rurl = escape(baseurl)
    stripUrl = url + '?comic=%s'
    # rurl already ends with the escaped "/", so the path must not start
    # with another slash (that would demand "//comics/" and never match).
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?comic=\d+)' % rurl, before="prev"))
    help = 'Index format: n'

View file

@ -34,9 +34,10 @@ class RealmOfAtland(_BasicScraper):
class RedMeat(_BasicScraper):
    # baseurl is the comic's directory; url (the current strip) and stripUrl
    # are both built from it.
    baseurl = 'http://www.redmeat.com/redmeat/'
    url = baseurl + 'current/index.html'
    # Next/previous links are relative "../yyyy-mm-dd/index.html" paths.
    starter = bounceStarter(
        url,
        compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>'))
    stripUrl = baseurl + '%s/index.html'
    imageSearch = compile(
        r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>')
    prevSearch = compile(
        r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>')
    help = 'Index format: yyyy-mm-dd'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, MULTILINE, IGNORECASE, sub
from re import compile, escape, MULTILINE, IGNORECASE, sub
from os.path import splitext
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter
@ -11,9 +11,11 @@ from ..util import tagre
class SailorsunOrg(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below;
    # rurl is computed once (a second identical assignment was redundant).
    url = 'http://sailorsun.org/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: n (unpadded)'
@ -27,19 +29,21 @@ class SamAndFuzzy(_BasicScraper):
class SandraAndWoo(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.sandraandwoo.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-sandra-and-woo'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s\d+/\d+/\d+/[^"]+/)' % rurl,
        after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
class SandraAndWooGerman(_BasicScraper):
    # Root of the woode/ section and its regex-escaped form, shared by
    # both patterns below.
    url = 'http://www.sandraandwoo.com/woode/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/10/19/ein-ausgefuchster-waschbar'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s\d+/\d+/\d+/[^"]+/)' % rurl,
        after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
    lang = 'de'
@ -54,9 +58,10 @@ class ScaryGoRound(_BasicScraper):
class ScenesFromAMultiverse(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://amultiverse.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl,
        after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
@ -79,9 +84,10 @@ class SchoolBites(_BasicScraper):
class Schuelert(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.schuelert.de/'
    rurl = escape(url)
    # No per-strip URL template is defined for this comic.
    stripUrl = None
    # Image sources are single-quoted, hence quote="'".
    imageSearch = compile(tagre(
        "img", "src",
        r"(%swp-content/[^']+)" % rurl,
        quote="'"))
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?paged=\d+)' % rurl) + "&laquo;")
    multipleImagesPerStrip = True
    help = 'Index format: none'
    lang = 'de'
@ -89,10 +95,11 @@ class Schuelert(_BasicScraper):
class Science(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://sci-ence.org/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+/)' % rurl, after="prev"))
    imageSearch = compile(tagre(
        "img", "src",
        r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    help = 'Index format: stripname'
    description = u'A comic about science, technology, skepticism, geekery, video games, atheism, and more.'
@ -142,18 +149,20 @@ class Sheldon(_BasicScraper):
class Shivae(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://shivae.net/'
    rurl = escape(url)
    stripUrl = url + 'blog/%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="Previous"))
    help = 'Index format: yyyy/mm/dd/stripname'
# XXX disallowed by robots.txt
class _Shortpacked(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.shortpacked.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/comic/book-nn/mm-name1/name2'
@ -189,9 +198,10 @@ class SkinDeep(_BasicScraper):
class SlightlyDamned(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.sdamned.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/number'
@ -251,14 +261,15 @@ class SnowFlakes(_BasicScraper):
class SnowFlame(_BasicScraper):
url = 'http://www.snowflamecomic.com/'
rurl = escape(url)
stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01')
imageSearch = compile(tagre("img", "src", r'(http://www\.snowflamecomic\.com/wp-content/uploads/\d+/\d+/[^"]+)', after="Snow[Ff]lame the fan made"))
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame the fan made"))
prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(http://www\.snowflamecomic\.com/\?comic=snowflame[^"]+)'))
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
starter = bounceStarter(url,
compile(tagre("span", "class", "mininav-next") +
tagre("a", "href", r'(http://www\.snowflamecomic\.com/\?comic=snowflame[^"]+)')))
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
help = 'Index format: chapter-page'
def getStripIndexUrl(self, index):
@ -276,25 +287,29 @@ class SnowFlame(_BasicScraper):
class SodiumEyes(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://sodiumeyes.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    # The image pattern runs up to the next space and uses an empty quote.
    imageSearch = compile(
        tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote=""))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
class Sorcery101(_BasicScraper):
    # baseurl is the site root and rurl its regex-escaped form; url points
    # at the sorcery-101/ section below it.
    baseurl = 'http://www.sorcery101.net/'
    url = baseurl + 'sorcery-101/'
    rurl = escape(baseurl)
    stripUrl = url + '%s/'
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%ssorcery-101/[^"]+)' % rurl,
        after="previous-"))
    help = 'Index format: stripname'
class SpaceTrawler(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://spacetrawler.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
        after="navi-prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
@ -309,26 +324,29 @@ class SpareParts(_BasicScraper):
class Spinnerette(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.spinnyverse.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre(
        "a", "href",
        r'(%s[^"]+)' % rurl,
        before="Previous Comic"))
    help = 'Index format: number'
class SPQRBlues(_BasicScraper):
    # Root of the IV/ section and its regex-escaped form, shared by both
    # patterns below.
    url = 'http://spqrblues.com/IV/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.png)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: number'
# XXX disallowed by robots.txt
class _StationV3(_BasicScraper):
    # Site root and its regex-escaped form shared by all patterns below.
    url = 'http://www.stationv3.com/'
    rurl = escape(url)
    stripUrl = url + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # A day link followed by the previous.gif button; the button URL is now
    # built from rurl like every other pattern of this class instead of
    # repeating the hard-coded host.
    prevSearch = compile(
        tagre("a", "href", r'(%sd/\d+\.html)' % rurl) +
        tagre("img", "src", r'%simages/previous\.gif' % rurl))
    help = 'Index format: yyyymmdd'
@ -343,9 +361,10 @@ class StickyDillyBuns(_BasicScraper):
class Stubble(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://stubblecomics.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
    help = 'Index format: number'
@ -386,17 +405,19 @@ class StuffNoOneToldMe(_BasicScraper):
class StrawberryDeathCake(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://strawberrydeathcake.com/'
    rurl = escape(url)
    stripUrl = url + 'archive/%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
    help = 'Index format: stripname'
class SuburbanTribe(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://www.pixelwhip.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: nnnn'
@ -430,9 +451,10 @@ class StarCrossdDestiny(_BasicScraper):
class Spamusement(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://spamusement.com/'
    rurl = escape(url)
    stripUrl = url + 'index.php/comics/view/%s'
    # Hand-written, case-insensitive patterns (no tagre).
    imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
    # The dot of "index.php" is escaped so it only matches a literal dot,
    # in line with the escaped host part supplied by rurl.
    prevSearch = compile(
        r'<a href="(%sindex\.php/comics/view/.+?)">' % rurl, IGNORECASE)
    help = 'Index format: n (unpadded)'
    # The starter reuses the previous-link pattern on the site root.
    starter = indirectStarter(url, prevSearch)
@ -448,7 +470,8 @@ class _StrangeCandy(_BasicScraper):
class SupernormalStep(_BasicScraper):
    # Site root and its regex-escaped form shared by both patterns below.
    url = 'http://supernormalstep.com/'
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
@ -34,12 +34,13 @@ class TheNoob(_BasicScraper):
class TheOrderOfTheStick(_BasicScraper):
url = 'http://www.giantitp.com/comics/oots0863.html'
stripUrl = 'http://www.giantitp.com/comics/oots%s.html'
baseurl = 'http://www.giantitp.com/'
url = baseurl + 'comics/oots0863.html'
stripUrl = baseurl + 'comics/oots%s.html'
imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
help = 'Index format: n (unpadded)'
starter = indirectStarter('http://www.giantitp.com/', compile(r'<A href="(/comics/oots\d{4}\.html)"'))
starter = indirectStarter(baseurl, compile(r'<A href="(/comics/oots\d{4}\.html)"'))
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -47,8 +48,9 @@ class TheOrderOfTheStick(_BasicScraper):
class TheParkingLotIsFull(_BasicScraper):
url = 'http://plif.courageunfettered.com/archive/arch2002.htm'
stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm'
baseurl = 'http://plif.courageunfettered.com/'
url = baseurl + 'archive/arch2002.htm'
stripUrl = baseurl + 'archive/arch%s.htm'
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
multipleImagesPerStrip = True
prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
@ -65,19 +67,21 @@ class TheWotch(_BasicScraper):
class ThisIsIndexed(_BasicScraper):
url = 'http://thisisindexed.com/'
rurl = escape(url)
stripUrl = url + 'page/%s'
imageSearch = compile(tagre("img", "src", r'(http://thisisindexed\.com/wp-content/uploads/\d+/\d+/card[^"]+)'))
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/card[^"]+)' % rurl))
multipleImagesPerStrip = True
prevSearch = compile(tagre("div", "class", "nav-previous") +
tagre("a", "href", r'(http://thisisindexed\.com/page/\d+/)'))
tagre("a", "href", r'(%spage/\d+/)' % rurl))
help = 'Index format: number'
class ThunderAndLightning(_BasicScraper):
url = 'http://www.talcomic.com/wp/'
rurl = escape(url)
stripUrl = url + '%s/'
prevSearch = compile(tagre("a", "href", r'(http://www\.talcomic\.com/wp/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(http://www\.talcomic\.com/wp/comics/[^"]+)'))
# url (and therefore rurl) already ends in "wp/", so the path component must
# not be repeated here; otherwise the patterns would require ".../wp/wp/...".
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/page-nn'
@classmethod
@ -87,17 +91,19 @@ class ThunderAndLightning(_BasicScraper):
class TinyKittenTeeth(_BasicScraper):
url = 'http://www.tinykittenteeth.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
class ToonHole(_BasicScraper):
url = 'http://www.toonhole.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.toonhole\.com/comics/\d+-\d+-\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.toonhole\.com/\d+/\d+/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url):
@ -115,9 +121,10 @@ class _TwoLumps(_BasicScraper):
class TwoTwoOneFour(_BasicScraper):
url = 'http://www.nitrocosm.com/go/2214_classic/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/[^"]+)', before="gallery_display"))
prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/2214_classic/\d+/)', after="Previous"))
prevSearch = compile(tagre("a", "href", r'(%s\d+/)' % rurl, after="Previous"))
help = 'Index format: n (unpadded)'
@ -131,32 +138,37 @@ class TheWhiteboard(_BasicScraper):
class HMHigh(_BasicScraper):
name = 'TheFallenAngel/HMHigh'
url = 'http://www.thefallenangel.co.uk/hmhigh/'
# Site root shared by the strip URL and the image/prev patterns. The host is
# "thefallenangel" (as in the previous hard-coded URLs), not "thefallenagel".
baseurl = 'http://www.thefallenangel.co.uk/'
url = baseurl + 'hmhigh/'
rurl = escape(baseurl)
stripUrl = url + '?id=%s'
imageSearch = compile(r'<img src="(http://www.thefallenangel.co.uk/hmhigh/img/comic/.+?)"')
prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)" title=".+?">Prev</a>')
imageSearch = compile(r'<img src="(%shmhigh/img/comic/.+?)"' % rurl)
prevSearch = compile(r' <a href="(%s.+?)" title=".+?">Prev</a>' % rurl)
help = 'Index format: nnn'
class TheOuterQuarter(_BasicScraper):
url = 'http://theouterquarter.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s'
imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"')
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: nnn'
class ThreePanelSoul(_BasicScraper):
url = 'http://threepanelsoul.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://threepanelsoul\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://threepanelsoul\.com/\d+/\d+/\d+/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class TracyAndTristan(_BasicScraper):
url = 'http://tandt.thecomicseries.com/'
rurl = escape(url)
stripUrl = url + 'comics/%s'
imageSearch = compile(tagre("img", "src", r'(http://tandt\.thecomicseries\.com/images/comics/[^"]+)'))
imageSearch = compile(tagre("img", "src", r'(%simages/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(/comics/\d+)', after="prev"))
help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
@ -11,8 +11,9 @@ from ..helpers import indirectStarter
class WapsiSquare(_BasicScraper):
url = 'http://wapsisquare.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s'
imageSearch = compile(r'<img src="(http://wapsisquare.com/comics/.+?)"')
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: stripname'
@ -35,8 +36,8 @@ class WayfarersMoon(_BasicScraper):
class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/'
starter = indirectStarter(url,
compile(tagre("a", "href", r'(http://www\.webdesignerdepot\.com/\d+/\d+/[^"]+/)')))
rurl = escape(url)
starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
imageSearch = (
@ -46,7 +47,7 @@ class WebDesignerCOTW(_BasicScraper):
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
)
multipleImagesPerStrip = True
prevSearch = compile(tagre("link", "href", r"(http://www\.webdesignerdepot\.com/\d+/\d+/[^']+)", before='prev', quote="'"))
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'"))
help = 'Index format: yyyy/mm/stripname'
description = "The content revolves around web design, blogging and funny situations that we encounter in our daily lives as designers and this week we focus on Christmas. These great cartoons are created by Jerry King, an award-winning cartoonist whos one of the most published, prolific and versatile cartoonists in the world today."
@ -63,24 +64,28 @@ class WebDesignerCOTW(_BasicScraper):
class WeCanSleepTomorrow(_BasicScraper):
url = 'http://wecansleeptomorrow.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class WhiteNinja(_BasicScraper):
url = 'http://www.whiteninjacomics.com/comics.shtml'
stripUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml'
baseurl = 'http://www.whiteninjacomics.com/'
url = baseurl + 'comics.shtml'
stripUrl = baseurl + 'comics/%s.shtml'
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
prevSearch = compile(r'(/comics/.+?shtml).+?previous')
help = 'Index format: s (comic name)'
class WhyTheLongFace(_BasicScraper):
url = 'http://www.absurdnotions.org/wtlf200709.html'
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
baseurl = 'http://www.absurdnotions.org/'
rurl = escape(baseurl)
url = baseurl + 'wtlf200709.html'
stripUrl = baseurl + 'wtlf%s.html'
# Matches either an absolute "wtlf..." image URL or a relative "lf<digits>.<ext>"
# file name; the dot before the extension is escaped so it is a literal '.'.
imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+\.\w{1,4})"' % rurl, IGNORECASE)
multipleImagesPerStrip = True
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
help = 'Index format: yyyymm'
@ -96,9 +101,10 @@ class Wigu(_BasicScraper):
class Wonderella(_BasicScraper):
url = 'http://nonadventures.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://nonadventures\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://nonadventures\.com/\d+/\d+/\d+/[^"]+)', after="prev"))
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name'
@ -198,8 +204,9 @@ class Wulffmorgenthaler(_BasicScraper):
class WhiteNoise(_BasicScraper):
url = 'http://www.wncomic.com/archive.php'
stripUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s'
baseurl = 'http://www.wncomic.com/'
url = baseurl + 'archive.php'
stripUrl = baseurl + 'archive_comments.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
help = 'Index format: n'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile
from re import compile, escape
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import bounceStarter
@ -10,9 +10,10 @@ from ..helpers import bounceStarter
class ZapComic(_BasicScraper):
url = 'http://www.zapcomic.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.zapcomic\.com\?comic_object=\d+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.zapcomic\.com/[^"]+)', after="previous-comic-link"))
# The previous hard-coded pattern matched image URLs with no slash between the
# host and the query string; keep accepting that form (and the slash form too)
# by escaping the base URL without its trailing slash and making "/" optional.
imageSearch = compile(tagre("img", "src", r'(%s/?\?comic_object=\d+)' % escape(url.rstrip('/'))))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="previous-comic-link"))
help = 'Index format: yyyy/mm/nnn-stripname'
@ -41,9 +42,10 @@ class ZebraGirl(_BasicScraper):
class ZenPencils(_BasicScraper):
url = 'http://zenpencils.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '1-ralph-waldo-emerson-make-them-cry'
prevSearch = compile(tagre("a", "href", r'(http://zenpencils\.com/comic/[^"]+/)', after="navi-prev"))
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+/)' % rurl, after="navi-prev"))
imageSearch = compile(tagre("img", "src", r'(http://maxcdn\.zenpencils\.com/comics/\d+-\d+-\d+[^"]+)'))
help = 'Index format: num-stripname'
description = u'Inspirational quotes from famous people adapted into cartoons.'
@ -59,13 +61,14 @@ class ZombieHunters(_BasicScraper):
class Zwarwald(_BasicScraper):
url = "http://www.zwarwald.de/"
rurl = escape(url)
stripUrl = url + 'index.php/page/%s/'
# anything before page 495 seems to be flash
firstStripUrl = stripUrl % '495'
lang = 'de'
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.zwarwald\.de|wp1163540.wp190.webpack.hosteurope.de/wordpress)/images/\d+/\d+/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.zwarwald\.de/index\.php/page/\d+/)') +
tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'"))
# The "previous" link is recognised by the prev.jpg button that follows it.
# The earlier pattern matched that image on the bare host (no "www."), while
# rurl carries "www."; accept both so the substitution does not change matching.
prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
tagre("img", "src", r'http://(?:www\.)?zwarwald\.de/images/prev\.jpg', quote="'"))
help = 'Index format: number'
waitSeconds = 1