Use re.escape for URL patterns and add firstStripUrl to some scrapers.

This commit is contained in:
Bastian Kleineidam 2013-04-10 18:19:11 +02:00
parent fec6d92d8c
commit 5127d4c895
23 changed files with 582 additions and 369 deletions

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, MULTILINE from re import compile, escape, MULTILINE
from ..util import tagre from ..util import tagre
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter from ..helpers import regexNamer, bounceStarter, indirectStarter
@ -11,14 +11,17 @@ from ..helpers import regexNamer, bounceStarter, indirectStarter
class AbleAndBaker(_BasicScraper): class AbleAndBaker(_BasicScraper):
url = 'http://www.jimburgessdesign.com/comics/index.php' url = 'http://www.jimburgessdesign.com/comics/index.php'
stripUrl = url + '?comic=%s' stripUrl = url + '?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(comics/.+)')) imageSearch = compile(tagre('img', 'src', r'(comics/.+)'))
prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif') prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif')
help = 'Index format: nnn' help = 'Index format: nnn'
class AbsurdNotions(_BasicScraper): class AbsurdNotions(_BasicScraper):
url = 'http://www.absurdnotions.org/page129.html' baseurl = 'http://www.absurdnotions.org/'
stripUrl = 'http://www.absurdnotions.org/page%s.html' url = baseurl + 'page129.html'
stripUrl = baseurl + 'page%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif'))
@ -27,11 +30,12 @@ class AbsurdNotions(_BasicScraper):
class AbstruseGoose(_BasicScraper): class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/' url = 'http://abstrusegoose.com/'
starter = bounceStarter(url, rurl = escape(url)
compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)')+"Next »")) starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) firstStripUrl = stripUrl % '1'
prevSearch = compile(tagre('a', 'href', r'(http://abstrusegoose\.com/\d+)') + r'&laquo; Previous</a>') imageSearch = compile(tagre('img', 'src', r'(%sstrips/[^<>"]+)' % rurl))
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous</a>')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod @classmethod
@ -44,6 +48,7 @@ class AbstruseGoose(_BasicScraper):
class AcademyVale(_BasicScraper): class AcademyVale(_BasicScraper):
url = 'http://www.imagerie.com/vale/' url = 'http://www.imagerie.com/vale/'
stripUrl = url + 'avarch.cgi?%s' stripUrl = url + 'avarch.cgi?%s'
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -52,7 +57,7 @@ class AcademyVale(_BasicScraper):
class AhoiPolloi(_BasicScraper): class AhoiPolloi(_BasicScraper):
url = 'http://ahoipolloi.blogger.de/' url = 'http://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s' stripUrl = url + '?day=%s'
firstStripUrl = stripUrl % '20060305' firstStripUrl = stripUrl % '20060306'
multipleImagesPerStrip = True multipleImagesPerStrip = True
lang = 'de' lang = 'de'
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)')) imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)'))
@ -69,29 +74,33 @@ class ALessonIsLearned(_BasicScraper):
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous")
starter = indirectStarter(url, prevSearch) starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s' stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)")) imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
help = 'Index format: nnn' help = 'Index format: nnn'
class Alice(_BasicScraper): class Alice(_BasicScraper):
url = 'http://alice.alicecomics.com/' url = 'http://alice.alicecomics.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://alice\.alicecomics\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://alice\.alicecomics\.com/alicecomics/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%salicecomics/[^"]+)' % rurl, after="previous"))
help = 'Index format: name' help = 'Index format: name'
class AlienLovesPredator(_BasicScraper): class AlienLovesPredator(_BasicScraper):
url = 'http://alienlovespredator.com/' url = 'http://alienlovespredator.com/'
stripUrl = url + '%s' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/name/' help = 'Index format: yyyy/mm/dd/name'
class AlphaLuna(_BasicScraper): class AlphaLuna(_BasicScraper):
url = 'http://www.alphaluna.net/' url = 'http://www.alphaluna.net/'
stripUrl = url + 'issue-%s/' stripUrl = url + 'issue-%s/'
firstStripUrl = stripUrl % '1/cover'
imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)')) imageSearch = compile(tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') + tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev"))
help = 'Index format: issue/page (e.g. 4/05)' help = 'Index format: issue/page (e.g. 4/05)'
@ -102,11 +111,13 @@ class AlphaLunaSpanish(AlphaLuna):
lang = 'es' lang = 'es'
url = 'http://alphaluna.net/spanish/' url = 'http://alphaluna.net/spanish/'
stripUrl = url + 'issue-%s/' stripUrl = url + 'issue-%s/'
firstStripUrl = stripUrl % '1/portada'
class Altermeta(_BasicScraper): class Altermeta(_BasicScraper):
url = 'http://altermeta.net/' url = 'http://altermeta.net/'
stripUrl = url + 'archive.php?comic=%s' stripUrl = url + 'archive.php?comic=%s'
firstStripUrl = stripUrl % '0'
imageSearch = compile(r'<img src="(comics/[^"]+)" />') imageSearch = compile(r'<img src="(comics/[^"]+)" />')
prevSearch = compile(r'<a href="([^"]+)"><img src="http://altermeta\.net/template/default/images/sasha/back\.png') prevSearch = compile(r'<a href="([^"]+)"><img src="http://altermeta\.net/template/default/images/sasha/back\.png')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -115,14 +126,17 @@ class Altermeta(_BasicScraper):
class AltermetaOld(Altermeta): class AltermetaOld(Altermeta):
url = 'http://altermeta.net/oldarchive/index.php' url = 'http://altermeta.net/oldarchive/index.php'
stripUrl = 'http://altermeta.net/oldarchive/archive.php?comic=%s' stripUrl = 'http://altermeta.net/oldarchive/archive.php?comic=%s'
firstStripUrl = stripUrl % '0'
prevSearch = compile(r'<a href="([^"]+)">Back') prevSearch = compile(r'<a href="([^"]+)">Back')
class AmazingSuperPowers(_BasicScraper): class AmazingSuperPowers(_BasicScraper):
url = 'http://www.amazingsuperpowers.com/' url = 'http://www.amazingsuperpowers.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.amazingsuperpowers\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '2007/09/heredity'
prevSearch = compile(tagre("a", "href", r'(http://www\.amazingsuperpowers\.com/[^"]+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name' help = 'Index format: yyyy/mm/name'
@ -136,15 +150,18 @@ class Angels2200(_BasicScraper):
class Antics(_BasicScraper): class Antics(_BasicScraper):
url = 'http://www.anticscomic.com/' url = 'http://www.anticscomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.anticscomic\.com/comics/\d+-\d+-\d+[^"]+)')) firstStripUrl = stripUrl % '3'
prevSearch = compile(tagre("a", "href", r'(http://www\.anticscomic\.com/\?p=\d+)', after='prev')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev'))
help = 'Index format: number' help = 'Index format: number'
class AppleGeeks(_BasicScraper): class AppleGeeks(_BasicScraper):
url = 'http://www.applegeeks.com/' url = 'http://www.applegeeks.com/'
stripUrl = url + 'comics/viewcomic.php?issue=%s' stripUrl = url + 'comics/viewcomic.php?issue=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)')) imageSearch = compile(tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)'))
prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE) prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE)
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -153,6 +170,7 @@ class AppleGeeks(_BasicScraper):
class Achewood(_BasicScraper): class Achewood(_BasicScraper):
url = 'http://www.achewood.com/' url = 'http://www.achewood.com/'
stripUrl = url + 'index.php?date=%s' stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '00000000'
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
help = 'Index format: mmddyyyy' help = 'Index format: mmddyyyy'
@ -162,6 +180,7 @@ class Achewood(_BasicScraper):
class ASofterWorld(_BasicScraper): class ASofterWorld(_BasicScraper):
url = 'http://www.asofterworld.com/' url = 'http://www.asofterworld.com/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' + imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' +
tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)')) tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)'))
prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back') prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back')
@ -169,10 +188,12 @@ class ASofterWorld(_BasicScraper):
class AstronomyPOTD(_BasicScraper): class AstronomyPOTD(_BasicScraper):
url = 'http://antwrp.gsfc.nasa.gov/apod/astropix.html' baseurl = 'http://antwrp.gsfc.nasa.gov/apod/'
url = baseurl + 'astropix.html'
starter = bounceStarter(url, starter = bounceStarter(url,
compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>")) compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&gt;</a>"))
stripUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html' stripUrl = baseurl + 'ap%s.html'
firstStripUrl = stripUrl % '061012'
imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)'))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>") prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "&lt;</a>")
@ -202,7 +223,8 @@ class AfterStrife(_BasicScraper):
class ALLCAPS(_BasicScraper): class ALLCAPS(_BasicScraper):
url = 'http://www.allcapscomix.com/' url = 'http://www.allcapscomix.com/'
stripUrl = url + '%s' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2008/08/welcome-to-all-caps'
imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous</a>") prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous</a>")
help = 'Index format: yyyy/mm/strip-name' help = 'Index format: yyyy/mm/strip-name'
@ -211,6 +233,7 @@ class ALLCAPS(_BasicScraper):
class ASkeweredParadise(_BasicScraper): class ASkeweredParadise(_BasicScraper):
url = 'http://aspcomics.net/' url = 'http://aspcomics.net/'
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous")
help = 'Index format: nnn' help = 'Index format: nnn'
@ -221,6 +244,7 @@ class AGirlAndHerFed(_BasicScraper):
starter = bounceStarter(url, starter = bounceStarter(url,
compile(r'<a href="([^"]+)">[^>]+Back')) compile(r'<a href="([^"]+)">[^>]+Back'))
stripUrl = url + '1.%s.html' stripUrl = url + '1.%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
prevSearch = compile(r'<a href="([^"]+)">[^>]+Back') prevSearch = compile(r'<a href="([^"]+)">[^>]+Back')
help = 'Index format: nnn' help = 'Index format: nnn'
@ -229,6 +253,7 @@ class AGirlAndHerFed(_BasicScraper):
class AetheriaEpics(_BasicScraper): class AetheriaEpics(_BasicScraper):
url = 'http://aetheria-epics.schala.net/' url = 'http://aetheria-epics.schala.net/'
stripUrl = url + '%s.html' stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '00001'
imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous")
help = 'Index format: nnn' help = 'Index format: nnn'
@ -236,10 +261,11 @@ class AetheriaEpics(_BasicScraper):
class AirForceBlues(_BasicScraper): class AirForceBlues(_BasicScraper):
url = 'http://www.afblues.com/' url = 'http://www.afblues.com/'
stripUrl = url + 'wordpress/%s' stripUrl = url + 'wordpress/%s/'
firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya'
imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous'))
help = 'Index format: yyyy/mm/dd/name/' help = 'Index format: yyyy/mm/dd/stripname'
class AlienShores(_BasicScraper): class AlienShores(_BasicScraper):
@ -252,23 +278,27 @@ class AlienShores(_BasicScraper):
class AllTheGrowingThings(_BasicScraper): class AllTheGrowingThings(_BasicScraper):
url = 'http://growingthings.typodmary.com/' url = 'http://growingthings.typodmary.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://growingthings\.typodmary\.com/files/comics/[^"]+)')) firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things'
prevSearch = compile(tagre("a", "href", r'(http://growingthings\.typodmary\.com/[^"]+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name' help = 'Index format: yyyy/mm/dd/strip-name'
class Amya(_BasicScraper): class Amya(_BasicScraper):
url = 'http://www.amyachronicles.com/' url = 'http://www.amyachronicles.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.amyachronicles\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.amyachronicles\.com/archives/\d+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="Previous"))
help = 'Index format: n' help = 'Index format: n'
class Angband(_BasicScraper): class Angband(_BasicScraper):
url = 'http://angband.calamarain.net/' url = 'http://angband.calamarain.net/'
stripUrl = url + 'view.php?date=%s' stripUrl = url + 'view.php?date=%s'
firstStripUrl = stripUrl % '2005-12-30'
imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous")
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
@ -276,9 +306,10 @@ class Angband(_BasicScraper):
class AlsoBagels(_BasicScraper): class AlsoBagels(_BasicScraper):
url = 'http://alsobagels.com/' url = 'http://alsobagels.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comic/%s/' stripUrl = url + 'index.php/comic/%s/'
imageSearch = compile(tagre("img", "src", r'(http://alsobagels\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://alsobagels\.com/index\.php/comic/[^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous"))
help = 'Index format: strip-name' help = 'Index format: strip-name'
@ -292,10 +323,12 @@ class Annyseed(_BasicScraper):
class AxeCop(_BasicScraper): class AxeCop(_BasicScraper):
url = 'http://axecop.com/' url = 'http://axecop.com/'
starter = indirectStarter(url, compile(tagre("a", "href", r'(http://axecop\.com/index\.php/acepisodes/read/episode_\d+/)'))) rurl = escape(url)
starter = indirectStarter(url,
compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/episode_\d+/)' % rurl)))
stripUrl = url + 'index.php/acepisodes/read/%s/' stripUrl = url + 'index.php/acepisodes/read/%s/'
firstStripUrl = stripUrl % 'episode_0' firstStripUrl = stripUrl % 'episode_0'
imageSearch = compile(tagre("img", "src", r'(http://axecop\.com/images/uploads/(?:axecop|AXE-COP|acmarried|nightmonster)[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%simages/uploads/(?:axecop|AXE-COP|acmarried|nightmonster)[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://axecop\.com/index\.php/acepisodes/read/[^"]+)') + prevSearch = compile(tagre("a", "href", r'(%sindex\.php/acepisodes/read/[^"]+)' % rurl) +
tagre("img", "src", r'http://axecop\.com/acimages/buttons/page_left\.png')) tagre("img", "src", r'http://axecop\.com/acimages/buttons/page_left\.png'))
help = 'Index format: stripname' help = 'Index format: stripname'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..util import tagre from ..util import tagre
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@ -20,6 +20,7 @@ class BackwaterPlanet(_BasicScraper):
class BadassMuthas(_BasicScraper): class BadassMuthas(_BasicScraper):
url = 'http://badassmuthas.com/pages/comic.php' url = 'http://badassmuthas.com/pages/comic.php'
stripUrl = url + '?%s' stripUrl = url + '?%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -28,6 +29,7 @@ class BadassMuthas(_BasicScraper):
class BadMachinery(_BasicScraper): class BadMachinery(_BasicScraper):
url = 'http://scarygoround.com/' url = 'http://scarygoround.com/'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '20090918'
imageSearch = compile(tagre("img", "src", r'(strips/\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(strips/\d+[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + 'Previous') prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + 'Previous')
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -35,32 +37,38 @@ class BadMachinery(_BasicScraper):
class Bardsworth(_BasicScraper): class Bardsworth(_BasicScraper):
url = 'http://www.bardsworth.com/' url = 'http://www.bardsworth.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.bardsworth\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '750'
prevSearch = compile(tagre("a", "href", r'(http://www\.bardsworth\.com/[^"]+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: nnn' help = 'Index format: nnn'
class Baroquen(_BasicScraper): class Baroquen(_BasicScraper):
url = 'http://www.baroquencomics.com/' url = 'http://www.baroquencomics.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.baroquencomics\.com/Comics/[^"]+)')) firstStripUrl = stripUrl % '2008/11/05/raise-the-curtains'
prevSearch = compile(tagre("a", "href", r'(http://www\.baroquencomics\.com/[^"]+)', after='prev')) imageSearch = compile(tagre("img", "src", r'(%sComics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='prev'))
help = 'Index format: yyyy/mm/dd/strip-name' help = 'Index format: yyyy/mm/dd/strip-name'
class Bearmageddon(_BasicScraper): class Bearmageddon(_BasicScraper):
url = 'http://bearmageddon.com/' url = 'http://bearmageddon.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/08/01/page-1' firstStripUrl = stripUrl % '2011/08/01/page-1'
imageSearch = compile(tagre("img", "src", r'(http://bearmageddon\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://bearmageddon\.com/\d+/\d+/\d+/[^"]+)', after='navi-prev')) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev'))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class BetterDays(_BasicScraper): class BetterDays(_BasicScraper):
url = 'http://jaynaylor.com/betterdays/' url = 'http://jaynaylor.com/betterdays/'
stripUrl = url + 'archives/%s.html' stripUrl = url + 'archives/%s.html'
firstStripUrl = stripUrl % '2003/04/post-2'
imageSearch = compile(tagre("img", "src", r'(/betterdays/comic/[^>]+)', quote="")) imageSearch = compile(tagre("img", "src", r'(/betterdays/comic/[^>]+)', quote=""))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&laquo; Previous') prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&laquo; Previous')
help = 'Index format: yyyy/mm/<your guess>' help = 'Index format: yyyy/mm/<your guess>'
@ -68,9 +76,10 @@ class BetterDays(_BasicScraper):
class BetweenFailures(_BasicScraper): class BetweenFailures(_BasicScraper):
url = 'http://betweenfailures.com/' url = 'http://betweenfailures.com/'
rurl = escape(url)
stripUrl = url + 'archives/archive/%s' stripUrl = url + 'archives/archive/%s'
imageSearch = compile(tagre("img", "src", r'(http://betweenfailures\.com/wp-content/webcomic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://betweenfailures\.com/archives/archive/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchives/archive/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripnum-strip-name' help = 'Index format: stripnum-strip-name'
@ -85,6 +94,7 @@ class BigFatWhale(_BasicScraper):
class BiggerThanCheeses(_BasicScraper): class BiggerThanCheeses(_BasicScraper):
url = 'http://www.biggercheese.com/' url = 'http://www.biggercheese.com/'
stripUrl = url + 'index.php?comic=%s' stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'src="(comics/.+?)" alt') imageSearch = compile(r'src="(comics/.+?)" alt')
prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back') prevSearch = compile(r'"(index.php\?comic=.+?)".+?_back')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -92,15 +102,18 @@ class BiggerThanCheeses(_BasicScraper):
class BillyTheDunce(_BasicScraper): class BillyTheDunce(_BasicScraper):
url = 'http://www.duncepress.com/' url = 'http://www.duncepress.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.duncepress\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '2009/06/an-introduction-of-sorts'
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.duncepress.com/[^"]+)" rel="prev">') imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: yyyy/mm/strip-name' prevSearch = compile(r'<div class="nav-previous"><a href="(%s[^"]+)" rel="prev">' % rurl)
help = 'Index format: yyyy/mm/stripname'
class BizarreUprising(_BasicScraper): class BizarreUprising(_BasicScraper):
url = 'http://www.bizarreuprising.com/' url = 'http://www.bizarreuprising.com/'
stripUrl = url + 'view/%s' stripUrl = url + 'view/%s'
firstStripUrl = stripUrl % '1/awakening-splash'
imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif')) prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif'))
help = 'Index format: n/name' help = 'Index format: n/name'
@ -108,15 +121,17 @@ class BizarreUprising(_BasicScraper):
class BlankIt(_BasicScraper): class BlankIt(_BasicScraper):
url = 'http://blankitcomics.com/' url = 'http://blankitcomics.com/'
stripUrl = url + '%s' stripUrl = url + 'blankit-%s'
firstStripUrl = stripUrl % '0001'
imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: number'
class Blip(_BasicScraper): class Blip(_BasicScraper):
url = 'http://blipcomic.com/' url = 'http://blipcomic.com/'
stripUrl = url + 'index.php?strip_id=%s' stripUrl = url + 'index.php?strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'(istrip_files/strips/.+?)"') imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev') prevSearch = compile(r'First.+?"(index.php\?strip_id=.+?)".+?prev')
help = 'Index format: n' help = 'Index format: n'
@ -129,9 +144,11 @@ class Blip(_BasicScraper):
class BloodBound(_BasicScraper): class BloodBound(_BasicScraper):
url = 'http://bloodboundcomic.com/' url = 'http://bloodboundcomic.com/'
stripUrl = url + '%s' rurl = escape(url)
imageSearch = compile(tagre("img", "src", r'(http://bloodboundcomic\.com/comics/[^"]+)')) stripUrl = url + '%s/'
prevSearch = compile(tagre("a", "href", r'(http://bloodboundcomic\.com/[^"]+)', after="prev")) firstStripUrl = stripUrl % '2006/06/06112006'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/name' help = 'Index format: yyyy/mm/name'
@ -146,6 +163,7 @@ class BlueCrashKit(_BasicScraper):
class BMovieComic(_BasicScraper): class BMovieComic(_BasicScraper):
url = 'http://www.bmoviecomic.com/' url = 'http://www.bmoviecomic.com/'
stripUrl = url + '?cid=%s' stripUrl = url + '?cid=%s'
firstStripUrl = stripUrl % '8'
imageSearch = compile(r'"(comics/.+?)"') imageSearch = compile(r'"(comics/.+?)"')
prevSearch = compile(r'(\?cid=.+?)".+?Prev') prevSearch = compile(r'(\?cid=.+?)".+?Prev')
help = 'Index format: n' help = 'Index format: n'
@ -171,6 +189,7 @@ class BratHalla(_BasicScraper):
class BrentalFloss(_BasicScraper): class BrentalFloss(_BasicScraper):
url = 'http://brentalflossthecomic.com/' url = 'http://brentalflossthecomic.com/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)')) imageSearch = compile(tagre("img", "src", r'([^"]*/img/comic/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev") prevSearch = compile(tagre("a", "href", r'([^"]*)') + "Prev")
help = 'Index format: n' help = 'Index format: n'
@ -185,6 +204,7 @@ class BrentalFlossFit(BrentalFloss):
name = 'BrentalFloss/FlossedInTime' name = 'BrentalFloss/FlossedInTime'
url = 'http://brentalflossthecomic.com/fit/' url = 'http://brentalflossthecomic.com/fit/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '1'
@classmethod @classmethod
def prevUrlModifier(cls, prevUrl): def prevUrlModifier(cls, prevUrl):
@ -201,37 +221,43 @@ class BrentalFlossGuest(BrentalFloss):
name = 'BrentalFloss/GuestComics' name = 'BrentalFloss/GuestComics'
url = 'http://brentalflossthecomic.com/guestcomics/' url = 'http://brentalflossthecomic.com/guestcomics/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '1'
# XXX disallowed by robots.txt # XXX disallowed by robots.txt
class _BringBackRoomies(_BasicScraper): class _BringBackRoomies(_BasicScraper):
url = "http://www.bringbackroomies.com/" url = "http://www.bringbackroomies.com/"
rurl = escape(url)
stripUrl = url + "comic/%s" stripUrl = url + "comic/%s"
imageSearch = compile(tagre("img", "src", r'(http://www\.bringbackroomies\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("span", "class", "mininav-prev") + prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(http://www\.bringbackroomies\.com/comic/[^"]+)')) tagre("a", "href", r'(%scomic/[^"]+)' % rurl))
help = 'Index format: stripname' help = 'Index format: stripname'
class Brink(_BasicScraper): class Brink(_BasicScraper):
url = 'http://paperfangs.com/brink/' url = 'http://paperfangs.com/brink/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://paperfangs\.com/brink/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://paperfangs\.com/brink/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: n' help = 'Index format: number'
class BobWhite(_BasicScraper): class BobWhite(_BasicScraper):
url = 'http://www.bobwhitecomics.com/' url = 'http://www.bobwhitecomics.com/'
rurl = escape(url)
stripUrl = url + '?webcomic_post=%s' stripUrl = url + '?webcomic_post=%s'
imageSearch = compile(tagre("img", "src", r"(http://www\.bobwhitecomics\.com/wp/wp-content/webcomic/untitled/\d+.jpg)")) firstStripUrl = stripUrl % '20110504'
prevSearch = compile(tagre("a", "href", "(http://www\.bobwhitecomics\.com/\?webcomic_post=\d+)")+r'[^"]+Previous') imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl))
prevSearch = compile(tagre("a", "href", "(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous')
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class BoredAndEvil(_BasicScraper): class BoredAndEvil(_BasicScraper):
url = 'http://www.boredandevil.com/' url = 'http://www.boredandevil.com/'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '2004-06-07'
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif') prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
starter = indirectStarter(url, prevSearch) starter = indirectStarter(url, prevSearch)
@ -241,6 +267,7 @@ class BoredAndEvil(_BasicScraper):
class BoxerHockey(_BasicScraper): class BoxerHockey(_BasicScraper):
url = 'http://boxerhockey.fireball20xl.com/' url = 'http://boxerhockey.fireball20xl.com/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '56'
imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg")) imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg"))
prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') + prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') +
r'[^>]+Previous') r'[^>]+Previous')
@ -255,6 +282,7 @@ class BoxerHockey(_BasicScraper):
class BoyOnAStickAndSlither(_BasicScraper): class BoyOnAStickAndSlither(_BasicScraper):
url = 'http://www.boasas.com/' url = 'http://www.boasas.com/'
stripUrl = url + 'page/%s' stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')) imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page") prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page")
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -268,6 +296,7 @@ class BrightlyWound(_BasicScraper):
baseUrl = 'http://www.brightlywound.com/' baseUrl = 'http://www.brightlywound.com/'
url = baseUrl + '?comic=137' url = baseUrl + '?comic=137'
stripUrl = baseUrl + '?comic=%s' stripUrl = baseUrl + '?comic=%s'
firstStripUrl = stripUrl % '0'
imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'")) imageSearch = compile(tagre("img", "src", r"(comic/[^']+)", quote="'"))
prevSearch = compile(r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'') prevSearch = compile(r'<div id=\'navback\'><a href=\'(\?comic\=\d+)\'><img src=\'images/previous.png\'')
help = 'Index format: nnn' help = 'Index format: nnn'
@ -275,9 +304,11 @@ class BrightlyWound(_BasicScraper):
class BroodHollow(_BasicScraper): class BroodHollow(_BasicScraper):
url = 'http://broodhollow.chainsawsuit.com/' url = 'http://broodhollow.chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://broodhollow\.chainsawsuit\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '2012/10/08/broodhollow'
prevSearch = compile(tagre("a", "href", r'(http://broodhollow\.chainsawsuit\.com/\d+/\d+/\d+/[^"]+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -292,15 +323,19 @@ class _ButtercupFestival(_BasicScraper):
class ButterSafe(_BasicScraper): class ButterSafe(_BasicScraper):
url = 'http://buttersafe.com/' url = 'http://buttersafe.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://buttersafe\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'
prevSearch = compile(tagre("a", "href", r'(http://buttersafe\.com/\d+\d+/\d+/\d+/[^"]+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class ButternutSquash(_BasicScraper): class ButternutSquash(_BasicScraper):
url = 'http://www.butternutsquash.net/' url = 'http://www.butternutsquash.net/'
stripUrl = url + '%s' rurl = escape(url)
imageSearch = compile(tagre("img", "src", r'(http://www\.butternutsquash\.net/comics/[^"]+)')) stripUrl = url + '%s/'
prevSearch = compile(tagre("a", "href", r'(http://www\.butternutsquash\.net/[^"]+)', after="prev")) firstStripUrl = stripUrl % '2003/04/16/meet-da-punks'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name-author-name' help = 'Index format: yyyy/mm/dd/strip-name-author-name'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import bounceStarter, indirectStarter
@ -11,17 +11,20 @@ from ..util import tagre
class Caggage(_BasicScraper): class Caggage(_BasicScraper):
url = 'http://caggagecomic.com/' url = 'http://caggagecomic.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
imageSearch = compile(tagre("img", "src", r'(http://caggagecomic\.com/comics/[^"]+)')) firstStripUrl = stripUrl % '77'
prevSearch = compile(tagre("a", "href", r'(http://caggagecomic\.com/archives/\d+)', after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
help = 'Index format: number' help = 'Index format: number'
class CaptainSNES(_BasicScraper): class CaptainSNES(_BasicScraper):
url = 'http://www.captainsnes.com/' url = 'http://www.captainsnes.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r"(http://www\.captainsnes\.com/comics/[^']+)", quote="'")) imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'"))
prevSearch = compile(tagre("a", "href", r'(http://www\.captainsnes\.com/[^"]+)') + tagre("span", "class", "prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("span", "class", "prev"))
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/dd/nnn-stripname' help = 'Index format: yyyy/mm/dd/nnn-stripname'
@ -29,6 +32,7 @@ class CaptainSNES(_BasicScraper):
class CaseyAndAndy(_BasicScraper): class CaseyAndAndy(_BasicScraper):
url = 'http://www.galactanet.com/comic/' url = 'http://www.galactanet.com/comic/'
stripUrl = url + 'view.php?strip=%s' stripUrl = url + 'view.php?strip=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)')) imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)')
+ tagre("img", "src", r'previous\.gif')) + tagre("img", "src", r'previous\.gif'))
@ -37,10 +41,12 @@ class CaseyAndAndy(_BasicScraper):
class Catalyst(_BasicScraper): class Catalyst(_BasicScraper):
baseUrl = "http://catalyst.spiderforest.com/" baseUrl = "http://catalyst.spiderforest.com/"
rurl = escape(baseUrl)
url = baseUrl + "comic.php?comic_id=415" url = baseUrl + "comic.php?comic_id=415"
stripUrl = baseUrl + "comic.php?comic_id=%s" stripUrl = baseUrl + "comic.php?comic_id=%s"
imageSearch = compile(tagre("img", "src", r'((?:http://catalyst\.spiderforest\.com/)?comics/[^"]+)')) firstStripUrl = stripUrl % '1'
prevSearch = compile("<center>" + tagre("a", "href", r'(http://catalyst\.spiderforest\.com/comic\.php\?comic_id=\d+)')) imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
prevSearch = compile("<center>" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl))
help = 'Index format: number' help = 'Index format: number'
@ -54,17 +60,19 @@ class Catena(_BasicScraper):
class ChainsawSuit(_BasicScraper): class ChainsawSuit(_BasicScraper):
url = 'http://chainsawsuit.com/' url = 'http://chainsawsuit.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://chainsawsuit\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://chainsawsuit\.com/\d+/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class ChannelAte(_BasicScraper): class ChannelAte(_BasicScraper):
url = 'http://www.channelate.com/' url = 'http://www.channelate.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.channelate\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.channelate\.com/\d+/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -126,25 +134,28 @@ class Comedity(_BasicScraper):
class Commissioned(_BasicScraper): class Commissioned(_BasicScraper):
url = 'http://www.commissionedcomic.com/' url = 'http://www.commissionedcomic.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.commissionedcomic\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.commissionedcomic\.com/\?p=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: n' help = 'Index format: n'
class Concession(_BasicScraper): class Concession(_BasicScraper):
url = 'http://concessioncomic.com/' url = 'http://concessioncomic.com/'
rurl = escape(url)
stripUrl = url + 'index.php?pid=%s' stripUrl = url + 'index.php?pid=%s'
imageSearch = compile(tagre("img", "src", r'(http://concessioncomic\.com/comics/[^"]+)', after="Comic")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl, after="Comic"))
prevSearch = compile(tagre("a", "href", r'(http://concessioncomic\.com/index\.php\?pid=\d+)', after="nav-prev")) prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?pid=\d+)' % rurl, after="nav-prev"))
help = 'Index format: number' help = 'Index format: number'
class CoolCatStudio(_BasicScraper): class CoolCatStudio(_BasicScraper):
url = 'http://www.coolcatstudio.com/' url = 'http://www.coolcatstudio.com/'
rurl = escape(url)
stripUrl = url + 'strips-cat/ccs%s' stripUrl = url + 'strips-cat/ccs%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="prev")) prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -233,23 +244,27 @@ class CrimsonDark(_BasicScraper):
class CatsAndCameras(_BasicScraper): class CatsAndCameras(_BasicScraper):
url = 'http://catsncameras.com/cnc/' url = 'http://catsncameras.com/cnc/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(r'<img src="(http://catsncameras.com/cnc/comics/.+?)"') imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(r'<div class="nav-previous"><a href="(http://catsncameras.com/cnc/.+?)">') prevSearch = compile(tagre("div", "class", r'nav-previous') +
tagre("a", "href", r'(%s[^"]+)' % rurl))
help = 'Index format: nnn' help = 'Index format: nnn'
class CowboyJedi(_BasicScraper): class CowboyJedi(_BasicScraper):
url = 'http://www.cowboyjedi.com/' url = 'http://www.cowboyjedi.com/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.cowboyjedi.\com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(r'<a href="(http://www.cowboyjedi.com/.+?)" class="navi navi-prev"') prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/strip-name' help = 'Index format: yyyy/mm/dd/strip-name'
class CasuallyKayla(_BasicScraper): class CasuallyKayla(_BasicScraper):
url = 'http://casuallykayla.com/' url = 'http://casuallykayla.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '89'
imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)'))
prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)')) prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -257,9 +272,10 @@ class CasuallyKayla(_BasicScraper):
class Collar6(_BasicScraper): class Collar6(_BasicScraper):
url = 'http://collar6.com/' url = 'http://collar6.com/'
rurl = escape(url)
stripUrl = url + 'archive/%s' stripUrl = url + 'archive/%s'
imageSearch = compile(tagre("img", "src", r'(http://collar6\.com/wp-content/webcomic/collar6/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://collar6\.com/archive/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
help = 'Index format: <name>' help = 'Index format: <name>'
@ -272,11 +288,13 @@ class Chester5000XYV(_BasicScraper):
class Champ2010(_BasicScraper): class Champ2010(_BasicScraper):
baseurl = 'http://jedcollins.com/champ2010/'
rurl = escape(baseurl)
# the latest URL is hard coded since the comic is discontinued # the latest URL is hard coded since the comic is discontinued
url = 'http://jedcollins.com/champ2010/champ-12-30-10.html' url = baseurl + 'champ-12-30-10.html'
stripUrl = 'http://jedcollins.com/champ2010/%s.html' stripUrl = baseurl + '%s.html'
imageSearch = compile(tagre("img", "src", r'(http://jedcollins\.com/champ2010/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://jedcollins\.com/champ2010/[^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous"))
help = 'Index format: yy-dd-mm' help = 'Index format: yy-dd-mm'
@ -294,9 +312,11 @@ class Chucklebrain(_BasicScraper):
class CompanyY(_BasicScraper): class CompanyY(_BasicScraper):
url = 'http://company-y.com/' url = 'http://company-y.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://company-y\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(r'<div class="nav-previous"><a href="(http://company-y.com/.+?)"') prevSearch = compile(tagre("div", "class", r"nav-previous") +
tagre("a", "href", r'(%s[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/strip-name' help = 'Index format: yyyy/mm/dd/strip-name'
@ -324,10 +344,11 @@ class CraftedFables(_BasicScraper):
class CucumberQuest(_BasicScraper): class CucumberQuest(_BasicScraper):
url = 'http://cucumber.gigidigi.com/' url = 'http://cucumber.gigidigi.com/'
rurl = escape(url)
stripUrl = url + 'archive/%s/' stripUrl = url + 'archive/%s/'
firstStripUrl = stripUrl % 'page-1' firstStripUrl = stripUrl % 'page-1'
starter = indirectStarter(url + 'recent.html', starter = indirectStarter(url + 'recent.html',
compile(r'window\.location="(/archive/[^"]+/)"')) compile(r'window\.location="(/archive/[^"]+/)"'))
imageSearch = compile(tagre("img", "src", r'(http://cucumber\.gigidigi\.com/wp-content/webcomic/cq/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/cq/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://cucumber\.gigidigi\.com/archive/[^"]+/)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+/)' % rurl, after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter from ..helpers import indirectStarter, bounceStarter
@ -48,19 +48,21 @@ class DangerouslyChloe(_BasicScraper):
class DarkWings(_BasicScraper): class DarkWings(_BasicScraper):
url = 'http://www.flowerlarkstudios.com/dark-wings/' url = 'http://www.flowerlarkstudios.com/dark-wings/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.flowerlarkstudios\.com/dark-wings/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.flowerlarkstudios\.com/dark-wings/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/page-nn-mm' help = 'Index format: yyyy/mm/dd/page-nn-mm'
class DasLebenIstKeinPonyhof(_BasicScraper): class DasLebenIstKeinPonyhof(_BasicScraper):
url = 'http://sarahburrini.com/wordpress/' url = 'http://sarahburrini.com/wordpress/'
rurl = escape(url)
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'mein-erster-webcomic' firstStripUrl = stripUrl % 'mein-erster-webcomic'
imageSearch = compile(tagre("img", "src", r'(http://sarahburrini\.com/wordpress/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(http://sarahburrini\.com/wordpress/comic/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: stripname' help = 'Index format: stripname'
lang = 'de' lang = 'de'
@ -84,9 +86,10 @@ class DeathToTheExtremist(_BasicScraper):
class DeepFried(_BasicScraper): class DeepFried(_BasicScraper):
url = 'http://www.whatisdeepfried.com/' url = 'http://www.whatisdeepfried.com/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.whatisdeepfried\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.whatisdeepfried\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: non' help = 'Index format: non'
@ -168,9 +171,10 @@ class DoemainOfOurOwn(_BasicScraper):
class DogHouseDiaries(_BasicScraper): class DogHouseDiaries(_BasicScraper):
url = 'http://thedoghousediaries.com/' url = 'http://thedoghousediaries.com/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
prevSearch = compile(tagre("a", "href", r'(http://thedoghousediaries\.com/\d+)', after="previous-comic")) prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, after="previous-comic"))
imageSearch = compile(tagre("img", "src", r'(http://thedoghousediaries\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: number' help = 'Index format: number'
@ -184,15 +188,17 @@ class DominicDeegan(_BasicScraper):
class DorkTower(_BasicScraper): class DorkTower(_BasicScraper):
url = 'http://www.dorktower.com/' url = 'http://www.dorktower.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.dorktower\.com/files/\d+/\d+/DorkTower[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%sfiles/\d+/\d+/DorkTower[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.dorktower\.com/[^"]+)')+"Previous") prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy' help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
class DrFun(_BasicScraper): class DrFun(_BasicScraper):
url = 'http://www.ibiblio.org/Dave/ar00502.htm' baseurl = 'http://www.ibiblio.org/Dave/'
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm' url = baseurl + 'ar00502.htm'
stripUrl = baseurl + 'ar%s.htm'
imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d+/df[^"]+)">') imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d+/df[^"]+)">')
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(r'<A HREF="(.+?)">Previous Week,') prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
@ -217,18 +223,21 @@ class DreamKeepersPrelude(_BasicScraper):
class DresdenCodak(_BasicScraper): class DresdenCodak(_BasicScraper):
url = 'http://dresdencodak.com/' url = 'http://dresdencodak.com/'
rurl = escape(url)
stripUrl = None stripUrl = None
imageSearch = compile(r'<img src="http://dresdencodak.com(/comics/.*?\.jpg)"') imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>') prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("img", "src", r"%sm_prev\.png" % rurl))
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">')) starter = indirectStarter(url, compile(tagre("div", "id", "preview") +
tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
class DrMcNinja(_BasicScraper): class DrMcNinja(_BasicScraper):
url = 'http://drmcninja.com/' url = 'http://drmcninja.com/'
rurl = escape(url)
stripUrl = url + 'archives/comic/%s/' stripUrl = url + 'archives/comic/%s/'
firstStripUrl = stripUrl % '0p1' firstStripUrl = stripUrl % '0p1'
imageSearch = compile(tagre("img", "src", r'(http://drmcninja\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://drmcninja\.com/archives/comic/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%sarchives/comic/[^"]+)' % rurl, after="prev"))
help = 'Index format: episode number and page' help = 'Index format: episode number and page'
@ -243,7 +252,8 @@ class Drowtales(_BasicScraper):
# XXX disallowed by robots.txt # XXX disallowed by robots.txt
class _DumbingOfAge(_BasicScraper): class _DumbingOfAge(_BasicScraper):
url = 'http://www.dumbingofage.com/' url = 'http://www.dumbingofage.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
prevSearch = compile(tagre("a", "href", r'(http://www\.dumbingofage\.com/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(http://www\.dumbingofage\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
help = 'Index format: yyyy/comic/book-num/seriesname/stripname' help = 'Index format: yyyy/comic/book-num/seriesname/stripname'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile, escape, IGNORECASE
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@ -76,9 +76,10 @@ class ElGoonishShiveNP(_BasicScraper):
class Ellerbisms(_BasicScraper): class Ellerbisms(_BasicScraper):
url = 'http://www.ellerbisms.com/' url = 'http://www.ellerbisms.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -132,17 +133,19 @@ class EvilInc(_BasicScraper):
class Exiern(_BasicScraper): class Exiern(_BasicScraper):
url = 'http://www.exiern.com/' url = 'http://www.exiern.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class ExploitationNow(_BasicScraper): class ExploitationNow(_BasicScraper):
url = 'http://www.exploitationnow.com/' url = 'http://www.exploitationnow.com/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.exploitationnow\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.exploitationnow\.com/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy-mm-dd/num' help = 'Index format: yyyy-mm-dd/num'
@ -156,16 +159,18 @@ class ExtraLife(_BasicScraper):
class ExtraOrdinary(_BasicScraper): class ExtraOrdinary(_BasicScraper):
url = 'http://exocomics.com/' url = 'http://exocomics.com/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '01' firstStripUrl = stripUrl % '01'
prevSearch = compile(tagre("a", "href", r'(http://www\.exocomics\.com/\d+)', before="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, before="prev"))
imageSearch = compile(tagre("img", "src", r'(http://www\.exocomics\.com/comics/comics/\d+\.[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl))
help = 'Index format: number' help = 'Index format: number'
class EyeOfRamalach(_BasicScraper): class EyeOfRamalach(_BasicScraper):
url = 'http://theeye.katbox.net/' url = 'http://theeye.katbox.net/'
rurl = escape(url)
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
imageSearch = compile(tagre("img", "src", r'(http://theeye\.katbox\.net/wp-content/uploads/[^"]+)', after="data-webcomic-parent")) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/[^"]+)' % rurl, after="data-webcomic-parent"))
prevSearch = compile(tagre("a", "href", r'(http://theeye\.katbox\.net/comic/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE from re import compile, escape, IGNORECASE, MULTILINE
from ..util import tagre from ..util import tagre
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@ -18,13 +18,14 @@ class FalconTwin(_BasicScraper):
class Fallen(_BasicScraper): class Fallen(_BasicScraper):
url = 'http://www.fallencomic.com/fal-page.htm' baseurl = 'http://www.fallencomic.com/'
stripUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm' url = baseurl + 'fal-page.htm'
stripUrl = baseurl + 'pages/part%s/%s-p%s.htm'
imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE) imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
prevSearch = compile(r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE) prevSearch = compile(r'<A HREF="(.+?)"><FONT FACE="Courier">Back', IGNORECASE)
help = 'Index format: nn-m (comicNumber-partNumber)' help = 'Index format: nn-m (comicNumber-partNumber)'
starter = indirectStarter(url, starter = indirectStarter(url,
compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE)) compile(r'\(NEW \d{2}/\d{2}/\d{2}\)\s*\n*\s*<a href="(pages/part\d+/\d+-p\d+\.htm)">\d+</a>', MULTILINE))
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -44,7 +45,7 @@ class FantasyRealms(_BasicScraper):
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE) prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter(url, starter = indirectStarter(url,
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)) compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
class FauxPas(_BasicScraper): class FauxPas(_BasicScraper):
@ -56,8 +57,9 @@ class FauxPas(_BasicScraper):
class FeyWinds(_BasicScraper): class FeyWinds(_BasicScraper):
url = 'http://kitsune.rydia.net/index.html' baseurl = 'http://kitsune.rydia.net/'
stripUrl = 'http://kitsune.rydia.net/comic/page.php?id=%s' url = baseurl + 'index.html'
stripUrl = baseurl + 'comic/page.php?id=%s'
imageSearch = compile(r"(../comic/pages//.+?)'") imageSearch = compile(r"(../comic/pages//.+?)'")
prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png") prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -66,9 +68,10 @@ class FeyWinds(_BasicScraper):
class FilibusterCartoons(_BasicScraper): class FilibusterCartoons(_BasicScraper):
url = 'http://www.filibustercartoons.com/' url = 'http://www.filibustercartoons.com/'
rurl = escape(url)
stripUrl = url + 'index.php/%s' stripUrl = url + 'index.php/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.filibustercartoons\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.filibustercartoons\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -83,8 +86,9 @@ class FirstWorldProblems(_BasicScraper):
class FlakyPastry(_BasicScraper): class FlakyPastry(_BasicScraper):
url = 'http://flakypastry.runningwithpencils.com/index.php' baseurl = 'http://flakypastry.runningwithpencils.com/'
stripUrl = 'http://flakypastry.runningwithpencils.com/comic.php?strip_id=%s' url = baseurl + 'index.php'
stripUrl = baseurl + 'comic.php?strip_id=%s'
imageSearch = compile(r'<img src="(comics/.+?)"') imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back') prevSearch = compile(r'<a href="(.+?)".+?btn_back')
help = 'Index format: nnnn' help = 'Index format: nnnn'
@ -101,27 +105,29 @@ class Flemcomics(_BasicScraper):
class Flipside(_BasicScraper): class Flipside(_BasicScraper):
url = 'http://flipside.keenspot.com/comic.php' url = 'http://flipside.keenspot.com/comic.php'
rurl = escape(url)
stripUrl = url + '?i=%s' stripUrl = url + '?i=%s'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://flipside\.keenspot\.com/comic\.php\?i=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?i=\d+)' % rurl, after="prev"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
class FonFlatter(_BasicScraper): class FonFlatter(_BasicScraper):
url = 'http://www.fonflatter.de/' url = 'http://www.fonflatter.de/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman' firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman'
lang = 'de' lang = 'de'
imageSearch = compile(r'src="(http://www\.fonflatter\.de/\d+/fred_\d+-\d+-\d+[^"]+)') imageSearch = compile(r'src="(%s\d+/fred_\d+-\d+-\d+[^"]+)' % rurl)
prevSearch = compile(tagre("a", "href", r'(http://www\.fonflatter\.de/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/number-stripname' help = 'Index format: yyyy/mm/dd/number-stripname'
def shouldSkipUrl(self, url): def shouldSkipUrl(self, url):
return url in ( return url in (
"http://www.fonflatter.de/2006/11/30/adventskalender/", self.stripUrl % "2006/11/30/adventskalender",
"http://www.fonflatter.de/2006/09/21/danke/", self.stripUrl % "2006/09/21/danke",
"http://www.fonflatter.de/2006/08/23/zgf-zuweilen-gestellte-fragen/", self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen",
"http://www.fonflatter.de/2005/10/19/naq-never-asked-questions/", self.stripUrl % "2005/10/19/naq-never-asked-questions",
) )
@ -154,10 +160,11 @@ class FredoAndPidjin(_BasicScraper):
class FullFrontalNerdity(_BasicScraper): class FullFrontalNerdity(_BasicScraper):
url = 'http://ffn.nodwick.com/' url = 'http://ffn.nodwick.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '6' firstStripUrl = stripUrl % '6'
imageSearch = compile(tagre("img", "src", r'(http://ffn\.nodwick\.com/ffnstrips/\d+-\d+-\d+\.[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%sffnstrips/\d+-\d+-\d+\.[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://ffn\.nodwick\.com/\?p=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
@ -11,19 +11,22 @@ from ..util import tagre
class Galaxion(_BasicScraper): class Galaxion(_BasicScraper):
url = 'http://galaxioncomics.com/' url = 'http://galaxioncomics.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://galaxioncomics\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://galaxioncomics\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: n-comic/book-n/chapter-n/title-nnn' help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
class Garanos(_BasicScraper): class Garanos(_BasicScraper):
url = 'http://garanos.alexheberling.com/pages/page-1/' baseurl = 'http://garanos.alexheberling.com/'
rurl = escape(baseurl)
url = baseurl + 'pages/page-1/'
starter = indirectStarter(url, starter = indirectStarter(url,
compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="navi-last"))) compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl, after="navi-last")))
stripUrl = 'http://garanos.alexheberling.com/pages/page-%s' stripUrl = baseurl + 'pages/page-%s'
imageSearch = compile(tagre("img", "src", r'(http://garanos\.alexheberling\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl, after="prev"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -70,9 +73,10 @@ class GlassHalfEmpty(_BasicScraper):
class GleefulNihilism(_BasicScraper): class GleefulNihilism(_BasicScraper):
url = 'http://gleefulnihilism.com/' url = 'http://gleefulnihilism.com/'
rurl = escape(url)
stripUrl = url + 'comics/%s/' stripUrl = url + 'comics/%s/'
imageSearch = compile(tagre("img", "src", r'(http://gleefulnihilism\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://gleefulnihilism\.com/comics/[^"]+)') + 'Previous') prevSearch = compile(tagre("a", "href", r'(%scomics/[^"]+)' % rurl) + 'Previous')
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -86,10 +90,11 @@ class Goats(_BasicScraper):
class GoblinsComic(_BasicScraper): class GoblinsComic(_BasicScraper):
url = 'http://www.goblinscomic.com/' url = 'http://www.goblinscomic.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '06252005' firstStripUrl = stripUrl % '06252005'
prevSearch = compile(tagre("a", "href", r'(http://www\.goblinscomic\.com/\d+/)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(http://www\.goblinscomic\.com/comics/\d+\.[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+\.[^"]+)' % rurl))
help = 'Index format: ddmmyyyy' help = 'Index format: ddmmyyyy'
@ -109,9 +114,10 @@ class GoneWithTheBlastwave(_BasicScraper):
class GrrlPower(_BasicScraper): class GrrlPower(_BasicScraper):
url = 'http://www.grrlpowercomic.com/' url = 'http://www.grrlpowercomic.com/'
rurl = escape(url)
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.grrlpowercomic\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.grrlpowercomic\.com/archives/\d+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -1,6 +1,6 @@
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre, getPageContent, fetchUrls from ..util import tagre, getPageContent, fetchUrls
from ..helpers import bounceStarter from ..helpers import bounceStarter
@ -29,13 +29,14 @@ class HagarTheHorrible(_BasicScraper):
class HarkAVagrant(_BasicScraper): class HarkAVagrant(_BasicScraper):
url = 'http://www.harkavagrant.com/' url = 'http://www.harkavagrant.com/'
rurl = escape(url)
starter = bounceStarter(url, starter = bounceStarter(url,
compile(tagre("a", "href", r'(http://www\.harkavagrant\.com/index\.php\?id=\d+)') + compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonnext.png"))) tagre("img", "src", "buttonnext.png")))
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www.harkavagrant.com/[^"]+)', after='BORDER')) imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER'))
prevSearch = compile(tagre("a", "href", r'(http://www\.harkavagrant\.com/index\.php\?id=\d+)') + prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
tagre("img", "src", "buttonprevious.png")) tagre("img", "src", "buttonprevious.png"))
help = 'Index format: number' help = 'Index format: number'
@ -48,18 +49,20 @@ class HarkAVagrant(_BasicScraper):
class HijinksEnsue(_BasicScraper): class HijinksEnsue(_BasicScraper):
url = 'http://hijinksensue.com/' url = 'http://hijinksensue.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://hijinksensue\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://hijinksensue\.com/\d+/\d+/\d+/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
class Hipsters(_BasicScraper): class Hipsters(_BasicScraper):
url = 'http://www.hipsters-comic.com/' url = 'http://www.hipsters-comic.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/08/hip01' firstStripUrl = stripUrl % '2010/08/hip01'
imageSearch = compile(tagre("img", "src", r'(http://www\.hipsters-comic\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.hipsters-comic\.com/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/dd/stripname' help = 'Index format: yyyy/dd/stripname'

View file

@ -2,25 +2,27 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
class IAmArg(_BasicScraper): class IAmArg(_BasicScraper):
url = 'http://iamarg.com/' url = 'http://iamarg.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/05/08/05082011' firstStripUrl = stripUrl % '2011/05/08/05082011'
imageSearch = compile(tagre("img", "src", r'(http://iamarg\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://iamarg\.com/\d+/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class IanJay(_BasicScraper): class IanJay(_BasicScraper):
url = 'http://ianjay.net/' url = 'http://ianjay.net/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://ianjay\.net/comics/\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://ianjay\.net/\?p=\d+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="Previous"))
help = 'Index foramt: nnn' help = 'Index foramt: nnn'

View file

@ -2,16 +2,17 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
class JackCannon(_BasicScraper): class JackCannon(_BasicScraper):
url = 'http://fancyadventures.com/' url = 'http://fancyadventures.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://fancyadventures\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://fancyadventures\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/page-nnn' help = 'Index format: yyyy/mm/dd/page-nnn'
@ -41,8 +42,9 @@ class JohnnyWander(_BasicScraper):
class JustAnotherEscape(_BasicScraper): class JustAnotherEscape(_BasicScraper):
url = 'http://www.justanotherescape.com/' url = 'http://www.justanotherescape.com/'
rurl = escape(url)
stripUrl = url + 'index.cgi?date=%s' stripUrl = url + 'index.cgi?date=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.justanotherescape\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.justanotherescape\.com//index\.cgi\?date=\d+)') prevSearch = compile(tagre("a", "href", r'(%sindex\.cgi\?date=\d+)' % rurl)
+ tagre("img", "alt", "Previous Comic")) + tagre("img", "alt", "Previous Comic"))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'

View file

@ -2,19 +2,20 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
class KatzenfutterGeleespritzer(_BasicScraper): class KatzenfutterGeleespritzer(_BasicScraper):
url = 'http://www.katzenfuttergeleespritzer.de/' url = 'http://www.katzenfuttergeleespritzer.de/'
rurl = escape(url)
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'dont-drink-and-drive' firstStripUrl = stripUrl % 'dont-drink-and-drive'
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(http://www\.katzenfuttergeleespritzer\.de/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)),
compile(tagre("img", "src", r'(http://www\.katzenfuttergeleespritzer\.de/wp-content/uploads/\d+/\d+/mmai_404[^"]+)')), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/mmai_404[^"]+)' % rurl)),
) )
prevSearch = compile(tagre("a", "href", r'(http://www.katzenfuttergeleespritzer.de/comic/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: stripname' help = 'Index format: stripname'
lang = 'de' lang = 'de'
@ -32,8 +33,9 @@ class KevinAndKell(_BasicScraper):
class Key(_BasicScraper): class Key(_BasicScraper):
url = 'http://key.shadilyn.com/latestpage.html' baseurl = 'http://key.shadilyn.com/'
stripUrl = 'http://key.shadilyn.com/pages/%s.html' url = baseurl + 'latestpage.html'
stripUrl = baseurl + 'pages/%s.html'
imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"') imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
prevSearch = compile(r'</a><a href="(.+?html)".+?prev') prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
help = 'Index format: nnn' help = 'Index format: nnn'
@ -41,16 +43,18 @@ class Key(_BasicScraper):
class KickInTheHead(_BasicScraper): class KickInTheHead(_BasicScraper):
url = 'http://www.kickinthehead.org/' url = 'http://www.kickinthehead.org/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2003/03/20/ipod-envy' firstStripUrl = stripUrl % '2003/03/20/ipod-envy'
imageSearch = compile(tagre("img", "src", r'(http://www\.kickinthehead\.org/kickinthehead3/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.kickinthehead\.org/\d+/\d+/\d+/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class KillerKomics(_BasicScraper): class KillerKomics(_BasicScraper):
url = 'http://www.killerkomics.com/web-comics/index_ang.cfm' baseurl = 'http://www.killerkomics.com/web-comics/'
stripUrl = 'http://www.killerkomics.com/web-comics/%s.cfm' url = baseurl + 'index_ang.cfm'
stripUrl = baseurl + '%s.cfm'
imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"') imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
prevSearch = compile(r'<div id="precedent"><a href="(.+?)"') prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
help = 'Index format: strip-name' help = 'Index format: strip-name'
@ -75,16 +79,18 @@ class Krakow(_BasicScraper):
class Kukuburi(_BasicScraper): class Kukuburi(_BasicScraper):
url = 'http://www.kukuburi.com/current/' baseurl = 'http://www.kukuburi.com/'
stripUrl = 'http://www.kukuburi.com/v2/%s/' url = baseurl + 'current/'
stripUrl = baseurl + 'v2/%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)', after='alt="[^"]')) imageSearch = compile(tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)', after='alt="[^"]'))
prevSearch = compile(r'nav-previous.+?"(http.+?)"') prevSearch = compile(r'nav-previous.+?"(http.+?)"')
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class KuroShouri(_BasicScraper): class KuroShouri(_BasicScraper):
url = 'http://kuroshouri.com/' url = 'http://kuroshouri.com/'
stripUrl = url + '?webcomic_post=%s' rurl = escape(url)
imageSearch = compile(tagre("img", "src", r"(http://kuroshouri\.com/wp-content/webcomic/kuroshouri/[^'\"]+)", quote="['\"]")) stripUrl = url + '?webcomic_post=%s'
prevSearch = compile(tagre("a", "href", r'(http://kuroshouri\.com/\?webcomic_post=[^"]+)', after="previous")) imageSearch = compile(tagre("img", "src", r"(%swp-content/webcomic/kuroshouri/[^'\"]+)" % rurl, quote="['\"]"))
help = 'Index format: chapter-n-page-m' prevSearch = compile(tagre("a", "href", r'(%s\?webcomic_post=[^"]+)' % rurl, after="previous"))
help = 'Index format: chapter-n-page-m'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..util import tagre from ..util import tagre
@ -10,27 +10,30 @@ from ..util import tagre
class LasLindas(_BasicScraper): class LasLindas(_BasicScraper):
url = 'http://laslindas.katbox.net/' url = 'http://laslindas.katbox.net/'
rurl = escape(url)
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
imageSearch = compile(tagre("img", "src", r'(http://laslindas\.katbox\.net/wp-content/uploads/[^"]+)', after="attachment-full")) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/[^"]+)' % rurl, after="attachment-full"))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(http://laslindas\.katbox\.net/comic/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'
class LeastICouldDo(_BasicScraper): class LeastICouldDo(_BasicScraper):
url = 'http://www.leasticoulddo.com/' url = 'http://www.leasticoulddo.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.leasticoulddo\.com/wp-content/uploads/\d+/\d+/\d{8}\.\w{1,4})')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.leasticoulddo\.com/wp-content/uploads/\d+/\d+/\d{8}\.\w{1,4})'))
prevSearch = compile(tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="Previous"))
starter = indirectStarter(url, starter = indirectStarter(url,
compile(tagre("a", "href", r'(http://www\.leasticoulddo\.com/comic/\d+/)', after="feature-comic"))) compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="feature-comic")))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class Lint(_BasicScraper): class Lint(_BasicScraper):
url = 'http://www.purnicellin.com/lint/' url = 'http://www.purnicellin.com/lint/'
rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(r'<img src="(http://www.purnicellin.com/lint/comics/.+?)"') imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'\| <a href="([^"]+)" rel="prev">') prevSearch = compile(r'\| <a href="([^"]+)" rel="prev">')
help = 'Index format: yyyy/mm/dd/num-name' help = 'Index format: yyyy/mm/dd/num-name'
@ -45,18 +48,20 @@ class LittleGamers(_BasicScraper):
class LoadingArtist(_BasicScraper): class LoadingArtist(_BasicScraper):
url = 'http://www.loadingartist.com/' url = 'http://www.loadingartist.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/01/04/born' firstStripUrl = stripUrl % '2011/01/04/born'
imageSearch = compile(tagre("img", "src", r'(http://www\.loadingartist\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.loadingartist\.com/\d+/\d+/\d+/[^"]+/)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class LookingForGroup(_BasicScraper): class LookingForGroup(_BasicScraper):
url = 'http://www.lfgcomic.com/' url = 'http://www.lfgcomic.com/'
rurl = escape(url)
stripUrl = url + 'page/%s/' stripUrl = url + 'page/%s/'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.lfgcomic\.com/wp-content/uploads/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.lfgcomic\.com/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.lfgcomic\.com/page/\d+/)', after="navtop-prev")) prevSearch = compile(tagre("a", "href", r'(%spage/\d+/)' % rurl, after="navtop-prev"))
starter = indirectStarter(url, compile(tagre("a", "href", r'(http://www\.lfgcomic\.com/page/\d+/)', after="feature-previous"))) starter = indirectStarter(url, compile(tagre("a", "href", r'(%spage/\d+/)' % rurl, after="feature-previous")))
nameSearch = compile(r'/page/(\d+)/') nameSearch = compile(r'/page/(\d+)/')
help = 'Index format: nnn' help = 'Index format: nnn'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
@ -26,9 +26,10 @@ class MagickChicks(_BasicScraper):
class ManlyGuysDoingManlyThings(_BasicScraper): class ManlyGuysDoingManlyThings(_BasicScraper):
url = 'http://thepunchlineismachismo.com/' url = 'http://thepunchlineismachismo.com/'
rurl = escape(url)
stripUrl = url + 'archives/comic/%s' stripUrl = url + 'archives/comic/%s'
imageSearch = compile(tagre("img", "src", r'(http://thepunchlineismachismo\.com/wp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://thepunchlineismachismo\.com/archives/comic/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchives/comic/[^"]+)' % rurl, after="previous"))
help = 'Index format: ddmmyyyy' help = 'Index format: ddmmyyyy'
@ -66,9 +67,10 @@ class MegaTokyo(_BasicScraper):
class Meiosis(_BasicScraper): class Meiosis(_BasicScraper):
url = 'http://meiosiswebcomic.com/' url = 'http://meiosiswebcomic.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://meiosiswebcomic\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://meiosiswebcomic\.com/[^"]+)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/ddmmyyyy' help = 'Index format: yyyy/mm/ddmmyyyy'
@ -91,9 +93,10 @@ class MenageA3(_BasicScraper):
class Melonpool(_BasicScraper): class Melonpool(_BasicScraper):
url = 'http://www.melonpool.com/' url = 'http://www.melonpool.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.melonpool\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.melonpool\.com/\?p=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: n' help = 'Index format: n'
@ -106,15 +109,16 @@ class Misfile(_BasicScraper):
class MyCartoons(_BasicScraper): class MyCartoons(_BasicScraper):
url = 'http://mycartoons.de/' url = 'http://mycartoons.de/'
stripUrl = url + 'page/%s' rurl = escape(url)
imageSearch = ( stripUrl = url + 'page/%s'
compile(tagre("img", "src", r'(http://mycartoons\.de/wp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)')), imageSearch = (
compile(tagre("img", "src", r'(http://mycartoons\.de/cartoons/[^"]+/\d+-\d+-\d+[^"]+)')) compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
) compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
prevSearch = compile(tagre("a", "href", r'(http://mycartoons\.de/page/[^"]+)') + "&laquo;") )
help = 'Index format: number' prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + "&laquo;")
lang = 'de' help = 'Index format: number'
lang = 'de'
class MysteriesOfTheArcana(_BasicScraper): class MysteriesOfTheArcana(_BasicScraper):

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter from ..helpers import indirectStarter, bounceStarter
from ..util import tagre from ..util import tagre
@ -23,17 +23,19 @@ class Namesake(_BasicScraper):
class NamirDeiter(_BasicScraper): class NamirDeiter(_BasicScraper):
url = 'http://www.namirdeiter.com/' url = 'http://www.namirdeiter.com/'
rurl = escape(url)
stripUrl = url + 'comics/index.php?date=%s' stripUrl = url + 'comics/index.php?date=%s'
imageSearch = compile(tagre("img", "src", r"'?(http://www\.namirdeiter\.com/comics/\d+\.jpg)'?", quote="")) imageSearch = compile(tagre("img", "src", r"'?(%scomics/\d+\.jpg)'?" % rurl, quote=""))
prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous") prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'")+"Previous")
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class Nedroid(_BasicScraper): class Nedroid(_BasicScraper):
url = 'http://nedroid.com/' url = 'http://nedroid.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://nedroid\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://nedroid\.com/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -62,28 +64,30 @@ class NewWorld(_BasicScraper):
class Nicky510(_BasicScraper): class Nicky510(_BasicScraper):
url = 'http://www.nickyitis.com/' url = 'http://www.nickyitis.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.nickyitis\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.nickyitis\.com/comic/[^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="Previous"))
help = 'Index format: stripname' help = 'Index format: stripname'
class NekkoAndJoruba(_BasicScraper): class NekkoAndJoruba(_BasicScraper):
url = 'http://www.nekkoandjoruba.com/' url = 'http://www.nekkoandjoruba.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(r'<img src="(http://www.nekkoandjoruba.com/comics/.+?)"') imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>') prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
help = 'Index format: nnn' help = 'Index format: nnn'
class NekoTheKitty(_BasicScraper): class NekoTheKitty(_BasicScraper):
url = 'http://www.nekothekitty.net/' url = 'http://www.nekothekitty.net/'
rurl = escape(url)
stripUrl = url + 'comics/%s' stripUrl = url + 'comics/%s'
starter = bounceStarter(url, compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') + starter = bounceStarter(url, compile(tagre("a", "href", r'(%scomics/[^"]+)' % rurl) +
tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallnext.png'))) tagre("img", "src", r'%sfiles/smallnext\.png' % rurl)))
imageSearch = compile(tagre("img", "src", r'(http://(?:img\d+|www)\.smackjeeves\.com/images/uploaded/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://(?:img\d+|www)\.smackjeeves\.com/images/uploaded/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') + prevSearch = compile(tagre("a", "href", r'(%scomics/[^"]+)' % rurl) +
tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev.png')) tagre("img", "src", r'%sfiles/smallprev\.png' % rurl))
help = 'Index format: n/n-name' help = 'Index format: n/n-name'
@ -100,35 +104,40 @@ class NichtLustig(_BasicScraper):
class Nnewts(_BasicScraper):
    # Scraper settings for http://nnewts.com/.
    url = 'http://nnewts.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'nnewts-page-1'
    imageSearch = compile(
        tagre("img", "src", r'(%snewty/comics/[^"]+)' % rurl)
    )
    # Page slugs appear both with and without the "nnewts-" prefix.
    prevSearch = compile(
        tagre("a", "href", r'(%s(?:nnewts-)?page-\d+/)' % rurl,
              after="navi-prev")
    )
    help = 'Index format: page-number'
class Nodwick(_BasicScraper):
    # Scraper settings for http://comic.nodwick.com/.
    url = 'http://comic.nodwick.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + "?p=%s"
    imageSearch = compile(
        tagre("img", "src", r'(%snodwickstrips/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
    help = 'Index format: stripnumber'
class NobodyScores(_BasicScraper):
    # Scraper settings for http://nobodyscores.loosenutstudio.com/.
    url = 'http://nobodyscores.loosenutstudio.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php?id=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomix/[^"]+)' % rurl))
    multipleImagesPerStrip = True
    # The dot in "index.php" is escaped so it only matches a literal dot,
    # in line with the escaped host part supplied by rurl.
    prevSearch = compile(
        r'<a href="(%sindex\.php.+?)">the one before </a>' % rurl)
    help = 'Index format: nnn'
class NoNeedForBushido(_BasicScraper):
    # Scraper settings for http://noneedforbushido.com/.
    baseurl = 'http://noneedforbushido.com/'
    # Regex-escaped form of baseurl (not of url, which points at /latest/).
    rurl = escape(baseurl)
    url = baseurl + 'latest/'
    stripUrl = baseurl + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/comic/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl,
              after="previous-comic-link")
    )
    help = 'Index format: yyyy/comic/nnn'

View file

@ -11,7 +11,8 @@ from ..scraper import _BasicScraper
class NineteenNinetySeven(_BasicScraper):
    # Scraper settings for http://www.1977thecomic.com/.
    # NOTE(review): name and class say 1997 while the site and image path
    # say 1977 - confirm which is intended before renaming anything.
    name = '1997'
    url = 'http://www.1977thecomic.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1977-comics/from-the-beginning-part-1'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.1977thecomic\.com/comics-1977/[^"]+)')
    )
    # Any link whose tag is directly followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)')+"Previous"
    )
    help = 'Index format: yyyy/mm/dd/strip-name'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..util import tagre, urlopen from ..util import tagre, urlopen
@ -10,20 +10,22 @@ from ..util import tagre, urlopen
class OctopusPie(_BasicScraper):
    # Scraper settings for http://www.octopuspie.com/.
    url = 'http://www.octopuspie.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    # Link on this site immediately followed by the junk/latest.png image.
    starter = indirectStarter(
        url,
        compile(
            tagre("a", "href", r'(%s[^"]+)' % rurl) +
            tagre("img", "src", r'%sjunk/latest\.png' % rurl)
        )
    )
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src", r'(%sstrippy/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class OddFish(_BasicScraper):
    # Scraper settings for http://www.odd-fish.net/.
    url = 'http://www.odd-fish.net/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev")
    )
    help = 'Index format: stripname'
@ -45,27 +47,30 @@ class Oglaf(_BasicScraper):
class OkCancel(_BasicScraper):
    # Scraper settings for http://okcancel.com/.
    url = 'http://okcancel.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s.html'
    # Strip images are named okcancel<8 digits>.gif below /strips/.
    imageSearch = compile(
        tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl)
    )
    # A div with class "previous" directly followed by a /comic/N.html link.
    prevSearch = compile(
        tagre("div", "class", "previous") +
        tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl)
    )
    # The same pattern is reused as the search pattern for the starter.
    starter = indirectStarter(url, prevSearch)
    help = 'Index format: yyyymmdd'
class OmakeTheater(_BasicScraper):
    # Scraper settings for http://omaketheater.com/.
    url = 'http://omaketheater.com/'
    # Regex-escaped form of url, interpolated into the link patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    # Images live on the media. subdomain, so rurl cannot be used here.
    imageSearch = compile(
        tagre("img", "src", r'(http://media\.omaketheater\.com/4koma/[^"]+)')
    )
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="prev")
    )
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl))
    )
    help = 'Index format: number (unpadded)'
class OnTheEdge(_BasicScraper):
    # Scraper settings for http://ontheedgecomics.com/.
    url = 'http://ontheedgecomics.com/'
    # Regex-escaped form of url, interpolated into imageSearch.
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    imageSearch = compile(
        r'<img src="(%scomics/.+?)"' % rurl
    )
    # Captures the href of a link carrying rel="prev".
    prevSearch = compile(
        r'<a href="([^"]+)" rel="prev">'
    )
    help = 'Index format: nnn (unpadded)'
@ -80,10 +85,11 @@ class OneQuestion(_BasicScraper):
class OrnerBoy(_BasicScraper):
    # Scraper settings for http://www.orneryboy.com/.
    url = 'http://www.orneryboy.com/'
    # Regex-escaped form of url, interpolated into prevSearch.
    rurl = escape(url)
    stripUrl = url + 'index.php?comicID=%s'
    firstStripUrl = stripUrl % '1'
    # Image sources are relative paths (comics/<digits>.<ext>).
    imageSearch = compile(
        tagre("img", "src", r'(comics/\d+\.[^"]+)')
    )
    # Absolute comicID link directly followed by the prev_a.gif image.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?comicID=\d+)' % rurl) +
        tagre("img", "src", r'images/prev_a\.gif')
    )
    help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre from ..util import tagre
@ -10,9 +10,10 @@ from ..util import tagre
class PandyLand(_BasicScraper):
    # Scraper settings for http://pandyland.net/.
    url = 'http://pandyland.net/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/)' % rurl, after="prev")
    )
    help = 'Index format: number'
@ -27,10 +28,11 @@ class ParadigmShift(_BasicScraper):
class ParallelUniversum(_BasicScraper):
    # Scraper settings for http://www.paralleluniversum.net/.
    url = 'http://www.paralleluniversum.net/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '001-der-comic-ist-tot'
    # Image file names start with three dash-separated digit groups.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)
    )
    # Link on this site directly followed by a <span class="prev">.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+/)' % rurl) +
        tagre("span", "class", "prev")
    )
    help = 'Index format: number-stripname'
    lang = 'de'
@ -38,15 +40,17 @@ class ParallelUniversum(_BasicScraper):
class PartiallyClips(_BasicScraper):
    # Scraper settings for http://partiallyclips.com/.
    url = 'http://partiallyclips.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
class PastelDefender(_BasicScraper):
    # Scraper settings for http://www.pasteldefender.com/.
    # Both url and stripUrl are derived from the shared site root.
    baseurl = 'http://www.pasteldefender.com/'
    url = baseurl + 'coverbackcover.html'
    stripUrl = baseurl + '%s.html'
    # The page markup is upper-case; the image tag carries WIDTH="742".
    imageSearch = compile(
        r'<IMG SRC="(images/.+?)" WIDTH="742"'
    )
    # Link whose content starts with the images/back.gif image.
    prevSearch = compile(
        r'<A HREF="([^"]+)"><IMG SRC="images/back\.gif"'
    )
    help = 'Index format: nnn'
@ -62,22 +66,24 @@ class PebbleVersion(_BasicScraper):
class PennyAndAggie(_BasicScraper):
    # Scraper settings for http://www.pennyandaggie.com/.
    url = 'http://www.pennyandaggie.com/'
    # Regex-escaped form of url, interpolated into imageSearch.
    rurl = escape(url)
    stripUrl = url + 'index.php?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # The previous-day button was historically matched on the host WITHOUT
    # the "www." prefix; substituting rurl would silently require "www.".
    # Accept both spellings so neither form of the markup breaks navigation.
    prevSearch = compile(
        tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
        tagre("img", "src",
              r'http://(?:www\.)?pennyandaggie\.com/images/previous_day\.gif',
              quote=""))
    # The same pattern is reused as the search pattern for the starter.
    starter = indirectStarter(url, prevSearch)
    help = 'Index format: n (unpadded)'
class PennyArcade(_BasicScraper): class PennyArcade(_BasicScraper):
url = 'http://penny-arcade.com/comic/' url = 'http://penny-arcade.com/comic/'
rurl = escape(url)
starter = bounceStarter(url, starter = bounceStarter(url,
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnNext")) compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext"))
) )
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnPrev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev"))
help = 'Index format: yyyy/mm/dd' help = 'Index format: yyyy/mm/dd'
@classmethod @classmethod
@ -88,9 +94,10 @@ class PennyArcade(_BasicScraper):
class PeppermintSaga(_BasicScraper):
    # Scraper settings for http://www.pepsaga.com/.
    url = 'http://www.pepsaga.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
    help = 'Index format: number'
@ -107,17 +114,19 @@ class PHDComics(_BasicScraper):
class PicPakDog(_BasicScraper):
    # Scraper settings for http://www.picpak.net/.
    url = 'http://www.picpak.net/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s/'
    # PNG uploads whose file name starts with three dash-separated numbers.
    imageSearch = compile(
        tagre("img", "src",
              r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="navi-prev")
    )
    help = 'Index format: stripname'
class Pixel(_BasicScraper):
    # Scraper settings for http://pixelcomic.net/.
    url = 'http://pixelcomic.net/'
    # Regex-escaped form of url, interpolated into prevSearch.
    rurl = escape(url)
    stripUrl = url + '%s.php'
    # Image sources are bare relative file names (<digits>.png).
    imageSearch = compile(
        tagre("img", "src", r'(\d+\.png)')
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\.php)' % rurl, before="prev")
    )
    help = 'Index format: nnn'
@ -195,9 +204,10 @@ class ProperBarn(_BasicScraper):
class PunksAndNerds(_BasicScraper):
    # Scraper settings for http://www.punksandnerds.com/.
    url = 'http://www.punksandnerds.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")
    )
    help = 'Index format: nnn'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
@ -16,8 +16,10 @@ class QuestionableContent(_BasicScraper):
class Qwantz(_BasicScraper):
    # Scraper settings for http://www.qwantz.com/.
    baseurl = 'http://www.qwantz.com/'
    url = baseurl + 'index.php'
    # Regex-escaped form of baseurl (not of url, which ends in index.php).
    rurl = escape(baseurl)
    stripUrl = url + '?comic=%s'
    # baseurl already ends with "/", so the path must NOT start with another
    # slash: '%s/comics/' would expand to '...qwantz.com//comics/' and never
    # match the real image URLs.
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?comic=\d+)' % rurl,
              before="prev"))
    help = 'Index format: n'

View file

@ -34,9 +34,10 @@ class RealmOfAtland(_BasicScraper):
class RedMeat(_BasicScraper):
    # Scraper settings for http://www.redmeat.com/redmeat/.
    # Both url and stripUrl are derived from the shared base path.
    baseurl = 'http://www.redmeat.com/redmeat/'
    url = baseurl + 'current/index.html'
    # Relative "../yyyy-mm-dd/index.html" link whose text is "next".
    starter = bounceStarter(
        url,
        compile(r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">next</a>')
    )
    stripUrl = baseurl + '%s/index.html'
    # The strip image is always index-1.gif with explicit width and height.
    imageSearch = compile(
        r'<img src="(index-1\.gif)" width="\d+" height="\d+" [^>]*>'
    )
    # Relative "../yyyy-mm-dd/index.html" link whose text is "previous".
    prevSearch = compile(
        r'<a href="(\.\./\d{4}-\d{2}-\d{2}/index\.html)">previous</a>'
    )
    help = 'Index format: yyyy-mm-dd'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, MULTILINE, IGNORECASE, sub from re import compile, escape, MULTILINE, IGNORECASE, sub
from os.path import splitext from os.path import splitext
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter from ..helpers import indirectStarter, bounceStarter
@ -11,9 +11,11 @@ from ..util import tagre
class SailorsunOrg(_BasicScraper):
    # Scraper settings for http://sailorsun.org/.
    url = 'http://sailorsun.org/'
    # Regex-escaped form of url; assigned once and shared by both patterns
    # (a second, identical assignment before prevSearch was redundant).
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: n (unpadded)'
@ -27,19 +29,21 @@ class SamAndFuzzy(_BasicScraper):
class SandraAndWoo(_BasicScraper):
    # Scraper settings for http://www.sandraandwoo.com/.
    url = 'http://www.sandraandwoo.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-sandra-and-woo'
    # Image file names start with three dash-separated digit groups.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl)
    )
    # Strip pages are addressed as <digits>/<digits>/<digits>/<name>/.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/dd/number-stripname'
class SandraAndWooGerman(_BasicScraper):
    # Scraper settings for http://www.sandraandwoo.com/woode/.
    url = 'http://www.sandraandwoo.com/woode/'
    # Regex-escaped form of url (including the /woode/ path segment).
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/10/19/ein-ausgefuchster-waschbar'
    # Image file names start with three dash-separated digit groups.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl)
    )
    # Strip pages are addressed as <digits>/<digits>/<digits>/<name>/.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/dd/number-stripname'
    lang = 'de'
@ -54,9 +58,10 @@ class ScaryGoRound(_BasicScraper):
class ScenesFromAMultiverse(_BasicScraper):
    # Scraper settings for http://amultiverse.com/.
    url = 'http://amultiverse.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl)
    )
    # The leading "\d+\d+" requires at least two digits in the first segment.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
@ -79,9 +84,10 @@ class SchoolBites(_BasicScraper):
class Schuelert(_BasicScraper):
    # Scraper settings for http://www.schuelert.de/.
    url = 'http://www.schuelert.de/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    # No per-strip URL template is defined for this comic.
    stripUrl = None
    # Image tags on this site quote their attributes with single quotes.
    imageSearch = compile(
        tagre("img", "src", r"(%swp-content/[^']+)" % rurl, quote="'")
    )
    # Paged index link directly followed by the &laquo; entity.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?paged=\d+)' % rurl) + "&laquo;"
    )
    multipleImagesPerStrip = True
    help = 'Index format: none'
    lang = 'de'
@ -89,10 +95,11 @@ class Schuelert(_BasicScraper):
class Science(_BasicScraper):
    # Scraper settings for http://sci-ence.org/.
    url = 'http://sci-ence.org/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+/)' % rurl, after="prev")
    )
    # Image file names start with three dash-separated digit groups.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)
    )
    help = 'Index format: stripname'
    description = u'A comic about science, technology, skepticism, geekery, video games, atheism, and more.'
@ -142,18 +149,20 @@ class Sheldon(_BasicScraper):
class Shivae(_BasicScraper):
    # Scraper settings for http://shivae.net/.
    url = 'http://shivae.net/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'blog/%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="Previous")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
# XXX disallowed by robots.txt # XXX disallowed by robots.txt
class _Shortpacked(_BasicScraper):
    # Scraper settings for http://www.shortpacked.com/.
    url = 'http://www.shortpacked.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/comic/book-nn/mm-name1/name2'
@ -189,9 +198,10 @@ class SkinDeep(_BasicScraper):
class SlightlyDamned(_BasicScraper):
    # Scraper settings for http://www.sdamned.com/.
    url = 'http://www.sdamned.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/number'
@ -251,14 +261,15 @@ class SnowFlakes(_BasicScraper):
class SnowFlame(_BasicScraper): class SnowFlame(_BasicScraper):
url = 'http://www.snowflamecomic.com/' url = 'http://www.snowflamecomic.com/'
rurl = escape(url)
stripUrl = url + '?comic=snowflame-%s-%s' stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01') firstStripUrl = stripUrl % ('01', '01')
imageSearch = compile(tagre("img", "src", r'(http://www\.snowflamecomic\.com/wp-content/uploads/\d+/\d+/[^"]+)', after="Snow[Ff]lame the fan made")) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame the fan made"))
prevSearch = compile(tagre("span", "class", "mininav-prev") + prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(http://www\.snowflamecomic\.com/\?comic=snowflame[^"]+)')) tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
starter = bounceStarter(url, starter = bounceStarter(url,
compile(tagre("span", "class", "mininav-next") + compile(tagre("span", "class", "mininav-next") +
tagre("a", "href", r'(http://www\.snowflamecomic\.com/\?comic=snowflame[^"]+)'))) tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
help = 'Index format: chapter-page' help = 'Index format: chapter-page'
def getStripIndexUrl(self, index): def getStripIndexUrl(self, index):
@ -276,25 +287,29 @@ class SnowFlame(_BasicScraper):
class SodiumEyes(_BasicScraper):
    # Scraper settings for http://sodiumeyes.com/.
    url = 'http://sodiumeyes.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    # The src value is matched with quote="" and runs up to the next space.
    imageSearch = compile(
        tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote="")
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
class Sorcery101(_BasicScraper):
    # Scraper settings for http://www.sorcery101.net/.
    baseurl = 'http://www.sorcery101.net/'
    url = baseurl + 'sorcery-101/'
    # Regex-escaped form of baseurl: images live outside the /sorcery-101/
    # path, so the patterns are anchored at the site root.
    rurl = escape(baseurl)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl,
              after="previous-")
    )
    help = 'Index format: stripname'
class SpaceTrawler(_BasicScraper):
    # Scraper settings for http://spacetrawler.com/.
    url = 'http://spacetrawler.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
              after="navi-prev")
    )
    help = 'Index format: yyyy/mm/dd/stripname'
@ -309,26 +324,29 @@ class SpareParts(_BasicScraper):
class Spinnerette(_BasicScraper):
    # Scraper settings for http://www.spinnyverse.com/.
    url = 'http://www.spinnyverse.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, before="Previous Comic")
    )
    help = 'Index format: number'
class SPQRBlues(_BasicScraper):
    # Scraper settings for http://spqrblues.com/IV/.
    url = 'http://spqrblues.com/IV/'
    # Regex-escaped form of url (including the /IV/ path segment).
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.png)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
    help = 'Index format: number'
# XXX disallowed by robots.txt # XXX disallowed by robots.txt
class _StationV3(_BasicScraper):
    # Scraper settings for http://www.stationv3.com/.
    url = 'http://www.stationv3.com/'
    # Regex-escaped form of url, interpolated into every pattern below.
    rurl = escape(url)
    stripUrl = url + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Dated page link directly followed by the previous.gif button image.
    # The image URL is built from rurl as well, so the host is spelled in
    # exactly one place instead of being hard-coded a second time.
    prevSearch = compile(
        tagre("a", "href", r'(%sd/\d+\.html)' % rurl) +
        tagre("img", "src", r'%simages/previous\.gif' % rurl))
    help = 'Index format: yyyymmdd'
@ -343,9 +361,10 @@ class StickyDillyBuns(_BasicScraper):
class Stubble(_BasicScraper):
    # Scraper settings for http://stubblecomics.com/.
    url = 'http://stubblecomics.com/'
    # Regex-escaped form of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")
    )
    help = 'Index format: number'
@ -386,17 +405,19 @@ class StuffNoOneToldMe(_BasicScraper):
class StrawberryDeathCake(_BasicScraper): class StrawberryDeathCake(_BasicScraper):
url = 'http://strawberrydeathcake.com/' url = 'http://strawberrydeathcake.com/'
rurl = escape(url)
stripUrl = url + 'archive/%s/' stripUrl = url + 'archive/%s/'
imageSearch = compile(tagre("img", "src", r'(http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'
class SuburbanTribe(_BasicScraper): class SuburbanTribe(_BasicScraper):
url = 'http://www.pixelwhip.com/' url = 'http://www.pixelwhip.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.pixelwhip\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.pixelwhip\.com/\?p=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
@ -430,9 +451,10 @@ class StarCrossdDestiny(_BasicScraper):
class Spamusement(_BasicScraper): class Spamusement(_BasicScraper):
url = 'http://spamusement.com/' url = 'http://spamusement.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comics/view/%s' stripUrl = url + 'index.php/comics/view/%s'
imageSearch = compile(r'<img src="(http://spamusement.com/gfx/\d+\..+?)"', IGNORECASE) imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
prevSearch = compile(r'<a href="(http://spamusement.com/index.php/comics/view/.+?)">', IGNORECASE) prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl, IGNORECASE)
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter(url, prevSearch) starter = indirectStarter(url, prevSearch)
@ -448,7 +470,8 @@ class _StrangeCandy(_BasicScraper):
class SupernormalStep(_BasicScraper): class SupernormalStep(_BasicScraper):
url = 'http://supernormalstep.com/' url = 'http://supernormalstep.com/'
rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://supernormalstep\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://supernormalstep\.com/\?p=\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
from ..util import tagre from ..util import tagre
@ -34,12 +34,13 @@ class TheNoob(_BasicScraper):
class TheOrderOfTheStick(_BasicScraper): class TheOrderOfTheStick(_BasicScraper):
url = 'http://www.giantitp.com/comics/oots0863.html' baseurl = 'http://www.giantitp.com/'
stripUrl = 'http://www.giantitp.com/comics/oots%s.html' url = baseurl + 'comics/oots0863.html'
stripUrl = baseurl + 'comics/oots%s.html'
imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">') imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"') prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter('http://www.giantitp.com/', compile(r'<A href="(/comics/oots\d{4}\.html)"')) starter = indirectStarter(baseurl, compile(r'<A href="(/comics/oots\d{4}\.html)"'))
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
@ -47,8 +48,9 @@ class TheOrderOfTheStick(_BasicScraper):
class TheParkingLotIsFull(_BasicScraper): class TheParkingLotIsFull(_BasicScraper):
url = 'http://plif.courageunfettered.com/archive/arch2002.htm' baseurl = 'http://plif.courageunfettered.com/'
stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm' url = baseurl + 'archive/arch2002.htm'
stripUrl = baseurl + 'archive/arch%s.htm'
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">') imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}') prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
@ -65,19 +67,21 @@ class TheWotch(_BasicScraper):
class ThisIsIndexed(_BasicScraper): class ThisIsIndexed(_BasicScraper):
url = 'http://thisisindexed.com/' url = 'http://thisisindexed.com/'
rurl = escape(url)
stripUrl = url + 'page/%s' stripUrl = url + 'page/%s'
imageSearch = compile(tagre("img", "src", r'(http://thisisindexed\.com/wp-content/uploads/\d+/\d+/card[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/card[^"]+)' % rurl))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("div", "class", "nav-previous") + prevSearch = compile(tagre("div", "class", "nav-previous") +
tagre("a", "href", r'(http://thisisindexed\.com/page/\d+/)')) tagre("a", "href", r'(%spage/\d+/)' % rurl))
help = 'Index format: number' help = 'Index format: number'
class ThunderAndLightning(_BasicScraper): class ThunderAndLightning(_BasicScraper):
url = 'http://www.talcomic.com/wp/' url = 'http://www.talcomic.com/wp/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
prevSearch = compile(tagre("a", "href", r'(http://www\.talcomic\.com/wp/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
imageSearch = compile(tagre("img", "src", r'(http://www\.talcomic\.com/wp/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/page-nn' help = 'Index format: yyyy/mm/dd/page-nn'
@classmethod @classmethod
@ -87,17 +91,19 @@ class ThunderAndLightning(_BasicScraper):
class TinyKittenTeeth(_BasicScraper): class TinyKittenTeeth(_BasicScraper):
url = 'http://www.tinykittenteeth.com/' url = 'http://www.tinykittenteeth.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: yyyy/mm/dd/stripname (unpadded)' help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
class ToonHole(_BasicScraper): class ToonHole(_BasicScraper):
url = 'http://www.toonhole.com/' url = 'http://www.toonhole.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.toonhole\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://www\.toonhole\.com/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/stripname' help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url): def shouldSkipUrl(self, url):
@ -115,9 +121,10 @@ class _TwoLumps(_BasicScraper):
class TwoTwoOneFour(_BasicScraper): class TwoTwoOneFour(_BasicScraper):
url = 'http://www.nitrocosm.com/go/2214_classic/' url = 'http://www.nitrocosm.com/go/2214_classic/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/[^"]+)', before="gallery_display")) imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/[^"]+)', before="gallery_display"))
prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/2214_classic/\d+/)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(%s\d+/)' % rurl, after="Previous"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -131,32 +138,37 @@ class TheWhiteboard(_BasicScraper):
class HMHigh(_BasicScraper): class HMHigh(_BasicScraper):
name = 'TheFallenAngel/HMHigh' name = 'TheFallenAngel/HMHigh'
url = 'http://www.thefallenangel.co.uk/hmhigh/' baseurl = 'http://www.thefallenangel.co.uk/'
url = baseurl + 'hmhigh/'
rurl = escape(baseurl)
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
imageSearch = compile(r'<img src="(http://www.thefallenangel.co.uk/hmhigh/img/comic/.+?)"') imageSearch = compile(r'<img src="(%shmhigh/img/comic/.+?)"' % rurl)
prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)" title=".+?">Prev</a>') prevSearch = compile(r' <a href="(%s.+?)" title=".+?">Prev</a>' % rurl)
help = 'Index format: nnn' help = 'Index format: nnn'
class TheOuterQuarter(_BasicScraper): class TheOuterQuarter(_BasicScraper):
url = 'http://theouterquarter.com/' url = 'http://theouterquarter.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
imageSearch = compile(r'<img src="(http://theouterquarter.com/comics/.+?)"') imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">') prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
help = 'Index format: nnn' help = 'Index format: nnn'
class ThreePanelSoul(_BasicScraper): class ThreePanelSoul(_BasicScraper):
url = 'http://threepanelsoul.com/' url = 'http://threepanelsoul.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://threepanelsoul\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://threepanelsoul\.com/\d+/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class TracyAndTristan(_BasicScraper): class TracyAndTristan(_BasicScraper):
url = 'http://tandt.thecomicseries.com/' url = 'http://tandt.thecomicseries.com/'
rurl = escape(url)
stripUrl = url + 'comics/%s' stripUrl = url + 'comics/%s'
imageSearch = compile(tagre("img", "src", r'(http://tandt\.thecomicseries\.com/images/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%simages/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(/comics/\d+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(/comics/\d+)', after="prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
@ -11,8 +11,9 @@ from ..helpers import indirectStarter
class WapsiSquare(_BasicScraper): class WapsiSquare(_BasicScraper):
url = 'http://wapsisquare.com/' url = 'http://wapsisquare.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
imageSearch = compile(r'<img src="(http://wapsisquare.com/comics/.+?)"') imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>') prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
help = 'Index format: stripname' help = 'Index format: stripname'
@ -35,8 +36,8 @@ class WayfarersMoon(_BasicScraper):
class WebDesignerCOTW(_BasicScraper): class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/' url = 'http://www.webdesignerdepot.com/'
starter = indirectStarter(url, rurl = escape(url)
compile(tagre("a", "href", r'(http://www\.webdesignerdepot\.com/\d+/\d+/[^"]+/)'))) starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1' firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
imageSearch = ( imageSearch = (
@ -46,7 +47,7 @@ class WebDesignerCOTW(_BasicScraper):
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')), compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
) )
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("link", "href", r"(http://www\.webdesignerdepot\.com/\d+/\d+/[^']+)", before='prev', quote="'")) prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'"))
help = 'Index format: yyyy/mm/stripname' help = 'Index format: yyyy/mm/stripname'
description = "The content revolves around web design, blogging and funny situations that we encounter in our daily lives as designers and this week we focus on Christmas. These great cartoons are created by Jerry King, an award-winning cartoonist whos one of the most published, prolific and versatile cartoonists in the world today." description = "The content revolves around web design, blogging and funny situations that we encounter in our daily lives as designers and this week we focus on Christmas. These great cartoons are created by Jerry King, an award-winning cartoonist whos one of the most published, prolific and versatile cartoonists in the world today."
@ -63,24 +64,28 @@ class WebDesignerCOTW(_BasicScraper):
class WeCanSleepTomorrow(_BasicScraper): class WeCanSleepTomorrow(_BasicScraper):
url = 'http://wecansleeptomorrow.com/' url = 'http://wecansleeptomorrow.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class WhiteNinja(_BasicScraper): class WhiteNinja(_BasicScraper):
url = 'http://www.whiteninjacomics.com/comics.shtml' baseurl = 'http://www.whiteninjacomics.com/'
stripUrl = 'http://www.whiteninjacomics.com/comics/%s.shtml' url = baseurl + 'comics.shtml'
stripUrl = baseurl + 'comics/%s.shtml'
imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0') imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
prevSearch = compile(r'(/comics/.+?shtml).+?previous') prevSearch = compile(r'(/comics/.+?shtml).+?previous')
help = 'Index format: s (comic name)' help = 'Index format: s (comic name)'
class WhyTheLongFace(_BasicScraper): class WhyTheLongFace(_BasicScraper):
url = 'http://www.absurdnotions.org/wtlf200709.html' baseurl = 'http://www.absurdnotions.org/'
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html' rurl = escape(baseurl)
imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE) url = baseurl + 'wtlf200709.html'
stripUrl = baseurl + 'wtlf%s.html'
imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl, IGNORECASE)
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ') prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
help = 'Index format: yyyymm' help = 'Index format: yyyymm'
@ -96,9 +101,10 @@ class Wigu(_BasicScraper):
class Wonderella(_BasicScraper): class Wonderella(_BasicScraper):
url = 'http://nonadventures.com/' url = 'http://nonadventures.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://nonadventures\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(http://nonadventures\.com/\d+/\d+/\d+/[^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -198,8 +204,9 @@ class Wulffmorgenthaler(_BasicScraper):
class WhiteNoise(_BasicScraper): class WhiteNoise(_BasicScraper):
url = 'http://www.wncomic.com/archive.php' baseurl = 'http://www.wncomic.com/'
stripUrl = 'http://www.wncomic.com/archive_comments.php?strip_id=%s' url = baseurl + 'archive.php'
stripUrl = baseurl + 'archive_comments.php?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"') imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ') prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
help = 'Index format: n' help = 'Index format: n'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre from ..util import tagre
from ..helpers import bounceStarter from ..helpers import bounceStarter
@ -10,9 +10,10 @@ from ..helpers import bounceStarter
class ZapComic(_BasicScraper): class ZapComic(_BasicScraper):
url = 'http://www.zapcomic.com/' url = 'http://www.zapcomic.com/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.zapcomic\.com\?comic_object=\d+)')) imageSearch = compile(tagre("img", "src", r'(%s\?comic_object=\d+)' % escape(url.rstrip('/'))))
prevSearch = compile(tagre("a", "href", r'(http://www\.zapcomic\.com/[^"]+)', after="previous-comic-link")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="previous-comic-link"))
help = 'Index format: yyyy/mm/nnn-stripname' help = 'Index format: yyyy/mm/nnn-stripname'
@ -41,9 +42,10 @@ class ZebraGirl(_BasicScraper):
class ZenPencils(_BasicScraper): class ZenPencils(_BasicScraper):
url = 'http://zenpencils.com/' url = 'http://zenpencils.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % '1-ralph-waldo-emerson-make-them-cry' firstStripUrl = stripUrl % '1-ralph-waldo-emerson-make-them-cry'
prevSearch = compile(tagre("a", "href", r'(http://zenpencils\.com/comic/[^"]+/)', after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+/)' % rurl, after="navi-prev"))
imageSearch = compile(tagre("img", "src", r'(http://maxcdn\.zenpencils\.com/comics/\d+-\d+-\d+[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://maxcdn\.zenpencils\.com/comics/\d+-\d+-\d+[^"]+)'))
help = 'Index format: num-stripname' help = 'Index format: num-stripname'
description = u'Inspirational quotes from famous people adapted into cartoons.' description = u'Inspirational quotes from famous people adapted into cartoons.'
@ -59,13 +61,14 @@ class ZombieHunters(_BasicScraper):
class Zwarwald(_BasicScraper): class Zwarwald(_BasicScraper):
url = "http://www.zwarwald.de/" url = "http://www.zwarwald.de/"
rurl = escape(url)
stripUrl = url + 'index.php/page/%s/' stripUrl = url + 'index.php/page/%s/'
# anything before page 495 seems to be flash # anything before page 495 seems to be flash
firstStripUrl = stripUrl % '495' firstStripUrl = stripUrl % '495'
lang = 'de' lang = 'de'
imageSearch = compile(tagre("img", "src", r'(http://(?:www\.zwarwald\.de|wp1163540.wp190.webpack.hosteurope.de/wordpress)/images/\d+/\d+/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://(?:www\.zwarwald\.de|wp1163540.wp190.webpack.hosteurope.de/wordpress)/images/\d+/\d+/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.zwarwald\.de/index\.php/page/\d+/)') + prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")) tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'"))
help = 'Index format: number' help = 'Index format: number'
waitSeconds = 1 waitSeconds = 1