Sort comics alphabetically & PEP8 style fixes.

This commit is contained in:
Tobias Gruetzmacher 2016-03-31 23:13:54 +02:00
parent dda920f353
commit 7f1e136d8b
17 changed files with 1186 additions and 1031 deletions

View file

@ -1,31 +1,23 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, MULTILINE from re import compile, escape, MULTILINE
from ..util import tagre from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter from ..helpers import regexNamer, bounceStarter, indirectStarter
class AbsurdNotions(_BasicScraper):
    # Scraper definition for absurdnotions.org; pages are named page<n>.html.
    baseUrl = 'http://www.absurdnotions.org/'
    url = baseUrl + 'page129.html'
    stripUrl = baseUrl + 'page%s.html'
    firstStripUrl = stripUrl % '1'
    # Captures <img> src values that start with "an".
    imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
    multipleImagesPerStrip = True
    # An <a href> pattern concatenated with the pattern for the nprev.gif
    # button image. The second pattern is a raw string: '\.' is an invalid
    # escape sequence in a normal literal (pattern text stays the same).
    prevSearch = compile(tagre('a', 'href', r'([^"]+)') +
                         tagre('img', 'src', r'nprev\.gif'))
    help = 'Index format: n (unpadded)'
class AbstruseGoose(_BasicScraper): class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/' url = 'http://abstrusegoose.com/'
rurl = escape(url) rurl = escape(url)
starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »"))
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) imageSearch = compile(tagre('img', 'src',
r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous') prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous')
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next &raquo;') nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next &raquo;')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@ -38,12 +30,25 @@ class AbstruseGoose(_BasicScraper):
return 'c%03d-%s' % (index, name) return 'c%03d-%s' % (index, name)
class AbsurdNotions(_BasicScraper):
    # Scraper definition for absurdnotions.org; pages are named page<n>.html.
    baseUrl = 'http://www.absurdnotions.org/'
    url = baseUrl + 'page129.html'
    stripUrl = baseUrl + 'page%s.html'
    firstStripUrl = stripUrl % '1'
    # Captures <img> src values that start with "an".
    imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
    multipleImagesPerStrip = True
    # An <a href> pattern concatenated with the pattern for the nprev.gif
    # button image. The second pattern is a raw string: '\.' is an invalid
    # escape sequence in a normal literal (pattern text stays the same).
    prevSearch = compile(tagre('a', 'href', r'([^"]+)') +
                         tagre('img', 'src', r'nprev\.gif'))
    help = 'Index format: n (unpadded)'
class AcademyVale(_BasicScraper): class AcademyVale(_BasicScraper):
url = 'http://www.imagerie.com/vale/' url = 'http://www.imagerie.com/vale/'
stripUrl = url + 'avarch.cgi?%s' stripUrl = url + 'avarch.cgi?%s'
firstStripUrl = stripUrl % '001' firstStripUrl = stripUrl % '001'
imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") +
tagre('img', 'src', 'AVNavBack\.gif'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -52,7 +57,8 @@ class Achewood(_BasicScraper):
stripUrl = url + 'index.php?date=%s' stripUrl = url + 'index.php?date=%s'
firstStripUrl = stripUrl % '00000000' firstStripUrl = stripUrl % '00000000'
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)',
after="Previous"))
help = 'Index format: mmddyyyy' help = 'Index format: mmddyyyy'
namer = regexNamer(compile(r'date=(\d+)')) namer = regexNamer(compile(r'date=(\d+)'))
@ -70,8 +76,7 @@ class AfterStrife(_BasicScraper):
class AGirlAndHerFed(_BasicScraper): class AGirlAndHerFed(_BasicScraper):
url = 'http://www.agirlandherfed.com/' url = 'http://www.agirlandherfed.com/'
starter = bounceStarter(url, starter = bounceStarter(url, compile(r'<a href="([^"]+)">[^>]+Back'))
compile(r'<a href="([^"]+)">[^>]+Back'))
stripUrl = url + '1.%s.html' stripUrl = url + '1.%s.html'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
@ -79,24 +84,16 @@ class AGirlAndHerFed(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
class AhoyEarth(_ParserScraper):
    # Parser-based scraper definition for ahoyearth.com.
    url = 'http://www.ahoyearth.com/'
    stripUrl = url + '%s/'
    # Regex-escaped copy of the base URL (not referenced inside this class).
    rurl = escape(url)
    help = 'Index format: ddmmyyyy'
    # css = True accompanies selector-style search strings below; presumably
    # _ParserScraper then treats them as CSS selectors -- confirm in scraper.
    css = True
    prevSearch = '.navi-prev'
    imageSearch = '#comic-1 img'
class AhoiPolloi(_BasicScraper): class AhoiPolloi(_BasicScraper):
url = 'http://ahoipolloi.blogger.de/' url = 'http://ahoipolloi.blogger.de/'
stripUrl = url + '?day=%s' stripUrl = url + '?day=%s'
firstStripUrl = stripUrl % '20060306' firstStripUrl = stripUrl % '20060306'
multipleImagesPerStrip = True multipleImagesPerStrip = True
lang = 'de' lang = 'de'
imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)')) imageSearch = compile(tagre('img', 'src',
prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)')) r'(/static/antville/ahoipolloi/images/[^"]+)'))
prevSearch = compile(tagre('a', 'href',
r'(http://ahoipolloi\.blogger\.de/\?day=\d+)'))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@classmethod @classmethod
@ -104,6 +101,15 @@ class AhoiPolloi(_BasicScraper):
return imageUrl.rsplit('/', 1)[1] return imageUrl.rsplit('/', 1)[1]
class AhoyEarth(_ParserScraper):
    # Parser-based scraper definition for ahoyearth.com.
    url = 'http://www.ahoyearth.com/'
    stripUrl = url + '%s/'
    help = 'Index format: ddmmyyyy'
    # css = True accompanies selector-style search strings below; presumably
    # _ParserScraper then treats them as CSS selectors -- confirm in scraper.
    css = True
    prevSearch = '.navi-prev'
    imageSearch = '#comic-1 img'
class AirForceBlues(_BasicScraper): class AirForceBlues(_BasicScraper):
url = 'http://www.afblues.com/' url = 'http://www.afblues.com/'
stripUrl = url + 'wordpress/%s/' stripUrl = url + 'wordpress/%s/'
@ -115,7 +121,8 @@ class AirForceBlues(_BasicScraper):
class ALessonIsLearned(_BasicScraper): class ALessonIsLearned(_BasicScraper):
url = 'http://www.alessonislearned.com/' url = 'http://www.alessonislearned.com/'
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
quote="'")+r"[^>]+previous")
starter = indirectStarter(url, prevSearch) starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s' stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
@ -127,7 +134,8 @@ class AlienLovesPredator(_BasicScraper):
url = 'http://alienlovespredator.com/' url = 'http://alienlovespredator.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay' firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) imageSearch = compile(tagre("img", "src", r'([^"]+)',
after='border="1" alt="" width="750"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -244,10 +252,13 @@ class Antics(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '3' firstStripUrl = stripUrl % '3'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev')) r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after='prev'))
help = 'Index format: number' help = 'Index format: number'
class AoiHouse(_ParserScraper): class AoiHouse(_ParserScraper):
url = 'http://www.aoihouse.net/' url = 'http://www.aoihouse.net/'
imageSearch = '//div[@id="comic"]/a[2]/img' imageSearch = '//div[@id="comic"]/a[2]/img'
@ -270,10 +281,19 @@ class ARedTailsDream(_BasicScraper):
url = baseUrl + 'comic/recent.php' url = baseUrl + 'comic/recent.php'
imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)')) imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)'))
prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') + prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') +
tagre("img", "src", r'.*?aprev.*?')) tagre("img", "src", r'.*?aprev.*?'))
help = 'Index format: nn' help = 'Index format: nn'
class ASkeweredParadise(_BasicScraper):
    # Scraper definition for aspcomics.net; strips live under comic/<nnn>.
    url = 'http://aspcomics.net/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '001'
    # Captures the asp<digits>.jpg image URL below sites/default/files.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
    # Link to /comic/<digits> followed by the text "Previous". The href
    # pattern is now a raw string: '\d' in a normal literal is an invalid
    # escape sequence (the pattern text itself is unchanged).
    prevSearch = compile(tagre("a", "href", r"(/comic/\d+)") +
                         r"[^>]+Previous")
    help = 'Index format: nnn'
class ASofterWorld(_ParserScraper): class ASofterWorld(_ParserScraper):
url = 'http://www.asofterworld.com/' url = 'http://www.asofterworld.com/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
@ -283,7 +303,6 @@ class ASofterWorld(_ParserScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class AstronomyPOTD(_BasicScraper): class AstronomyPOTD(_BasicScraper):
baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/'
url = baseUrl + 'astropix.html' url = baseUrl + 'astropix.html'
@ -299,22 +318,13 @@ class AstronomyPOTD(_BasicScraper):
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return url in (
self.stripUrl % '130217', # video self.stripUrl % '130217', # video
self.stripUrl % '130218', # video self.stripUrl % '130218', # video
self.stripUrl % '130226', # video self.stripUrl % '130226', # video
self.stripUrl % '130424', # video self.stripUrl % '130424', # video
) )
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:],
imageUrl.split('/')[-1].split('.')[0]) imageUrl.split('/')[-1].split('.')[0])
class ASkeweredParadise(_BasicScraper):
    # Scraper definition for aspcomics.net; strips live under comic/<nnn>.
    url = 'http://aspcomics.net/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '001'
    # Captures the asp<digits>.jpg image URL below sites/default/files.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+'))
    # Link to /comic/<digits> followed by the text "Previous". The href
    # pattern is now a raw string: '\d' in a normal literal is an invalid
    # escape sequence (the pattern text itself is unchanged).
    prevSearch = compile(tagre("a", "href", r"(/comic/\d+)") +
                         r"[^>]+Previous")
    help = 'Index format: nnn'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..util import tagre, getPageContent from ..util import tagre, getPageContent
@ -22,7 +24,8 @@ class BadassMuthas(_BasicScraper):
stripUrl = url + '?%s' stripUrl = url + '?%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) prevSearch = compile(tagre("a", "href", r'([^"]+)') +
tagre("img", "src", r'/images/comicsbuttonBack\.gif'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -51,7 +54,8 @@ class Bearmageddon(_BasicScraper):
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2011/08/01/page-1' firstStripUrl = stripUrl % '2011/08/01/page-1'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev')) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after='navi-prev'))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -63,7 +67,9 @@ class Beetlebum(_BasicScraper):
starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark'))) starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
multipleImagesPerStrip = True multipleImagesPerStrip = True
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)')) imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl, after='prev')) prevSearch = compile(tagre('a', 'href',
r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
after='prev'))
help = 'Index format: yyyy/mm/dd/striptitle' help = 'Index format: yyyy/mm/dd/striptitle'
lang = 'de' lang = 'de'
@ -71,7 +77,7 @@ class Beetlebum(_BasicScraper):
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:]) indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
name = '%s-%s-%s-%s' % indexes name = '%s-%s-%s-%s' % indexes
name = name + '_' + imageUrl.split( '/' )[-1] name = name + '_' + imageUrl.split('/')[-1]
return name return name
@ -89,14 +95,16 @@ class BetweenFailures(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'comics1/%s' stripUrl = url + 'comics1/%s'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl, after="previous")) prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl,
after="previous"))
help = 'Index format: stripname' help = 'Index format: stripname'
class BigFatWhale(_BasicScraper): class BigFatWhale(_BasicScraper):
url = 'http://www.bigfatwhale.com/' url = 'http://www.bigfatwhale.com/'
stripUrl = url + 'archives/bfw_%s.htm' stripUrl = url + 'archives/bfw_%s.htm'
imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)')) imageSearch = compile(tagre("img", "src",
r'(archives/bfw_[^"]+|bfw_[^"]+)'))
prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"') prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"')
help = 'Index format: nnn' help = 'Index format: nnn'
@ -125,7 +133,8 @@ class BizarreUprising(_BasicScraper):
stripUrl = url + 'view/%s' stripUrl = url + 'view/%s'
firstStripUrl = stripUrl % '1/awakening-splash' firstStripUrl = stripUrl % '1/awakening-splash'
imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif')) prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') +
tagre("img", "src", r'images/b_prev\.gif'))
help = 'Index format: n/name' help = 'Index format: n/name'
@ -133,7 +142,8 @@ class BlankIt(_BasicScraper):
url = 'http://blankitcomics.com/' url = 'http://blankitcomics.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '0001' firstStripUrl = stripUrl % '0001'
imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)')) imageSearch = compile(tagre("img", "src",
r'(http://blankitcomics\.com/bicomics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -151,26 +161,28 @@ class Blip(_BasicScraper):
if prevUrl: if prevUrl:
return prevUrl.replace("www.blipcomic.com", "blipcomic.com") return prevUrl.replace("www.blipcomic.com", "blipcomic.com")
class BloomingFaeries(_BasicScraper): class BloomingFaeries(_BasicScraper):
adult = True adult = True
url = 'http://www.bloomingfaeries.com/' url = 'http://www.bloomingfaeries.com/'
rurl = escape(url)
stripUrl = url + 'comic/public/%s/' stripUrl = url + 'comic/public/%s/'
firstStripUrl = stripUrl % "pit-stop" firstStripUrl = stripUrl % "pit-stop"
imageSearch = compile(tagre("img", "src", r'(http://www.bloomingfaeries.com/wp-content/uploads[^"]+)', after='title')) imageSearch = compile(tagre("img", "src", r'(http://www.bloomingfaeries.com/wp-content/uploads[^"]+)', after='title'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='comic-nav-base comic-nav-previous')) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after='comic-nav-base comic-nav-previous'))
help = 'Index format: stripname' help = 'Index format: stripname'
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
bf = imageUrl.split('/') bf = imageUrl.split('/')
name = bf[-1] name = bf[-1]
re = compile(tagre("div","class",r'comic-id-([^"]+)')) re = compile(tagre("div", "class", r'comic-id-([^"]+)'))
content = getPageContent(pageUrl, cls.session) content = getPageContent(pageUrl, cls.session)
match = re.search(content) match = re.search(content)
if not match: if not match:
return None return None
return "BF%s_%s" % (match.group(1),name) return "BF%s_%s" % (match.group(1), name)
class BMovieComic(_BasicScraper): class BMovieComic(_BasicScraper):
url = 'http://www.bmoviecomic.com/' url = 'http://www.bmoviecomic.com/'
@ -187,7 +199,9 @@ class BobWhite(_BasicScraper):
stripUrl = url + '?webcomic_post=%s' stripUrl = url + '?webcomic_post=%s'
firstStripUrl = stripUrl % '20110504' firstStripUrl = stripUrl % '20110504'
imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl)) imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl))
prevSearch = compile(tagre("a", "href", "(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous') prevSearch = compile(tagre("a", "href",
"(%s\?webcomic_post=\d+)" % rurl) +
r'[^"]+Previous')
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -214,9 +228,11 @@ class BoxerHockey(_BasicScraper):
url = 'http://boxerhockey.fireball20xl.com/' url = 'http://boxerhockey.fireball20xl.com/'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
firstStripUrl = stripUrl % '56' firstStripUrl = stripUrl % '56'
imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg")) imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)',
prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') + after="comicimg"))
r'[^>]+Previous') prevSearch = compile(tagre("a", "href",
r'(http://www\.boxerhockey\.com/\?id=\d+)') +
r'[^>]+Previous')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod @classmethod
@ -230,7 +246,8 @@ class BoyOnAStickAndSlither(_BasicScraper):
stripUrl = url + 'page/%s' stripUrl = url + 'page/%s'
firstStripUrl = stripUrl % '2' firstStripUrl = stripUrl % '2'
imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')) imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "<span>Next page") prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
"<span>Next page")
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod @classmethod
@ -276,16 +293,6 @@ class BrentalFlossGuest(BrentalFloss):
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
class Brink(_BasicScraper):
    # Scraper definition for the Brink comic hosted on paperfangs.com.
    url = 'http://paperfangs.com/brink/'
    # Regex-escaped base URL, interpolated into both search patterns.
    rurl = escape(url)
    help = 'Index format: number'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '5'
    # Any link below the base URL that is tagged with "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    # Images served from the comics/ directory below the base URL.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
class BrightlyWound(_BasicScraper): class BrightlyWound(_BasicScraper):
baseUrl = 'http://www.brightlywound.com/' baseUrl = 'http://www.brightlywound.com/'
url = baseUrl + '?comic=137' url = baseUrl + '?comic=137'
@ -296,6 +303,16 @@ class BrightlyWound(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
class Brink(_BasicScraper):
    # Scraper definition for the Brink comic hosted on paperfangs.com.
    url = 'http://paperfangs.com/brink/'
    # Regex-escaped base URL, interpolated into both search patterns.
    rurl = escape(url)
    help = 'Index format: number'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '5'
    # Any link below the base URL that is tagged with "prev".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    # Images served from the comics/ directory below the base URL.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
class ButtercupFestival(_ParserScraper): class ButtercupFestival(_ParserScraper):
url = 'http://www.buttercupfestival.com/' url = 'http://www.buttercupfestival.com/'
stripUrl = url + '%s.htm' stripUrl = url + '%s.htm'
@ -305,16 +322,6 @@ class ButtercupFestival(_ParserScraper):
help = 'Index format: 2-number' help = 'Index format: 2-number'
class ButterSafe(_BasicScraper):
    # Scraper definition for buttersafe.com; strips are addressed by a
    # yyyy/mm/dd/stripname path below the base URL.
    url = 'http://buttersafe.com/'
    # Regex-escaped base URL, interpolated into both search patterns.
    rurl = escape(url)
    help = 'Index format: yyyy/mm/dd/stripname'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'
    # Dated strip link tagged with "prev".
    # NOTE(review): the leading '\d+\d+' requires at least two digits;
    # kept exactly as written.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl,
              after="prev"))
    # Images served from the comics/ directory below the base URL.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
class ButternutSquash(_BasicScraper): class ButternutSquash(_BasicScraper):
url = 'http://www.butternutsquash.net/' url = 'http://www.butternutsquash.net/'
rurl = escape(url) rurl = escape(url)
@ -323,3 +330,14 @@ class ButternutSquash(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/strip-name-author-name' help = 'Index format: yyyy/mm/dd/strip-name-author-name'
class ButterSafe(_BasicScraper):
    # Scraper definition for buttersafe.com; strips are addressed by a
    # yyyy/mm/dd/stripname path below the base URL.
    url = 'http://buttersafe.com/'
    # Regex-escaped base URL, interpolated into both search patterns.
    rurl = escape(url)
    help = 'Index format: yyyy/mm/dd/stripname'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle'
    # Dated strip link tagged with "prev".
    # NOTE(review): the leading '\d+\d+' requires at least two digits;
    # kept exactly as written.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl,
              after="prev"))
    # Images served from the comics/ directory below the base URL.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl))

View file

@ -1,9 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015 Tobias Gruetzmacher # Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
@ -12,6 +12,7 @@ from ..helpers import bounceStarter, indirectStarter
from ..util import tagre from ..util import tagre
from .wordpress import _WordpressScraper from .wordpress import _WordpressScraper
class Caggage(_BasicScraper): class Caggage(_BasicScraper):
url = 'http://caggagecomic.com/' url = 'http://caggagecomic.com/'
rurl = escape(url) rurl = escape(url)
@ -21,6 +22,7 @@ class Caggage(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
help = 'Index format: number' help = 'Index format: number'
class CampComic(_BasicScraper): class CampComic(_BasicScraper):
url = 'http://campcomic.com/comic/' url = 'http://campcomic.com/comic/'
rurl = escape(url) rurl = escape(url)
@ -30,24 +32,28 @@ class CampComic(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev"))
help = 'Index Format: number' help = 'Index Format: number'
class CaptainSNES(_BasicScraper): class CaptainSNES(_BasicScraper):
url = 'http://www.captainsnes.com/' url = 'http://www.captainsnes.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2001/07/10/the-mistake' firstStripUrl = stripUrl % '2001/07/10/the-mistake'
imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'")) imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl,
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("span", "class", "prev")) quote="'"))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
tagre("span", "class", "prev"))
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/dd/nnn-stripname' help = 'Index format: yyyy/mm/dd/nnn-stripname'
class Carciphona(_BasicScraper): class Carciphona(_BasicScraper):
url = 'http://carciphona.com/' url = 'http://carciphona.com/'
stripUrl = url + 'view.php?page=%s&chapter=%s' imageSearch = compile(tagre("div", "style",
imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)')) r'background-image:url\((_pages[^)]*)\)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea")) prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)',
latestSearch = compile(tagre("a", "href", r'(view\.php\?page=[0-9]+[^"]*)')) after="prevarea"))
help = 'Index format: None' latestSearch = compile(tagre("a", "href",
r'(view\.php\?page=[0-9]+[^"]*)'))
starter = indirectStarter(url, latestSearch) starter = indirectStarter(url, latestSearch)
@classmethod @classmethod
@ -61,8 +67,8 @@ class CaseyAndAndy(_BasicScraper):
stripUrl = url + 'view.php?strip=%s' stripUrl = url + 'view.php?strip=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)')) imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') +
+ tagre("img", "src", r'previous\.gif')) tagre("img", "src", r'previous\.gif'))
help = 'Index format: number' help = 'Index format: number'
@ -70,8 +76,10 @@ class CasuallyKayla(_BasicScraper):
url = 'http://casuallykayla.com/' url = 'http://casuallykayla.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '89' firstStripUrl = stripUrl % '89'
imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)')) r'(http://casuallykayla\.com/comics/[^"]+)'))
prevSearch = compile(tagre("div", "class", r'nav-previous') +
tagre("a", "href", r'([^"]+)'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -82,7 +90,9 @@ class Catalyst(_BasicScraper):
stripUrl = baseUrl + "comic.php?comic_id=%s" stripUrl = baseUrl + "comic.php?comic_id=%s"
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
prevSearch = compile("<center>" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl)) prevSearch = compile("<center>" +
tagre("a", "href",
r'(%scomic\.php\?comic_id=\d+)' % rurl))
help = 'Index format: number' help = 'Index format: number'
@ -101,10 +111,12 @@ class CatAndGirl(_BasicScraper):
self.stripUrl % '4299', self.stripUrl % '4299',
) )
class CatNine(_WordpressScraper): class CatNine(_WordpressScraper):
url = 'http://cat-nine.net' url = 'http://cat-nine.net'
firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/' firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/'
class CatVersusHuman(_ParserScraper): class CatVersusHuman(_ParserScraper):
url = 'http://www.catversushuman.com' url = 'http://www.catversushuman.com'
multipleImagesPerStrip = True multipleImagesPerStrip = True
@ -130,7 +142,8 @@ class Champ2010(_BasicScraper):
stripUrl = baseUrl + '%s.html' stripUrl = baseUrl + '%s.html'
firstStripUrl = stripUrl % 'champ1-1-10-fuck' firstStripUrl = stripUrl % 'champ1-1-10-fuck'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after="Previous"))
help = 'Index format: yy-dd-mm' help = 'Index format: yy-dd-mm'
@ -138,8 +151,10 @@ class ChannelAte(_BasicScraper):
url = 'http://www.channelate.com/' url = 'http://www.channelate.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href",
r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -186,13 +201,15 @@ class CigarroAndCerveja(_ParserScraper):
imageSearch = '//div[@id="comic"]//img', imageSearch = '//div[@id="comic"]//img',
prevSearch = '//a[contains(text()," Prev")]', prevSearch = '//a[contains(text()," Prev")]',
class Collar6(_BasicScraper): class Collar6(_BasicScraper):
url = 'http://collar6.com/' url = 'http://collar6.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'archive/%s' stripUrl = url + 'archive/%s'
firstStripUrl = stripUrl % 'collar-6-187' firstStripUrl = stripUrl % 'collar-6-187'
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl,
after="previous"))
help = 'Index format: <name>' help = 'Index format: <name>'
@ -211,7 +228,8 @@ class Commissioned(_BasicScraper):
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '139' firstStripUrl = stripUrl % '139'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="prev"))
help = 'Index format: n' help = 'Index format: n'
@ -222,7 +240,7 @@ class CompanyY(_BasicScraper):
firstStripUrl = stripUrl % '2009/08/14/coming-soon' firstStripUrl = stripUrl % '2009/08/14/coming-soon'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("div", "class", r"nav-previous") + prevSearch = compile(tagre("div", "class", r"nav-previous") +
tagre("a", "href", r'(%s[^"]+)' % rurl)) tagre("a", "href", r'(%s[^"]+)' % rurl))
help = 'Index format: yyyy/mm/dd/strip-name' help = 'Index format: yyyy/mm/dd/strip-name'
@ -268,6 +286,15 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper):
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
class CrimsonDark(_BasicScraper):
url = 'http://www.davidcsimon.com/crimsondark/'
stripUrl = url + 'index.php?view=comic&strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'src="(.+?strips/.+?)"')
prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&amp;strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
help = 'Index format: n (unpadded)'
class CtrlAltDel(_BasicScraper): class CtrlAltDel(_BasicScraper):
url = 'http://www.cad-comic.com/cad/' url = 'http://www.cad-comic.com/cad/'
stripUrl = url + '%s' stripUrl = url + '%s'
@ -290,22 +317,13 @@ class CtrlAltDelSillies(CtrlAltDel):
stripUrl = url + '%s' stripUrl = url + '%s'
class CrimsonDark(_BasicScraper):
url = 'http://www.davidcsimon.com/crimsondark/'
stripUrl = url + 'index.php?view=comic&strip_id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'src="(.+?strips/.+?)"')
prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&amp;strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
help = 'Index format: n (unpadded)'
class CucumberQuest(_BasicScraper): class CucumberQuest(_BasicScraper):
url = 'http://cucumber.gigidigi.com/' url = 'http://cucumber.gigidigi.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'cq/%s/' stripUrl = url + 'cq/%s/'
firstStripUrl = stripUrl % 'page-1' firstStripUrl = stripUrl % 'page-1'
starter = indirectStarter(url + 'recent.html', starter = indirectStarter(url + 'recent.html',
compile(r'window\.location="(/cq/[^"]+/)"')) compile(r'window\.location="(/cq/[^"]+/)"'))
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..helpers import indirectStarter from ..helpers import indirectStarter
@ -18,13 +20,16 @@ class EarthsongSaga(_ParserScraper):
@classmethod @classmethod
def fetchUrls(cls, url, data, urlSearch): def fetchUrls(cls, url, data, urlSearch):
urls = super(EarthsongSaga, cls).fetchUrls(url, data, urlSearch) urls = super(EarthsongSaga, cls).fetchUrls(url, data, urlSearch)
return [x.replace('earthsongsaga.com/../', 'earthsongsaga.com/') for x in urls] return [x.replace('earthsongsaga.com/../',
'earthsongsaga.com/') for x in urls]
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', IGNORECASE).search(imageUrl) imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
IGNORECASE).search(imageUrl)
if not imgmatch: if not imgmatch:
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$', IGNORECASE).search(imageUrl) imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
IGNORECASE).search(imageUrl)
suffix = "cover" suffix = "cover"
else: else:
suffix = "" suffix = ""
@ -33,16 +38,18 @@ class EarthsongSaga(_ParserScraper):
int(imgmatch.group(3)), suffix) int(imgmatch.group(3)), suffix)
class EatLiver(_BasicScraper): class EatLiver(_BasicScraper):
url = 'http://www.eatliver.com/' url = 'http://www.eatliver.com/'
rurl = escape(url) rurl = escape(url)
starter = indirectStarter(url, compile(tagre("a", "href", r'(i\.php\?n=\d+)') + starter = indirectStarter(url, compile(
tagre("a", "href", r'(i\.php\?n=\d+)') +
tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br")) tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br"))
stripUrl = url + "i.php?n=%s" stripUrl = url + "i.php?n=%s"
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl, before="image_src")) imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') + "&#060;&#060; Previous") before="image_src"))
prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
"&#060;&#060; Previous")
class EatThatToast(_BasicScraper): class EatThatToast(_BasicScraper):
@ -64,6 +71,7 @@ class EdibleDirt(_BasicScraper):
prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)")+"Previous") prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)")+"Previous")
help = 'Index format: number' help = 'Index format: number'
class EdmundFinney(_ParserScraper): class EdmundFinney(_ParserScraper):
url = 'http://eqcomics.com/' url = 'http://eqcomics.com/'
firstStripUrl = url + '2009/03/08/sunday-aliens/' firstStripUrl = url + '2009/03/08/sunday-aliens/'
@ -71,6 +79,7 @@ class EdmundFinney(_ParserScraper):
prevSearch = '//a[@class="navi navi-prev"]' prevSearch = '//a[@class="navi navi-prev"]'
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class EerieCuties(_BasicScraper): class EerieCuties(_BasicScraper):
url = 'http://www.eeriecuties.com/' url = 'http://www.eeriecuties.com/'
stripUrl = url + 'strips-ec/%s' stripUrl = url + 'strips-ec/%s'
@ -79,37 +88,13 @@ class EerieCuties(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class Eriadan(_BasicScraper):
url = 'http://www.shockdom.com/webcomics/eriadan/'
rurl = escape(url)
stripUrl = url + '%s/'
multipleImagesPerStrip = True
imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl, after='width="[68]00"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
def shouldSkipUrl(self, url, data):
return url in (
self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso", # video
)
class Erstwhile(_ParserScraper):
url = 'http://www.erstwhiletales.com/'
stripUrl = url + '%s/'
css = True
imageSearch = 'div.comicpane a img'
prevSearch = 'a.navi-prev'
help = 'Index format: title-nn'
class ElfOnlyInn(_BasicScraper): class ElfOnlyInn(_BasicScraper):
url = 'http://www.elfonlyinn.net/' url = 'http://www.elfonlyinn.net/'
stripUrl = url + 'd/%s.html' stripUrl = url + 'd/%s.html'
firstStripUrl = stripUrl % '20020523' firstStripUrl = stripUrl % '20020523'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
tagre("img", "src", r'/images/previous_day\.gif')) tagre("img", "src", r'/images/previous_day\.gif'))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -117,8 +102,10 @@ class ElGoonishShive(_BasicScraper):
name = 'KeenSpot/ElGoonishShive' name = 'KeenSpot/ElGoonishShive'
url = 'http://www.egscomics.com/' url = 'http://www.egscomics.com/'
stripUrl = url + 'index.php?id=%s' stripUrl = url + 'index.php?id=%s'
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)', after="comic")) imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)',
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev")) after="comic"))
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)',
after="prev"))
help = 'Index format: number' help = 'Index format: number'
@ -126,8 +113,10 @@ class ElGoonishShiveNP(_BasicScraper):
name = 'KeenSpot/ElGoonishShiveNP' name = 'KeenSpot/ElGoonishShiveNP'
url = 'http://www.egscomics.com/egsnp.php' url = 'http://www.egscomics.com/egsnp.php'
stripUrl = url + '?id=%s' stripUrl = url + '?id=%s'
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)', after="comic")) imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)',
prevSearch = compile(tagre("a", "href", r'(/egsnp\.php\?id=\d+)', after="prev")) after="comic"))
prevSearch = compile(tagre("a", "href", r'(/egsnp\.php\?id=\d+)',
after="prev"))
help = 'Index format: number' help = 'Index format: number'
@ -150,6 +139,22 @@ class EmergencyExit(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class Eriadan(_BasicScraper):
url = 'http://www.shockdom.com/webcomics/eriadan/'
rurl = escape(url)
stripUrl = url + '%s/'
multipleImagesPerStrip = True
imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl,
after='width="[68]00"'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
def shouldSkipUrl(self, url, data):
return url in (
self.stripUrl % "2013/04/02/istruzioni-per-il-non-uso", # video
)
class ErrantStory(_BasicScraper): class ErrantStory(_BasicScraper):
url = 'http://www.errantstory.com/' url = 'http://www.errantstory.com/'
stripUrl = url + '%s' stripUrl = url + '%s'
@ -158,6 +163,15 @@ class ErrantStory(_BasicScraper):
help = 'Index format: yyyy-mm-dd/num' help = 'Index format: yyyy-mm-dd/num'
class Erstwhile(_ParserScraper):
url = 'http://www.erstwhiletales.com/'
stripUrl = url + '%s/'
css = True
imageSearch = 'div.comicpane a img'
prevSearch = 'a.navi-prev'
help = 'Index format: title-nn'
class EverybodyLovesEricRaymond(_BasicScraper): class EverybodyLovesEricRaymond(_BasicScraper):
url = 'http://geekz.co.uk/lovesraymond/' url = 'http://geekz.co.uk/lovesraymond/'
stripUrl = url + 'archive/%s' stripUrl = url + 'archive/%s'
@ -190,11 +204,13 @@ class EvilInc(_BasicScraper):
url = 'http://evil-inc.com/' url = 'http://evil-inc.com/'
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'monday-3' firstStripUrl = stripUrl % 'monday-3'
imageSearch = compile(tagre("div", "id", "comic") + imageSearch = compile(
r'\s*.*\s*' + #filter out the variant href tag tagre("div", "id", "comic") +
tagre("img", "src", r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)')) r'\s*.*\s*' + # filter out the variant href tag
tagre("img", "src",
r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre("span", "class", "mininav-prev") + prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'([^"]+)')) tagre("a", "href", r'([^"]+)'))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -214,7 +230,8 @@ class ExploitationNow(_BasicScraper):
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '2000-07-07/9' firstStripUrl = stripUrl % '2000-07-07/9'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy-mm-dd/num' help = 'Index format: yyyy-mm-dd/num'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..util import tagre from ..util import tagre
@ -24,7 +26,8 @@ class FantasyRealms(_BasicScraper):
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE) imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE) prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter(url, starter = indirectStarter(
url,
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)) compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
@ -82,7 +85,7 @@ class Flemcomics(_BasicScraper):
stripUrl = url + 'd/%s.html' stripUrl = url + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') +
tagre("img", "src", r'/images/previous_day\.jpg')) tagre("img", "src", r'/images/previous_day\.jpg'))
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -92,7 +95,8 @@ class Flipside(_BasicScraper):
stripUrl = url + '?i=%s' stripUrl = url + '?i=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%s\?i=\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?i=\d+)' % rurl,
after="prev"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
@ -112,7 +116,7 @@ class FonFlatter(_BasicScraper):
self.stripUrl % "2006/09/21/danke", self.stripUrl % "2006/09/21/danke",
self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen", self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen",
self.stripUrl % "2005/10/19/naq-never-asked-questions", self.stripUrl % "2005/10/19/naq-never-asked-questions",
) )
class ForLackOfABetterComic(_BasicScraper): class ForLackOfABetterComic(_BasicScraper):
@ -138,13 +142,6 @@ class FragileSpanish(_ParserScraper):
prevSearch = '//a[@class="comicnav" and contains(text(),"Anterior")]' prevSearch = '//a[@class="comicnav" and contains(text(),"Anterior")]'
lang = 'es' lang = 'es'
class Freefall(_BasicScraper):
url = 'http://freefall.purrsia.com/default.htm'
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
help = 'Index format: nnnn/nnnnn'
class FredoAndPidjin(_BasicScraper): class FredoAndPidjin(_BasicScraper):
url = 'http://www.pidjin.net/' url = 'http://www.pidjin.net/'
@ -157,10 +154,19 @@ class FredoAndPidjin(_BasicScraper):
) )
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>") prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
starter = indirectStarter(url, starter = indirectStarter(
url,
compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))) compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
class Freefall(_BasicScraper):
url = 'http://freefall.purrsia.com/default.htm'
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
imageSearch = compile(r'<img src="(/ff\d+/.+?.\w{3,4})"')
prevSearch = compile(r'<A HREF="(/ff\d+/.+?.htm)">Previous</A>')
help = 'Index format: nnnn/nnnnn'
class FullFrontalNerdity(_BasicScraper): class FullFrontalNerdity(_BasicScraper):
url = 'http://ffn.nodwick.com/' url = 'http://ffn.nodwick.com/'
rurl = escape(url) rurl = escape(url)

View file

@ -1,15 +1,21 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from ..scraper import _ParserScraper from ..scraper import _ParserScraper
class Footloose(_ParserScraper): class Footloose(_ParserScraper):
url = 'http://footloosecomic.com/footloose.php' url = 'http://footloosecomic.com/footloose.php'
imageSearch='//body/p[1]//img' imageSearch = '//body/p[1]//img'
prevSearch='//body/a[2]' prevSearch = '//body/a[2]'
class Cherry(Footloose): class Cherry(Footloose):
url = 'http://footloosecomic.com/cherry/index.php' url = 'http://footloosecomic.com/cherry/index.php'
class Desigaspring(Footloose): class Desigaspring(Footloose):
url = 'http://footloosecomic.com/dspring/index.php' url = 'http://footloosecomic.com/dspring/index.php'

View file

@ -3,6 +3,7 @@
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher # Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
@ -155,6 +156,16 @@ class GrrlPower(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
class GUComics(_BasicScraper):
url = 'http://www.gucomics.com/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % '20000710'
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
tagre("img", "src", r'/images/nav/prev\.png'))
help = 'Index format: yyyymmdd'
class GunnerkriggCourt(_BasicScraper): class GunnerkriggCourt(_BasicScraper):
url = 'http://www.gunnerkrigg.com/' url = 'http://www.gunnerkrigg.com/'
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
@ -176,13 +187,3 @@ class Gunshow(_BasicScraper):
tagre("a", "href", r'([^"]+)') + tagre("a", "href", r'([^"]+)') +
tagre("img", "src", r'[^"]*menu/small/previous\.gif')) tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
help = 'Index format: n' help = 'Index format: n'
class GUComics(_BasicScraper):
url = 'http://www.gucomics.com/'
stripUrl = url + '%s'
firstStripUrl = stripUrl % '20000710'
imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\d+)') +
tagre("img", "src", r'/images/nav/prev\.png'))
help = 'Index format: yyyymmdd'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
@ -25,21 +27,19 @@ class MadamAndEve(_BasicScraper):
class Magellan(_ParserScraper): class Magellan(_ParserScraper):
description = u'A comic strip about Superheroes and Not-Superheroes'
url = 'http://magellanverse.com/' url = 'http://magellanverse.com/'
stripUrl = url + '%s/'
css = True css = True
imageSearch = '#comic-1 > a:first-child img' imageSearch = '#comic-1 > a:first-child img'
prevSearch = '.nav-previous > a' prevSearch = '.nav-previous > a'
help = 'Index format: stripname'
class MagickChicks(_BasicScraper): class MagickChicks(_BasicScraper):
url = 'http://www.magickchicks.com/' url = 'http://www.magickchicks.com/'
stripUrl = url + 'strips-mc/%s' stripUrl = url + 'strips-mc/%s'
firstStripUrl = stripUrl % 'tis_but_a_trifle' firstStripUrl = stripUrl % 'tis_but_a_trifle'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)', before="cn[id]prevt")) prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)',
before="cn[id]prevt"))
help = 'Index format: name' help = 'Index format: name'
@ -54,7 +54,6 @@ class ManlyGuysDoingManlyThings(_ParserScraper):
class MareInternum(_ParserScraper): class MareInternum(_ParserScraper):
description = u'Mare Internum is an online science fiction graphic novel about the isolated inhabitants of the planet Mars. '
url = 'http://marecomic.com/' url = 'http://marecomic.com/'
stripUrl = url + 'comics/ch%s' stripUrl = url + 'comics/ch%s'
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
@ -76,7 +75,8 @@ class MarriedToTheSea(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '022806' firstStripUrl = stripUrl % '022806'
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow")) imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
before="overflow"))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday") prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
help = 'Index format: mmddyy' help = 'Index format: mmddyy'
@ -85,6 +85,7 @@ class MarriedToTheSea(_BasicScraper):
unused, date, filename = imageUrl.rsplit('/', 2) unused, date, filename = imageUrl.rsplit('/', 2)
return '%s-%s' % (date, filename) return '%s-%s' % (date, filename)
class MaxOveracts(_ParserScraper): class MaxOveracts(_ParserScraper):
url = 'http://occasionalcomics.com/' url = 'http://occasionalcomics.com/'
stripUrl = url + '%s/' stripUrl = url + '%s/'
@ -108,7 +109,8 @@ class MenageA3(_BasicScraper):
url = 'http://www.ma3comic.com/' url = 'http://www.ma3comic.com/'
stripUrl = url + 'strips-ma3/%s' stripUrl = url + 'strips-ma3/%s'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)', before="cn[id]prev")) prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)',
before="cn[id]prev"))
help = 'Index format: name' help = 'Index format: name'
@ -117,40 +119,43 @@ class Misfile(_BasicScraper):
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
firstStripUrl = stripUrl % '2004-02-22' firstStripUrl = stripUrl % '2004-02-22'
imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'")) imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", before="Previous")) prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'",
before="Previous"))
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class Moonsticks(_ParserScraper):
url = "http://moonsticks.org/"
stripUrl = url
imageSearch = "//div[@class='entry']//img"
prevSearch = u"//a[text()='« Previous']"
help = 'Index format: stripname'
class MonsieurLeChien(_BasicScraper): class MonsieurLeChien(_BasicScraper):
url = 'http://www.monsieur-le-chien.fr/' url = 'http://www.monsieur-le-chien.fr/'
stripUrl = url + 'index.php?planche=%s' stripUrl = url + 'index.php?planche=%s'
firstStripUrl = stripUrl % '2' firstStripUrl = stripUrl % '2'
lang = 'fr' lang = 'fr'
imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "i/precedent.gif")) prevSearch = compile(tagre("a", "href", r'([^"]+)') +
tagre("img", "src", "i/precedent.gif"))
help = 'Index format: n' help = 'Index format: n'
class Moonsticks(_ParserScraper):
url = "http://moonsticks.org/"
imageSearch = "//div[@class='entry']//img"
prevSearch = u"//a[text()='« Previous']"
class MrLovenstein(_BasicScraper): class MrLovenstein(_BasicScraper):
url = 'http://www.mrlovenstein.com/' url = 'http://www.mrlovenstein.com/'
rurl = escape(url)
stripUrl = url + 'comic/%s#comic' stripUrl = url + 'comic/%s#comic'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = ( imageSearch = (
#captures rollover comic # captures rollover comic
compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" + tagre("div", "style", r'display: none;') + "\s*.*\s*" + tagre("img", "src", r'(/images/comics/[^"]+)')), compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" +
#captures standard comic tagre("div", "style", r'display: none;') + "\s*.*\s*" +
compile(tagre("img", "src", r'(/images/comics/[^"]+)', before="comic_main_image")), tagre("img", "src", r'(/images/comics/[^"]+)')),
# captures standard comic
compile(tagre("img", "src", r'(/images/comics/[^"]+)',
before="comic_main_image")),
) )
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "/images/nav_left.png")) prevSearch = compile(tagre("a", "href", r'([^"]+)') +
tagre("img", "src", "/images/nav_left.png"))
textSearch = compile(r'<meta name="description" content="(.+?)" />') textSearch = compile(r'<meta name="description" content="(.+?)" />')
help = 'Index Format: n' help = 'Index Format: n'
@ -163,7 +168,8 @@ class MyCartoons(_BasicScraper):
compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)),
compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)),
) )
prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + "&laquo;") prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) +
"&laquo;")
help = 'Index format: number' help = 'Index format: number'
lang = 'de' lang = 'de'
@ -172,4 +178,3 @@ class MysteriesOfTheArcana(_ParserScraper):
url = 'http://mysteriesofthearcana.com/' url = 'http://mysteriesofthearcana.com/'
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="navprevious"]' prevSearch = '//a[@class="navprevious"]'
help = 'Index format: n (unpadded)'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
@ -37,7 +39,8 @@ class NatalieDee(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '022806' firstStripUrl = stripUrl % '022806'
imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow")) imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
before="overflow"))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday") prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
help = 'Index format: mmddyy' help = 'Index format: mmddyy'
@ -47,6 +50,23 @@ class NatalieDee(_BasicScraper):
return '%s-%s' % (date, filename) return '%s-%s' % (date, filename)
class NekkoAndJoruba(_BasicScraper):
url = 'http://www.nekkoandjoruba.com/'
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '7'
imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
help = 'Index format: nnn'
class NekoTheKitty(_ParserScraper):
url = 'http://www.nekothekitty.net/'
stripUrl = url + 'comics/%s'
firstStripUrl = stripUrl % '936393/001-video-games'
imageSearch = '//a[@id="comic_image"]/img'
prevSearch = '//a[text()="<-"]'
class NeoEarth(_BasicScraper): class NeoEarth(_BasicScraper):
url = 'http://www.neo-earth.com/NE/' url = 'http://www.neo-earth.com/NE/'
stripUrl = url + 'index.php?date=%s' stripUrl = url + 'index.php?date=%s'
@ -72,23 +92,6 @@ class NewWorld(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripn' help = 'Index format: yyyy/mm/dd/stripn'
class NekkoAndJoruba(_BasicScraper):
url = 'http://www.nekkoandjoruba.com/'
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '7'
imageSearch = compile(r'<img src="(http://www\.nekkoandjoruba\.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
help = 'Index format: nnn'
class NekoTheKitty(_ParserScraper):
url = 'http://www.nekothekitty.net/'
stripUrl = url + 'comics/%s'
firstStripUrl = stripUrl % '936393/001-video-games'
imageSearch = '//a[@id="comic_image"]/img'
prevSearch = '//a[text()="<-"]'
class NichtLustig(_BasicScraper): class NichtLustig(_BasicScraper):
url = 'http://www.nichtlustig.de/main.html' url = 'http://www.nichtlustig.de/main.html'
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html' stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
@ -96,13 +99,12 @@ class NichtLustig(_BasicScraper):
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)') imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)')) prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
starter = indirectStarter(url, starter = indirectStarter(
compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))) url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
class Nimona(_BasicScraper): class Nimona(_BasicScraper):
url = 'http://gingerhaze.com/nimona/' url = 'http://gingerhaze.com/nimona/'
rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % "comic/page-1" firstStripUrl = stripUrl % "comic/page-1"
imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)')) imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)'))
@ -111,20 +113,6 @@ class Nimona(_BasicScraper):
endOfLife = True endOfLife = True
class Nnewts(_BasicScraper):
url = 'http://nnewts.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'nnewts-page-1'
imageSearch = compile(tagre("img", "src", r'(%snewty/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s(?:nnewts-)?page-\d+/)' % rurl, after="navi-prev"))
help = 'Index format: page-number'
@classmethod
def getDisabledReasons(cls):
return {'cannotReadOnline': 'Comic is not available for reading online.'}
class NobodyScores(_BasicScraper): class NobodyScores(_BasicScraper):
url = 'http://nobodyscores.loosenutstudio.com/' url = 'http://nobodyscores.loosenutstudio.com/'
rurl = escape(url) rurl = escape(url)
@ -143,11 +131,14 @@ class NoNeedForBushido(_BasicScraper):
imageSearch = compile( imageSearch = compile(
tagre("a", "rel", "next") + tagre("a", "rel", "next") +
tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl,
after="attachment-full")) after="attachment-full"))
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="previous-webcomic")) prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
after="previous-webcomic"))
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter(url, starter = indirectStarter(
compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="last-webcomic"))) url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
after="last-webcomic")))
class NotInventedHere(_BasicScraper): class NotInventedHere(_BasicScraper):
url = 'http://notinventedhe.re/' url = 'http://notinventedhe.re/'
@ -158,6 +149,7 @@ class NotInventedHere(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous') prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous')
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
class Nukees(_BasicScraper): class Nukees(_BasicScraper):
url = 'http://www.nukees.com/' url = 'http://www.nukees.com/'
stripUrl = url + 'd/%s' stripUrl = url + 'd/%s'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
@ -47,9 +49,12 @@ class OhJoySexToy(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % 'introduction' firstStripUrl = stripUrl % 'introduction'
imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='navi navi-prev')) tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "alt", r'([^"]+)')) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
after='navi navi-prev'))
textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" +
tagre("img", "alt", r'([^"]+)'))
help = 'Index Format: name' help = 'Index Format: name'
adult = True adult = True
@ -75,6 +80,15 @@ class OmakeTheater(_ParserScraper):
help = 'Index format: number (unpadded)' help = 'Index format: number (unpadded)'
class OneQuestion(_BasicScraper):
    # Scraper settings for One Question; strip pages are addressed through
    # the strip_id query parameter of comic.php.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://onequestioncomic.com/'
    stripUrl = url + 'comic.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    # The image path may carry an optional leading "../".
    imageSearch = compile(
        tagre("img", "src",
              r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
    # A strip link directly followed by the arrow_prev image.
    prevSearch = compile(
        tagre("a", "href", r'(comic\.php\?strip_id=\d+)') +
        tagre("img", "src", r'img/arrow_prev\.jpg'))
    help = 'Index format: n (unpadded)'
class OnTheFastrack(_BasicScraper): class OnTheFastrack(_BasicScraper):
url = 'http://onthefastrack.com/' url = 'http://onthefastrack.com/'
stripUrl = url + 'comics/%s' stripUrl = url + 'comics/%s'
@ -82,7 +96,7 @@ class OnTheFastrack(_BasicScraper):
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"') imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url) prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
help = 'Index format: monthname-dd-yyyy' help = 'Index format: monthname-dd-yyyy'
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
name = pageUrl.rsplit('/', 3)[2] name = pageUrl.rsplit('/', 3)[2]
@ -94,21 +108,14 @@ class OnTheFastrack(_BasicScraper):
return "%s.gif" % name.title() return "%s.gif" % name.title()
class OneQuestion(_BasicScraper):
    # Scraper settings for One Question; strip pages are addressed through
    # the strip_id query parameter of comic.php.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://onequestioncomic.com/'
    stripUrl = url + 'comic.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    # The image path may carry an optional leading "../".
    imageSearch = compile(
        tagre("img", "src",
              r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})'))
    # A strip link directly followed by the arrow_prev image.
    prevSearch = compile(
        tagre("a", "href", r'(comic\.php\?strip_id=\d+)') +
        tagre("img", "src", r'img/arrow_prev\.jpg'))
    help = 'Index format: n (unpadded)'
class Optipess(_BasicScraper): class Optipess(_BasicScraper):
url = 'http://www.optipess.com/' url = 'http://www.optipess.com/'
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/' firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url)) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev")) r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url)) textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -119,8 +126,9 @@ class OrnerBoy(_BasicScraper):
stripUrl = url + 'index.php?comicID=%s' stripUrl = url + 'index.php?comicID=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(comics/\d+\.[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comics/\d+\.[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?comicID=\d+)' % rurl) + prevSearch = compile(tagre("a", "href",
tagre("img", "src", r'images/prev_a\.gif')) r'(%sindex\.php\?comicID=\d+)' % rurl) +
tagre("img", "src", r'images/prev_a\.gif'))
help = 'Index format: number' help = 'Index format: number'
@ -138,6 +146,6 @@ class OverCompensating(_BasicScraper):
stripUrl = url + 'oc/index.php?comic=%s' stripUrl = url + 'oc/index.php?comic=%s'
firstStripUrl = stripUrl % '0' firstStripUrl = stripUrl % '0'
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
r'(/oc/index\.php\?comic=\d+)', after="go back")) after="go back"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter from ..helpers import bounceStarter, queryNamer, indirectStarter
@ -10,10 +12,12 @@ from ..util import tagre
class ParadigmShift(_BasicScraper): class ParadigmShift(_BasicScraper):
url = 'http://www.paradigmshiftmanga.com/' url = 'http://www.paradigmshiftmanga.com/'
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after="next-comic-link"))) starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)',
after="next-comic-link")))
stripUrl = url + 'ps/%s.html' stripUrl = url + 'ps/%s.html'
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)')) imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="previous-comic-link")) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="previous-comic-link"))
help = 'Index format: custom' help = 'Index format: custom'
@ -22,9 +26,10 @@ class ParallelUniversum(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '001-der-comic-ist-tot' firstStripUrl = stripUrl % '001-der-comic-ist-tot'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src",
r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl) +
tagre("span", "class", "prev")) tagre("span", "class", "prev"))
help = 'Index format: number-stripname' help = 'Index format: number-stripname'
lang = 'de' lang = 'de'
@ -74,8 +79,10 @@ class PennyArcade(_BasicScraper):
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1998/11/18' firstStripUrl = stripUrl % '1998/11/18'
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnPrev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext")) before="btnPrev"))
nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
before="btnNext"))
help = 'Index format: yyyy/mm/dd/' help = 'Index format: yyyy/mm/dd/'
@classmethod @classmethod
@ -110,7 +117,8 @@ class PeppermintSaga(_BasicScraper):
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '3' firstStripUrl = stripUrl % '3'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="prev"))
help = 'Index format: number' help = 'Index format: number'
adult = True adult = True
@ -121,14 +129,16 @@ class PHDComics(_BasicScraper):
stripUrl = baseUrl + 'comics/archive.php?comicid=%s' stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd[^ ]+)', quote="")) imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd[^ ]+)', quote=""))
prevSearch = compile(tagre("a", "href", r'((?:comics/)?archive\.php\?comicid=\d+)', quote="") + prevSearch = compile(
tagre("a", "href", r'((?:comics/)?archive\.php\?comicid=\d+)',
quote="") +
tagre("img", "src", r'(?:comics/)?images/prev_button\.gif', quote="")) tagre("img", "src", r'(?:comics/)?images/prev_button\.gif', quote=""))
help = 'Index format: number' help = 'Index format: number'
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return url in (
self.stripUrl % '1669', # video self.stripUrl % '1669', # video
) )
@ -138,20 +148,11 @@ class PicPakDog(_BasicScraper):
stripUrl = url + 'comic/%s/' stripUrl = url + 'comic/%s/'
firstStripUrl = stripUrl % 'dogs-cant-spell' firstStripUrl = stripUrl % 'dogs-cant-spell'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="nav-prev")) prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
after="nav-prev"))
help = 'Index format: stripname' help = 'Index format: stripname'
class Pixel(_BasicScraper):
    # Scraper settings for the Pixel comic.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://pixelcomic.net/'
    # Regex-escaped site URL, interpolated into the previous-link pattern.
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '000.shtml'
    # Images are referenced by a bare numeric .png file name.
    imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
    # Previous pages end in either .php or .shtml.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl,
              before="prev"))
    help = 'Index format: nnn'
class PiledHigherAndDeeper(_BasicScraper): class PiledHigherAndDeeper(_BasicScraper):
url = 'http://www.phdcomics.com/comics.php' url = 'http://www.phdcomics.com/comics.php'
starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')) starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif'))
@ -172,6 +173,17 @@ class Pimpette(_ParserScraper):
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class Pixel(_BasicScraper):
    # Scraper settings for the Pixel comic.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://pixelcomic.net/'
    # Regex-escaped site URL, interpolated into the previous-link pattern.
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '000.shtml'
    # Images are referenced by a bare numeric .png file name.
    imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
    # Previous pages end in either .php or .shtml.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl,
              before="prev"))
    help = 'Index format: nnn'
class PlanescapeSurvival(_BasicScraper): class PlanescapeSurvival(_BasicScraper):
url = 'http://planescapecomic.com/' url = 'http://planescapecomic.com/'
stripUrl = url + '%s.html' stripUrl = url + '%s.html'
@ -204,14 +216,16 @@ class PoorlyDrawnLines(_BasicScraper):
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % 'campus-characters/' firstStripUrl = stripUrl % 'campus-characters/'
imageSearch = compile(tagre("img", "src", r'(http://poorlydrawnlines\.com/wp-content/uploads/\d+/\d+/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://poorlydrawnlines\.com/wp-content/uploads/\d+/\d+/[^"]+)'))
prevSearch = compile(tagre("li", "class", r'previous') + tagre("a", "href", r'(%s[^"]+)' % rurl)) prevSearch = compile(tagre("li", "class", r'previous') +
tagre("a", "href", r'(%s[^"]+)' % rurl))
help = 'Index Format: name' help = 'Index Format: name'
class Precocious(_BasicScraper): class Precocious(_BasicScraper):
url = 'http://www.precociouscomic.com/' url = 'http://www.precociouscomic.com/'
starter = indirectStarter(url, starter = indirectStarter(
compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png")) url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
) )
stripUrl = url + 'archive/comic/%s' stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))')) imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
@ -234,7 +248,8 @@ class PunksAndNerds(_BasicScraper):
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '15' firstStripUrl = stripUrl % '15'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="navi-prev"))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -250,5 +265,6 @@ class PvPonline(_BasicScraper):
url = 'http://pvponline.com/comic' url = 'http://pvponline.com/comic'
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)')) imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="left divider")) prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)',
after="left divider"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'

View file

@ -1,9 +1,11 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE, sub from re import compile, escape, IGNORECASE, sub
from os.path import splitext, basename from os.path import splitext
from datetime import datetime from datetime import datetime
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter from ..helpers import indirectStarter, bounceStarter
@ -14,7 +16,7 @@ class SabrinaOnline(_BasicScraper):
url = 'http://sabrina-online.com/' url = 'http://sabrina-online.com/'
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)')) imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") + prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
tagre("img", "src", "b_back.gif")) tagre("img", "src", "b_back.gif"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
adult = True adult = True
multipleImagesPerStrip = True multipleImagesPerStrip = True
@ -32,9 +34,10 @@ class SabrinaOnline(_BasicScraper):
class SafelyEndangered(_BasicScraper): class SafelyEndangered(_BasicScraper):
url = 'http://www.safelyendangered.com/' url = 'http://www.safelyendangered.com/'
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'ignored' firstStripUrl = stripUrl % 'ignored'
imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*')) imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev")) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads')) textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads'))
help = 'Index format: yyyy/mm/stripname' help = 'Index format: yyyy/mm/stripname'
@ -84,9 +87,12 @@ class ScenesFromAMultiverse(_BasicScraper):
firstStripUrl = stripUrl % '2010/06/14/parenthood' firstStripUrl = stripUrl % '2010/06/14/parenthood'
imageSearch = ( imageSearch = (
compile(tagre("div", "id", "comic") + r"\s*" + compile(tagre("div", "id", "comic") + r"\s*" +
tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), tagre("img", "src",
compile(tagre("div", "id", "comic") + r"\s*" + tagre("a", "href", r'[^"]*') + r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), compile(tagre("div", "id", "comic") + r"\s*" +
tagre("a", "href", r'[^"]*') +
tagre("img", "src",
r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')),
) )
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -98,7 +104,8 @@ class SchlockMercenary(_BasicScraper):
firstStripUrl = stripUrl % '2000-06-12' firstStripUrl = stripUrl % '2000-06-12'
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous")) prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'",
after="nav-previous"))
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
@ -137,8 +144,8 @@ class SequentialArt(_BasicScraper):
stripUrl = url + '?s=%s' stripUrl = url + '?s=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip")) imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') +
+ tagre("img", "src", "Nav_BackOne\.gif")) tagre("img", "src", "Nav_BackOne\.gif"))
help = 'Index format: name' help = 'Index format: name'
@ -165,7 +172,8 @@ class Sheldon(_BasicScraper):
stripUrl = url + 'archive/%s.html' stripUrl = url + 'archive/%s.html'
firstStripUrl = stripUrl % '011130' firstStripUrl = stripUrl % '011130'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, after="sidenav-prev")) prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
after="sidenav-prev"))
help = 'Index format: yymmdd' help = 'Index format: yymmdd'
@ -194,7 +202,8 @@ class Shivae(_BasicScraper):
stripUrl = url + 'blog/%s/' stripUrl = url + 'blog/%s/'
firstStripUrl = stripUrl % '2007/09/21/09212007' firstStripUrl = stripUrl % '2007/09/21/09212007'
imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -210,9 +219,10 @@ class Shortpacked(_ParserScraper):
class ShotgunShuffle(_BasicScraper): class ShotgunShuffle(_BasicScraper):
url = 'http://shotgunshuffle.com/' url = 'http://shotgunshuffle.com/'
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'pilot/' firstStripUrl = stripUrl % 'pilot/'
imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev")) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="navi navi-prev"))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -220,28 +230,19 @@ class SinFest(_BasicScraper):
name = 'KeenSpot/SinFest' name = 'KeenSpot/SinFest'
url = 'http://www.sinfest.net/' url = 'http://www.sinfest.net/'
stripUrl = url + 'view.php?date=%s' stripUrl = url + 'view.php?date=%s'
imageSearch = compile(tagre("img","src", r'(btphp/comics/.+)', after="alt")) imageSearch = compile(tagre("img", "src", r'(btphp/comics/.+)',
prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' + tagre("img", "src", r'\.\./images/prev\.gif')) after="alt"))
prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' +
tagre("img", "src", r'\.\./images/prev\.gif'))
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# XXX disallowed by robots.txt
class _Sketchesnatched(_BasicScraper):
    # Scraper settings for the Sketchesnatched blog; a strip is addressed
    # by the timestamp passed as the updated-max search parameter.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://sketchesnatched.blogspot.com/'
    # "%%2B" is a literal "%2B" once the index has been interpolated.
    stripUrl = url + 'search?updated-max=%s%%2B01:00&max-results=1'
    firstStripUrl = stripUrl % '2011-01-27T08:32:00'
    # Both patterns are built with quote="'" and exclude "'" from the value.
    imageSearch = compile(
        tagre("meta", "content",
              r"(http://\d+\.bp\.blogspot\.com/[^']+)",
              after=r'image_url', quote="'"))
    prevSearch = compile(
        tagre("a", "href",
              r"(http://sketchesnatched\.blogspot\.[a-z]+/search[^']+)",
              before=r"blog-pager-older-link", quote="'"))
    help = 'Index format: yyyy-mm-ddThh:mm:ss'
class SkinDeep(_BasicScraper): class SkinDeep(_BasicScraper):
url = 'http://www.skindeepcomic.com/' url = 'http://www.skindeepcomic.com/'
stripUrl = url + 'archive/%s/' stripUrl = url + 'archive/%s/'
imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"') imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="previous-webcomic-link")) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="previous-webcomic-link"))
help = 'Index format: custom' help = 'Index format: custom'
@ -261,7 +262,8 @@ class SleeplessDomain(_ParserScraper):
start = '' start = ''
tsmatch = compile(r'/(\d+)-').search(imageUrl) tsmatch = compile(r'/(\d+)-').search(imageUrl)
if tsmatch: if tsmatch:
start = datetime.utcfromtimestamp(int(tsmatch.group(1))).strftime("%Y-%m-%d") start = datetime.utcfromtimestamp(
int(tsmatch.group(1))).strftime("%Y-%m-%d")
else: else:
# There were only chapter 1, page 4 and 5 not matching when writing # There were only chapter 1, page 4 and 5 not matching when writing
# this... # this...
@ -315,10 +317,11 @@ class SnowFlakes(_BasicScraper):
endOfLife = True endOfLife = True
imageSearch = ( imageSearch = (
compile(tagre("img", "src", r'(comics/[^"]+)')), compile(tagre("img", "src", r'(comics/[^"]+)')),
compile(tagre("img", "src", r'(http://www.snowflakescomic.com/comics/[^"]+)')), compile(tagre("img", "src",
r'(http://www.snowflakescomic.com/comics/[^"]+)')),
) )
prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") + prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
tagre("img", "src", r'images/nav_prior-ON\.gif')) tagre("img", "src", r'images/nav_prior-ON\.gif'))
help = 'Index format: number' help = 'Index format: number'
@classmethod @classmethod
@ -338,12 +341,12 @@ class SnowFlakes(_BasicScraper):
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip pages without images.""" """Skip pages without images."""
return url in ( return url in (
self.stripUrl % ('279', '2'), # no comic self.stripUrl % ('279', '2'), # no comic
self.stripUrl % ('278', '2'), # no comic self.stripUrl % ('278', '2'), # no comic
self.stripUrl % ('277', '2'), # no comic self.stripUrl % ('277', '2'), # no comic
self.stripUrl % ('276', '2'), # no comic self.stripUrl % ('276', '2'), # no comic
self.stripUrl % ('275', '2'), # no comic self.stripUrl % ('275', '2'), # no comic
self.stripUrl % ('214', '2'), # no comic self.stripUrl % ('214', '2'), # no comic
) )
@ -354,10 +357,11 @@ class SnowFlame(_BasicScraper):
firstStripUrl = stripUrl % ('01', '01') firstStripUrl = stripUrl % ('01', '01')
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame ")) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame "))
prevSearch = compile(tagre("span", "class", "mininav-prev") + prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)) tagre("a", "href",
starter = bounceStarter(url, r'(%s\?comic=snowflame[^"]+)' % rurl))
compile(tagre("span", "class", "mininav-next") + starter = bounceStarter(
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))) url, compile(tagre("span", "class", "mininav-next") +
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
help = 'Index format: chapter-page' help = 'Index format: chapter-page'
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):
@ -378,18 +382,33 @@ class SodiumEyes(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2007/11/08/damning-evidence' firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote="")) imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl,
quote=""))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class SomethingPositive(_BasicScraper):
    # Scraper settings for Something Positive.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    url = 'http://www.somethingpositive.net/'
    stripUrl = url + 'sp%s.shtml'
    # Two alternative image patterns: the numbered strip PNGs and the
    # single twither.gif file.
    imageSearch = (
        compile(tagre("img", "src", r'(sp\d+\.png)')),
        compile(tagre("img", "src", r'(twither\.gif)')),
    )
    # The strip link is followed either by the previous.gif image tag or
    # by the literal text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(sp\d+\.shtml)') +
        "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
    help = 'Index format: mmddyyyy'
class Sorcery101(_BasicScraper): class Sorcery101(_BasicScraper):
baseUrl = 'http://www.sorcery101.net/' baseUrl = 'http://www.sorcery101.net/'
url = baseUrl + 'sorcery-101/' url = baseUrl + 'sorcery-101/'
rurl = escape(baseUrl) rurl = escape(baseUrl)
stripUrl = url + '%s/' stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl, after="previous-")) prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl,
after="previous-"))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -399,7 +418,8 @@ class SpaceTrawler(_BasicScraper):
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4' firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -408,7 +428,8 @@ class Spamusement(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'index.php/comics/view/%s' stripUrl = url + 'index.php/comics/view/%s'
imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE) imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl, IGNORECASE) prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
IGNORECASE)
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter(url, prevSearch) starter = indirectStarter(url, prevSearch)
@ -419,7 +440,8 @@ class SpareParts(_BasicScraper):
stripUrl = baseUrl + 'comics/index.php?date=%s' stripUrl = baseUrl + 'comics/index.php?date=%s'
firstStripUrl = stripUrl % '20031022' firstStripUrl = stripUrl % '20031022'
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic") prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)',
quote="'") + "Previous Comic")
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@ -433,6 +455,29 @@ class StandStillStaySilent(_ParserScraper):
help = 'Index Format: number' help = 'Index Format: number'
class StarCrossdDestiny(_BasicScraper):
    # Scraper settings for Star Cross'd Destiny.
    # NOTE(review): tagre is a project helper not visible here; presumably
    # it builds a regex fragment matching a tag/attribute pair -- confirm.
    baseUrl = 'http://www.starcrossd.net/'
    # Regex-escaped base URL, interpolated into the previous-link pattern.
    rurl = escape(baseUrl)
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'
    # Images live in one of the ch1/, strips/ or book2/ directories.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
    prevSearch = compile(
        r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev'
        % rurl, IGNORECASE)
    help = 'Index format: nnnnnnnn'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return an image name of the form '<directory>-<file stem>'.

        The directory and file stem are the last two path components of
        imageUrl after normalisation, with the file extension removed.
        pageUrl is not used.
        """
        if 'ch1' not in imageUrl:
            # At first all images were stored in a strips/ directory but
            # that was changed with the introduction of book2
            imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
        elif 'strips' in imageUrl:
            # Drop the strips/ path component from ch1 URLs.
            imageUrl = imageUrl.replace('strips/', '')
        directory, filename = imageUrl.split('/')[-2:]
        # Only the stem is used; the extension is discarded.
        return directory + '-' + splitext(filename)[0]
class StationV3(_ParserScraper): class StationV3(_ParserScraper):
url = 'http://www.stationv3.com/' url = 'http://www.stationv3.com/'
stripUrl = url + 'd/%s.html' stripUrl = url + 'd/%s.html'
@ -447,62 +492,18 @@ class StickyDillyBuns(_BasicScraper):
stripUrl = url + 'strips-sdb/%s' stripUrl = url + 'strips-sdb/%s'
firstStripUrl = stripUrl % 'awesome_leading_man' firstStripUrl = stripUrl % 'awesome_leading_man'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev")) prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)',
before="cn[id]prev"))
help = 'Index format: name' help = 'Index format: name'
# Scraper settings for the comic hosted at stubblecomics.com.
class Stubble(_BasicScraper):
url = 'http://stubblecomics.com/'
# Regex-escaped copy of url, interpolated into the patterns below.
rurl = escape(url)
# Strip page template; firstStripUrl fills it with the index '4'.
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '4'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute whose value matches the pattern -- confirm.
# The captured group is an image URL under <url>comics/.
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
# The captured group is a link of the form <url>?p=<digits>.
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
help = 'Index format: number'
# Scraper settings for the comic hosted at www.snotm.com.
class StuffNoOneToldMe(_BasicScraper):
url = 'http://www.snotm.com/'
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01'
# Capture pattern for links to dated post pages; shared by starter and
# prevSearch so both follow the same kind of link.
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
# NOTE(review): indirectStarter and tagre are defined outside this view;
# presumably the starter follows the first olderHref match from url -- confirm.
starter = indirectStarter(url,
compile(tagre("a", "href", olderHref, quote="'")))
# Three alternative image patterns, one per image host (imgur, blogspot,
# googleusercontent), each followed by the markup expected after the tag.
imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:</a>|<br />)"),
compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + r"(?:(?:&nbsp;)?</a>|<span |<br />)"),
compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"),
)
prevSearch = compile(tagre("a", "href", olderHref, quote="'", before="older-link"))
multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/stripname'
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use page URL to construct meaningful image name."""
# The last three path components of pageUrl are year, month and strip name.
parts, year, month, stripname = pageUrl.rsplit('/', 3)
# Drop everything after the last dot of the strip name.
stripname = stripname.rsplit('.', 1)[0]
# Only the final path component of the image URL is kept.
parts, imagename = imageUrl.rsplit('/', 1)
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
# Fixed list of strip pages; data is not inspected.
return url in (
self.stripUrl % '2012/08/self-rant', # no comic
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic
self.stripUrl % '2010/11/ear-infection', # no comic
)
class StrawberryDeathCake(_BasicScraper): class StrawberryDeathCake(_BasicScraper):
url = 'http://strawberrydeathcake.com/' url = 'http://strawberrydeathcake.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'archive/%s/' imageSearch = compile(tagre("img", "src",
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl)) r'(%swp-content/webcomic/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl,
help = 'Index format: stripname' after="previous"))
class StrongFemaleProtagonist(_ParserScraper): class StrongFemaleProtagonist(_ParserScraper):
@ -524,63 +525,72 @@ class StrongFemaleProtagonist(_ParserScraper):
self.stripUrl % 'issue-5/hiatus-2', self.stripUrl % 'issue-5/hiatus-2',
) )
# Scraper settings for the comic hosted at stubblecomics.com.
class Stubble(_BasicScraper):
url = 'http://stubblecomics.com/'
# Regex-escaped copy of url, interpolated into the patterns below.
rurl = escape(url)
# Strip page template; firstStripUrl fills it with the index '4'.
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '4'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute whose value matches the pattern -- confirm.
# The captured group is an image URL under <url>comics/.
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
# The captured group is a link of the form <url>?p=<digits>.
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="navi-prev"))
help = 'Index format: number'
# Scraper settings for the comic hosted at www.snotm.com.
class StuffNoOneToldMe(_BasicScraper):
url = 'http://www.snotm.com/'
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01'
# Capture pattern for links to dated post pages; shared by starter and
# prevSearch so both follow the same kind of link.
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
# NOTE(review): indirectStarter and tagre are defined outside this view;
# presumably the starter follows the first olderHref match from url -- confirm.
starter = indirectStarter(
url, compile(tagre("a", "href", olderHref, quote="'")))
# Three alternative image patterns, one per image host (imgur, blogspot,
# googleusercontent), each followed by the markup expected after the tag.
imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
r"(?:</a>|<br />)"),
compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') +
r"(?:(?:&nbsp;)?</a>|<span |<br />)"),
compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"),
)
prevSearch = compile(tagre("a", "href", olderHref, quote="'",
before="older-link"))
multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/stripname'
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use page URL to construct meaningful image name."""
# The last three path components of pageUrl are year, month and strip name.
parts, year, month, stripname = pageUrl.rsplit('/', 3)
# Drop everything after the last dot of the strip name.
stripname = stripname.rsplit('.', 1)[0]
# Only the final path component of the image URL is kept.
parts, imagename = imageUrl.rsplit('/', 1)
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
def shouldSkipUrl(self, url, data):
"""Skip pages without images."""
# Fixed list of strip pages; data is not inspected.
return url in (
self.stripUrl % '2012/08/self-rant', # no comic
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic
self.stripUrl % '2010/11/ear-infection', # no comic
)
class SuburbanTribe(_BasicScraper): class SuburbanTribe(_BasicScraper):
url = 'http://www.pixelwhip.com/' url = 'http://www.pixelwhip.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="prev"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
# Scraper settings for the comic hosted at www.somethingpositive.net.
class SomethingPositive(_BasicScraper):
url = 'http://www.somethingpositive.net/'
stripUrl = url + 'sp%s.shtml'
# Two alternative image patterns: a numbered sp<digits>.png file or the
# fixed file name twither.gif.
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute -- confirm.
imageSearch = (
compile(tagre("img", "src", r'(sp\d+\.png)')),
compile(tagre("img", "src", r'(twither\.gif)')),
)
# Captures an sp<digits>.shtml link that is followed either by the
# images/previous.gif image or by the literal text "Previous".
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') +
"(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
help = 'Index format: mmddyyyy'
class StarCrossdDestiny(_BasicScraper):
    # Scraper settings for the comic hosted at starcrossd.net.
    baseUrl = 'http://www.starcrossd.net/'
    # Regex-escaped copy of baseUrl, interpolated into prevSearch below.
    rurl = escape(baseUrl)
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'
    # Accepts image URLs (with or without "www.") located in the ch1/,
    # strips/ or book2/ directories.
    imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
    # Captures the archive page link (optionally under ch1/) of an anchor
    # whose text starts with "prev", matched case-insensitively.
    prevSearch = compile(r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
    help = 'Index format: nnnnnnnn'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build an image name of the form ``<directory>-<basename>``.

        The directory is the second-to-last path component of the
        (normalised) image URL and the basename is the file name with its
        extension removed. ``pageUrl`` is not used.
        """
        if 'ch1' not in imageUrl:
            # At first all images were stored in a strips/ directory but
            # that was changed with the introduction of book2
            imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
        elif 'strips' in imageUrl:
            # ch1 URLs that still carry a strips/ component: drop it so the
            # directory part becomes the component before it.
            imageUrl = imageUrl.replace('strips/', '')
        directory, filename = imageUrl.split('/')[-2:]
        return directory + '-' + splitext(filename)[0]
# XXX disallowed by robots.txt
# NOTE(review): the leading underscore in the class name presumably keeps
# this scraper out of the list of available comics -- confirm.
class _StrangeCandy(_BasicScraper):
url = 'http://www.strangecandy.net/'
stripUrl = url + 'd/%s.html'
# Captures a site-relative /comics/<digits>.jpg image path.
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
# Captures a /d/<digits>.html link followed by an image whose alt text is
# "Previous comic".
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
# NOTE(review): 'yyyyddmm' may be a typo for 'yyyymmdd' -- confirm against the site.
help = 'Index format: yyyyddmm'
class SupernormalStep(_BasicScraper): class SupernormalStep(_BasicScraper):
url = 'http://supernormalstep.com/' url = 'http://supernormalstep.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + '?p=%s' stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '8' firstStripUrl = stripUrl % '8'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl,
after="prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter from ..helpers import indirectStarter
@ -23,7 +25,8 @@ class TheDevilsPanties(_BasicScraper):
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '300' firstStripUrl = stripUrl % '300'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(/archives/\d+)',
after="Previous"))
help = 'Index format: number' help = 'Index format: number'
@ -42,16 +45,20 @@ class TheLandscaper(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/comics/comic/comic_page/[^"]+)')) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)')+'&lsaquo; Previous') r'(/comics/comic/comic_page/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)') +
'&lsaquo; Previous')
help = 'Index format: name' help = 'Index format: name'
class TheNoob(_BasicScraper): class TheNoob(_BasicScraper):
url = 'http://www.thenoobcomic.com/index.php' url = 'http://www.thenoobcomic.com/index.php'
stripUrl = url + '?pos=%s' stripUrl = url + '?pos=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button")) prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)',
before="comic_nav_previous_button"))
help = 'Index format: nnnn' help = 'Index format: nnnn'
@ -70,6 +77,16 @@ class TheOrderOfTheStick(_BasicScraper):
return pageUrl.rsplit('/', 1)[-1][:-5] return pageUrl.rsplit('/', 1)[-1][:-5]
# Scraper settings for the comic hosted at theouterquarter.com.
class TheOuterQuarter(_BasicScraper):
url = 'http://theouterquarter.com/'
# Regex-escaped copy of url, interpolated into imageSearch below.
rurl = escape(url)
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'oq-the-first-take/4'
# Captures the src of an <img> whose URL starts with <url>comics/
# (non-greedy up to the closing quote).
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
# Captures the href of the rel="prev" link inside the nav-previous div.
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
# NOTE(review): 'nnn' does not match the 'oq-the-first-take/4' index used
# for firstStripUrl -- confirm the intended index format.
help = 'Index format: nnn'
class TheParkingLotIsFull(_BasicScraper): class TheParkingLotIsFull(_BasicScraper):
baseUrl = 'http://plif.courageunfettered.com/' baseUrl = 'http://plif.courageunfettered.com/'
url = baseUrl + 'archive/arch2002.htm' url = baseUrl + 'archive/arch2002.htm'
@ -81,6 +98,40 @@ class TheParkingLotIsFull(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
# Scraper settings for the comic hosted at thinhline.tumblr.com.
class TheThinHLine(_BasicScraper):
url = 'http://thinhline.tumblr.com/'
# Regex-escaped copy of url, interpolated into indirectImageSearch below.
rurl = escape(url)
stripUrl = url + 'post/%s'
firstStripUrl = stripUrl % '3517345105'
# NOTE(review): the dots in 'media.tumblr.com' are unescaped, so they match
# any character; tagre is defined outside this view -- confirm its semantics.
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
# Captures the href of an anchor immediately followed by '&gt;</a>'.
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>')
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
adult = True
# Pattern for the link to the separate <url>image/<digits> page used by
# getComicStrip.
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
def getComicStrip(self, url, data):
"""The comic strip image is in a separate page."""
# Resolve the image page via indirectImageSearch, load it, and let the
# base class extract the strip from that page instead of the post page.
# fetchUrl and getPage are not defined in this class.
pageUrl = self.fetchUrl(url, data, self.indirectImageSearch)
pageData = self.getPage(pageUrl)
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use page URL sequence which is apparently increasing."""
# num is the last path component of pageUrl; ext is the text after the
# last dot of imageUrl.
num = pageUrl.split('/')[-1]
ext = imageUrl.rsplit('.', 1)[1]
return "thethinhline-%s.%s" % (num, ext)
# Scraper settings for the comic hosted at www.the-whiteboard.com.
class TheWhiteboard(_BasicScraper):
url = 'http://www.the-whiteboard.com/'
stripUrl = url + 'auto%s.html'
# Captures image names starting with autotwb or autowb plus 1-4 digits,
# matched case-insensitively.
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
# Captures the href of the link labelled "previous"; the pattern expects a
# literal '&nbsp' (no semicolon) directly before the anchor.
prevSearch = compile(r'&nbsp<a href="(.+?)">previous</a>', IGNORECASE)
# NOTE(review): 'wg.' in the text below is possibly a typo for 'e.g.' -- confirm.
help = 'Index format: twb or wb + n wg. twb1000'
class TheWotch(_BasicScraper): class TheWotch(_BasicScraper):
url = 'http://www.thewotch.com/' url = 'http://www.thewotch.com/'
stripUrl = url + '?date=%s' stripUrl = url + '?date=%s'
@ -101,6 +152,16 @@ class ThisIsIndexed(_BasicScraper):
help = 'Index format: number' help = 'Index format: number'
# Scraper settings for the comic hosted at threepanelsoul.com.
class ThreePanelSoul(_BasicScraper):
url = 'http://threepanelsoul.com/'
# Regex-escaped copy of url, interpolated into the patterns below.
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/05/11/a-test-comic'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute -- confirm.
# The captured group is an image URL under <url>comics/.
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
# The captured group is a link of the form <url><digits>/<digits>/<digits>/<name>.
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class ThunderAndLightning(_BasicScraper): class ThunderAndLightning(_BasicScraper):
url = 'http://www.talcomic.com/wp/' url = 'http://www.talcomic.com/wp/'
rurl = escape(url) rurl = escape(url)
@ -137,68 +198,6 @@ class ToonHole(_BasicScraper):
return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",) return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
# Scraper settings for the comic hosted at www.twolumps.net.
class TwoLumps(_BasicScraper):
url = 'http://www.twolumps.net/'
stripUrl = url + 'd/%s.html'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute -- confirm.
# The captured group is a site-relative image path under /comics/.
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
# The captured group is a site-relative /d/<digits>.html link.
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev"))
help = 'Index format: yyyymmdd'
# Scraper settings for the comic hosted at www.the-whiteboard.com.
class TheWhiteboard(_BasicScraper):
url = 'http://www.the-whiteboard.com/'
stripUrl = url + 'auto%s.html'
# Captures image names starting with autotwb or autowb plus 1-4 digits,
# matched case-insensitively.
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
# Captures the href of the link labelled "previous"; the pattern expects a
# literal '&nbsp' (no semicolon) directly before the anchor.
prevSearch = compile(r'&nbsp<a href="(.+?)">previous</a>', IGNORECASE)
# NOTE(review): 'wg.' in the text below is possibly a typo for 'e.g.' -- confirm.
help = 'Index format: twb or wb + n wg. twb1000'
# Scraper settings for the comic hosted at theouterquarter.com.
class TheOuterQuarter(_BasicScraper):
url = 'http://theouterquarter.com/'
# Regex-escaped copy of url, interpolated into imageSearch below.
rurl = escape(url)
stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'oq-the-first-take/4'
# Captures the src of an <img> whose URL starts with <url>comics/
# (non-greedy up to the closing quote).
imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
# Captures the href of the rel="prev" link inside the nav-previous div.
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
# NOTE(review): 'nnn' does not match the 'oq-the-first-take/4' index used
# for firstStripUrl -- confirm the intended index format.
help = 'Index format: nnn'
# Scraper settings for the comic hosted at thinhline.tumblr.com.
class TheThinHLine(_BasicScraper):
url = 'http://thinhline.tumblr.com/'
# Regex-escaped copy of url, interpolated into indirectImageSearch below.
rurl = escape(url)
stripUrl = url + 'post/%s'
firstStripUrl = stripUrl % '3517345105'
# NOTE(review): the dots in 'media.tumblr.com' are unescaped, so they match
# any character; tagre is defined outside this view -- confirm its semantics.
imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
# Captures the href of an anchor immediately followed by '&gt;</a>'.
prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>')
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
adult = True
# Pattern for the link to the separate <url>image/<digits> page used by
# getComicStrip.
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
def getComicStrip(self, url, data):
"""The comic strip image is in a separate page."""
# Resolve the image page via indirectImageSearch, load it, and let the
# base class extract the strip from that page instead of the post page.
# fetchUrl and getPage are not defined in this class.
pageUrl = self.fetchUrl(url, data, self.indirectImageSearch)
pageData = self.getPage(pageUrl)
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use page URL sequence which is apparently increasing."""
# num is the last path component of pageUrl; ext is the text after the
# last dot of imageUrl.
num = pageUrl.split('/')[-1]
ext = imageUrl.rsplit('.', 1)[1]
return "thethinhline-%s.%s" % (num, ext)
# Scraper settings for the comic hosted at threepanelsoul.com.
class ThreePanelSoul(_BasicScraper):
url = 'http://threepanelsoul.com/'
# Regex-escaped copy of url, interpolated into the patterns below.
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/05/11/a-test-comic'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute -- confirm.
# The captured group is an image URL under <url>comics/.
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
# The captured group is a link of the form <url><digits>/<digits>/<digits>/<name>.
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class TracyAndTristan(_BasicScraper): class TracyAndTristan(_BasicScraper):
url = 'http://tandt.thecomicseries.com/' url = 'http://tandt.thecomicseries.com/'
rurl = escape(url) rurl = escape(url)
@ -214,6 +213,15 @@ class TwoGuysAndGuy(_BasicScraper):
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '4' firstStripUrl = stripUrl % '4'
imageSearch = compile(tagre('img', 'src', r'(%scomics/\d{4}-\d{2}-\d{2}[^"]*)' % rurl)) imageSearch = compile(tagre('img', 'src', r'(%scomics/\d{4}-\d{2}-\d{2}[^"]*)' % rurl))
prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl, after='title="Previous"')) prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl,
after='title="Previous"'))
help = 'Index format: number' help = 'Index format: number'
adult = True adult = True
# Scraper settings for the comic hosted at www.twolumps.net.
class TwoLumps(_BasicScraper):
url = 'http://www.twolumps.net/'
stripUrl = url + 'd/%s.html'
# NOTE(review): tagre is defined outside this view; presumably it builds a
# regex for the given tag/attribute -- confirm.
# The captured group is a site-relative image path under /comics/.
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
# The captured group is a site-relative /d/<digits>.html link.
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev"))
help = 'Index format: yyyymmdd'

View file

@ -1,12 +1,15 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape from re import compile, escape
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import indirectStarter
from ..util import getQueryParams, tagre from ..util import tagre
class Underling(_BasicScraper): class Underling(_BasicScraper):
url = 'http://underlingcomic.com/' url = 'http://underlingcomic.com/'
@ -14,7 +17,8 @@ class Underling(_BasicScraper):
rurl = escape(url) rurl = escape(url)
firstStripUrl = stripUrl + 'page-one/' firstStripUrl = stripUrl + 'page-one/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after = r'class="[^"]*navi-prev')) prevSearch = compile(tagre("a", "href", r'([^"]+)',
after=r'class="[^"]*navi-prev'))
help = 'Index format: nnn' help = 'Index format: nnn'
@ -45,26 +49,12 @@ class Unsounded(_BasicScraper):
rurl = escape(url) rurl = escape(url)
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)')) imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back')) prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
starter = indirectStarter(url, starter = indirectStarter(
compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) + url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
tagre("img", "src", r"%simages/newpages\.png" % rurl))) tagre("img", "src", r"%simages/newpages\.png" % rurl)))
help = 'Index format: chapter-number' help = 'Index format: chapter-number'
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):
"""Get comic strip URL from index.""" """Get comic strip URL from index."""
chapter, num = index.split('-') chapter, num = index.split('-')
return self.stripUrl % (chapter, chapter, num) return self.stripUrl % (chapter, chapter, num)
# XXX disallowed by robots.txt
# NOTE(review): the leading underscore in the class name presumably keeps
# this scraper out of the list of available comics -- confirm.
class _UserFriendly(_BasicScraper):
url = 'http://ars.userfriendly.org/cartoons/?mode=classic'
# url already carries a query string, so the strip id is appended with '&'.
stripUrl = url + '&id=%s'
# NOTE(review): bounceStarter is defined outside this view; the pattern
# passed here matches an <area> link with an empty alt text -- confirm
# which navigation link that is.
starter = bounceStarter(url, compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="">'))
# NOTE(review): the dots in 'www.userfriendly.org' are unescaped and match
# any character.
imageSearch = compile(r'<img border="0" src="\s*(http://www.userfriendly.org/cartoons/archives/\d{2}\w{3}/.+?\.gif)"')
# Same <area> link shape as above, but with alt="Previous Cartoon".
prevSearch = compile(r'<area shape="rect" href="(/cartoons/\?id=\d{8}&mode=classic)" coords="[\d, ]+?" alt="Previous Cartoon">')
help = 'Index format: yyyymmdd'
@classmethod
def namer(cls, imageUrl, pageUrl):
# Name is 'uf' plus the first 'id' query value of pageUrl with its first
# two characters removed (presumably the century digits of yyyymmdd).
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@ -23,28 +25,29 @@ class VGCats(_BasicScraper):
firstStripUrl = stripUrl % '0' firstStripUrl = stripUrl % '0'
imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)')) imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') +
tagre("img", "src", r"back\.gif")) tagre("img", "src", r"back\.gif"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# Variant of the VGCats scraper pointed at the /super/ section of the site.
class VGCatsSuper(VGCats):
name = 'VGCats/Super'
url = 'http://www.vgcats.com/super/'
# Rebuilt here so the template uses this class's url.
stripUrl = url + '?strip_id=%s'
class VGCatsAdventure(VGCats): class VGCatsAdventure(VGCats):
name = 'VGCats/Adventure' name = 'VGCats/Adventure'
url = 'http://www.vgcats.com/ffxi/' url = 'http://www.vgcats.com/ffxi/'
stripUrl = url + '?strip_id=%s' stripUrl = url + '?strip_id=%s'
# Variant of the VGCats scraper pointed at the /super/ section of the site.
class VGCatsSuper(VGCats):
name = 'VGCats/Super'
url = 'http://www.vgcats.com/super/'
# Rebuilt here so the template uses this class's url.
stripUrl = url + '?strip_id=%s'
class VictimsOfTheSystem(_BasicScraper): class VictimsOfTheSystem(_BasicScraper):
url = 'http://www.votscomic.com/' url = 'http://www.votscomic.com/'
stripUrl = url + '?id=%s.jpg' stripUrl = url + '?id=%s.jpg'
firstStripUrl = stripUrl % '070103-002452' firstStripUrl = stripUrl % '070103-002452'
imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') + "Previous") prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') +
"Previous")
help = 'Index format: nnn-nnn' help = 'Index format: nnn-nnn'
@ -52,7 +55,8 @@ class ViiviJaWagner(_BasicScraper):
url = 'http://www.hs.fi/viivijawagner/' url = 'http://www.hs.fi/viivijawagner/'
stripUrl = None stripUrl = None
imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', before="prev-cm")) prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)',
before="prev-cm"))
help = 'Index format: none' help = 'Index format: none'
lang = 'fi' lang = 'fi'

View file

@ -1,7 +1,9 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
@ -24,7 +26,8 @@ class WastedTalent(_BasicScraper):
stripUrl = url + 'comic/%s' stripUrl = url + 'comic/%s'
firstStripUrl = stripUrl % 'anime-crack' firstStripUrl = stripUrl % 'anime-crack'
imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="comic_prev")) prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)',
after="comic_prev"))
help = 'Index format: stripname' help = 'Index format: stripname'
@ -50,7 +53,8 @@ class WebDesignerCOTW(_BasicScraper):
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')), compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
) )
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'")) prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
before='prev', quote="'"))
help = 'Index format: yyyy/mm/stripname' help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
@ -78,8 +82,10 @@ class Weregeek(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/11/27/' firstStripUrl = stripUrl % '2006/11/27/'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src",
prevSearch = compile(tagre("a", "href", r'((%s)?(/)?\d+/\d+/\d+/)'% rurl)+'\s*'+ tagre('img', 'src', '[^"]*previous_day.gif')) r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'((%s)?/?\d+/\d+/\d+/)' % rurl) +
'\s*' + tagre('img', 'src', '[^"]*previous_day.gif'))
help = 'Index format: yyyy/mm/dd' help = 'Index format: yyyy/mm/dd'
@ -108,7 +114,8 @@ class Whomp(_BasicScraper):
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/06/14/06142010' firstStripUrl = stripUrl % '2010/06/14/06142010'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
@ -118,7 +125,8 @@ class WhyTheLongFace(_BasicScraper):
url = baseUrl + 'wtlf200709.html' url = baseUrl + 'wtlf200709.html'
stripUrl = baseUrl + 'wtlf%s.html' stripUrl = baseUrl + 'wtlf%s.html'
firstStripUrl = stripUrl % '200306' firstStripUrl = stripUrl % '200306'
imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl, IGNORECASE) imageSearch = compile(r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl,
IGNORECASE)
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ') prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
help = 'Index format: yyyymm' help = 'Index format: yyyymm'
@ -129,7 +137,8 @@ class Wigu(_BasicScraper):
stripUrl = url + 'oc/index.php?comic=%s' stripUrl = url + 'oc/index.php?comic=%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)', after="go back")) prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
after="go back"))
help = 'Index format: n' help = 'Index format: n'
@ -138,9 +147,11 @@ class Wonderella(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays' firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays'
imageSearch = compile(tagre("div", "id", r"comic", quote=r'["\']') + r"\s*" + imageSearch = compile(tagre("div", "id", r"comic", quote=r'["\']') +
tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) r"\s*" +
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
@ -187,6 +198,13 @@ class WorldOfMrToast(_BasicScraper):
return None return None
# Scraper settings for the comic hosted at woweh.com.
class WorldOfWarcraftEh(_BasicScraper):
url = 'http://woweh.com/'
# No per-strip URL template is defined for this comic.
stripUrl = None
# Captures the comics/... path that follows the site prefix, up to the
# next double quote.
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
# NOTE(review): '.+:?' is a greedy '.+' followed by an optional colon; it
# looks like a typo for the non-greedy '.+?' -- confirm against the page
# markup before changing. The dots in 'woweh.com' are also unescaped.
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')
class WormWorldSaga(_BasicScraper): class WormWorldSaga(_BasicScraper):
url = 'http://www.wormworldsaga.com/' url = 'http://www.wormworldsaga.com/'
stripUrl = url + 'chapters/%s/index.php' stripUrl = url + 'chapters/%s/index.php'
@ -214,15 +232,17 @@ class WormWorldSaga(_BasicScraper):
return None return None
# French-language variant of WormWorldSaga; only the lang attribute differs.
class WormWorldSagaFrench(WormWorldSaga):
lang = 'fr'
class WormWorldSagaGerman(WormWorldSaga): class WormWorldSagaGerman(WormWorldSaga):
lang = 'de' lang = 'de'
class WormWorldSagaSpanish(WormWorldSaga): class WormWorldSagaSpanish(WormWorldSaga):
lang = 'es' lang = 'es'
# French-language variant of WormWorldSaga; only the lang attribute differs.
class WormWorldSagaFrench(WormWorldSaga):
lang = 'fr'
class WotNow(_BasicScraper): class WotNow(_BasicScraper):
url = 'http://shadowburn.binmode.com/wotnow/' url = 'http://shadowburn.binmode.com/wotnow/'
@ -231,11 +251,3 @@ class WotNow(_BasicScraper):
imageSearch = compile(r'<IMG SRC="(comics/.+?)"') imageSearch = compile(r'<IMG SRC="(comics/.+?)"')
prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ') prevSearch = compile(r'<A HREF="(.+?)"><IMG SRC="images/b_prev.gif" ')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
# XXX disallowed by robots.txt
# NOTE(review): the leading underscore in the class name presumably keeps
# this scraper out of the list of available comics -- confirm.
class _WorldOfWarcraftEh(_BasicScraper):
url = 'http://woweh.com/'
# No per-strip URL template is defined for this comic.
stripUrl = None
# Captures the comics/... path that follows the site prefix, up to the
# next double quote.
imageSearch = compile(r'http://woweh.com/(comics/.+?)"')
# NOTE(review): '.+:?' is a greedy '.+' followed by an optional colon; it
# looks like a typo for the non-greedy '.+?' -- confirm against the page
# markup before changing. The dots in 'woweh.com' are also unescaped.
prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev')