dosage/dosagelib/plugins/s.py

612 lines
26 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2014-01-05 15:50:57 +00:00
# Copyright (C) 2012-2014 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2013-11-07 20:22:49 +00:00
from re import compile, escape, IGNORECASE, sub
2012-06-20 19:58:13 +00:00
from os.path import splitext
2012-10-11 10:03:12 +00:00
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter
2013-04-25 19:14:32 +00:00
from ..util import tagre, getPageContent
class SabrinaOnline(_BasicScraper):
    """Scraper settings for the comic at http://sabrina-online.com/.

    Archive pages are named like ``dddd-dd.html``; the start page is looked
    up from ``archive.html`` instead of being a fixed URL.
    """

    description = u'Skunks, computers and porn'
    url = 'http://sabrina-online.com/'
    adult = True
    multipleImagesPerStrip = True
    help = 'Index format: n (unpadded)'

    # Links whose href starts with strips/ are captured as images.
    imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
    # Archive page link followed by the "back" button image.  The dots are
    # escaped so that they match a literal '.' only.
    prevSearch = compile(
        tagre("a", "href", r"(\d\d\d\d-\d\d\.html)") +
        tagre("img", "src", r"b_back\.gif")
    )

    @classmethod
    def starter(cls):
        """Pick last one in a list of archive pages.

        Fetches ``archive.html`` below ``cls.url`` and returns ``cls.url``
        joined with the last archive link found in it.  Raises IndexError
        when the page contains no archive link.
        """
        archive_url = cls.url + 'archive.html'
        # Only the first item of getPageContent's result is used here.
        page = getPageContent(archive_url, cls.session)[0]
        archive_link = compile(tagre("a", "href", r"(\d\d\d\d-\d\d\.html)"))
        return cls.url + archive_link.findall(page)[-1]
2012-06-20 19:58:13 +00:00
class SailorsunOrg(_BasicScraper):
    """Scraper settings for the comic at http://sailorsun.org/."""

    help = 'Index format: n (unpadded)'

    url = 'http://sailorsun.org/'
    # Regex-escaped copy of url, interpolated into the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '21'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Links of the form <url>?p=<digits>, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
class SamAndFuzzy(_BasicScraper):
    """Scraper settings for the comic at http://www.samandfuzzy.com/."""

    description = u"Serial about a cab driver and his bear-like friend by Sam Logan. Offers a reader's guide, forum, and frequently asked questions."
    url = 'http://www.samandfuzzy.com/'
    # NOTE(review): strip URLs use the host without "www.", unlike `url` -
    # confirm this is intended.
    stripUrl = 'http://samandfuzzy.com/%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: nnnn'

    # Path starting with /comics/ up to the quote preceding " alt".
    imageSearch = compile(r'(/comics/.+?)" alt')
    # Link wrapping the "previous" navigation image.  The dot of the file
    # name is escaped so it matches a literal '.' only.
    prevSearch = compile(r'"><a href="(.+?)"><img src="imgint/nav_prev\.gif"')
2013-02-13 16:53:11 +00:00
class SandraAndWoo(_BasicScraper):
    """Scraper settings for the comic at http://www.sandraandwoo.com/."""

    help = 'Index format: yyyy/mm/dd/number-stripname'
    description = u'Sandra and Woo: a webcomic about friendship, life and the art of (not) eating squirrels, featuring the girl Sandra and her pet raccoon Woo.'

    url = 'http://www.sandraandwoo.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-sandra-and-woo'

    # Image sources named <digits>-<digits>-<digits>-... below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl)
    )
    # Links of the form <url>n/n/n/<name>/, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev")
    )
2013-03-19 19:54:16 +00:00
class SandraAndWooGerman(_BasicScraper):
    """Scraper settings for the comic at http://www.sandraandwoo.com/woode/."""

    lang = 'de'
    help = 'Index format: yyyy/mm/dd/number-stripname'
    description = u'Sandra und Woo: ein Webcomic \xfcber Freundschaft, das Leben und die Kunst (keine) Eichh\xf6rnchen zu essen; mit dem M\xe4dchen Sandra und ihrem Waschb\xe4ren Woo in den Hauptrollen'

    url = 'http://www.sandraandwoo.com/woode/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/10/19/ein-ausgefuchster-waschbar'

    # Image sources named <digits>-<digits>-<digits>-... below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl)
    )
    # Links of the form <url>n/n/n/<name>/, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev")
    )
2013-04-20 16:51:06 +00:00
class SandraOnTheRocks(_BasicScraper):
    """Scraper settings for the comic at http://www.sandraontherocks.com/."""

    help = 'Index format: name'

    url = 'http://www.sandraontherocks.com/'
    stripUrl = url + 'strips-sotr/%s'
    firstStripUrl = stripUrl % 'start_by_running'

    # Any image source containing /comics/.
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/comics/[^"]+)')
    )
    # Links containing /strips-sotr/; "cn[id]prev" is passed as `before`.
    prevSearch = compile(
        tagre("a", "href", r'([^"]*/strips-sotr/[^"]+)', before="cn[id]prev")
    )
2013-03-19 19:54:16 +00:00
class ScandinaviaAndTheWorld(_BasicScraper):
    """Scraper settings for the comic at http://satwcomic.com/."""

    description = u'Scandinavia and the World'
    url = 'http://satwcomic.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
    help = 'Index format: stripname'

    # Image sources directly below <url>art/ (no further slash).
    imageSearch = compile(tagre("img", "src", r'(%sart/[^"/]+)' % rurl))
    # Link followed (after optional whitespace) by the span with class
    # "spritePrevious".  The whitespace pattern is a raw string: "\s" in a
    # normal string literal is an invalid escape sequence.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"/]+)' % rurl) +
        r"\s*" +
        tagre('span', 'class', 'spritePrevious')
    )
2012-06-20 19:58:13 +00:00
class ScaryGoRound(_BasicScraper):
    """Scraper settings for the comic at http://www.scarygoround.com/."""

    help = 'Index format: n (unpadded)'

    url = 'http://www.scarygoround.com/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20090918'

    # Relative image sources of the form strips/<digits>.png.
    imageSearch = compile(
        tagre("img", "src", r'(strips/\d+\.png)')
    )
    # Relative ?date=<digits> link followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(\?date=\d+)') + "Previous"
    )
2013-02-06 21:08:36 +00:00
class ScenesFromAMultiverse(_BasicScraper):
    """Scraper settings for the comic at http://amultiverse.com/."""

    help = 'Index format: yyyy/mm/dd/stripname'
    description = u'SFAM Guest Month wraps up today with a contribution by Meredith Gran of Octopus Pie that is sure to tickle and delight even the grumpiest of codgers.'

    url = 'http://amultiverse.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/06/14/parenthood'

    # Two alternatives: the image follows the div with id "comic" either
    # directly or after an intermediate link tag.
    imageSearch = (
        compile(
            tagre("div", "id", "comic") + r"\s*" +
            tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)
        ),
        compile(
            tagre("div", "id", "comic") + r"\s*" +
            tagre("a", "href", r'[^"]*') +
            tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)
        ),
    )
    # NOTE(review): "\d+\d+" looks like a typo for "\d+"; kept unchanged.
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")
    )
class SchlockMercenary(_BasicScraper):
    """Scraper settings for the comic at http://www.schlockmercenary.com/."""

    help = 'Index format: yyyy-mm-dd'
    description = u'2 days ago ... Travel the galaxy. Meet new and fascinating life-forms.'
    multipleImagesPerStrip = True

    url = 'http://www.schlockmercenary.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '2000-06-12'

    # Images served from the static host's comics/ directory.
    imageSearch = compile(
        tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)')
    )
    # Single-quoted relative link /n-n-n; "nav-previous" is passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous")
    )
2012-06-20 19:58:13 +00:00
class SchoolBites(_BasicScraper):
    """Scraper settings for the comic at http://schoolbites.net/."""

    help = 'Index format: yyyymmdd'

    url = 'http://schoolbites.net/'
    stripUrl = url + 'd/%s.html'

    # Images served from the cdn host's comics/ directory.
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.schoolbites\.net/comics/[^"]+)')
    )
    # Absolute link to d/<digits>.html, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(http://schoolbites\.net/d/\d+\.html)', after="prev")
    )
2013-03-25 18:48:32 +00:00
class Schuelert(_BasicScraper):
    """Scraper settings for the comic at http://www.schuelert.de/."""

    lang = 'de'
    help = 'Index format: none'
    multipleImagesPerStrip = True

    url = 'http://www.schuelert.de/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php?paged=%s'
    firstStripUrl = stripUrl % '5'

    # Single-quoted image sources below <url>wp-content/.
    imageSearch = compile(
        tagre("img", "src", r"(%swp-content/[^']+)" % rurl, quote="'")
    )
    # Paged index link followed by the "&laquo;" entity.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php\?paged=\d+)' % rurl) + "&laquo;"
    )
2013-04-09 17:38:16 +00:00
class Science(_BasicScraper):
    """Scraper settings for the comic at http://sci-ence.org/."""

    description = u'A comic about science, technology, skepticism, geekery, video games, atheism, and more.'
    help = 'Index format: stripname'

    url = 'http://sci-ence.org/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'

    # Image sources named <digits>-<digits>-<digits>... below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)
    )
    # Site link ending in a slash, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+/)' % rurl, after="prev")
    )
2013-01-29 20:23:32 +00:00
class SequentialArt(_BasicScraper):
    """Scraper settings for http://www.collectedcurios.com/sequentialart.php."""

    url = 'http://www.collectedcurios.com/sequentialart.php'
    stripUrl = url + '?s=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: name'

    # Any image source; "strip" is passed as tagre's `before` argument.
    imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
    # Link to /sequentialart.php?s=<digits> followed by the "back one"
    # navigation image.  The image pattern is a raw string: "\." in a normal
    # string literal is an invalid escape sequence.
    prevSearch = compile(
        tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') +
        tagre("img", "src", r"Nav_BackOne\.gif")
    )
2013-03-06 19:21:10 +00:00
class SexyLosers(_BasicScraper):
    """Scraper settings for the comic at http://www.sexylosers.com/."""

    help = 'Index format: nnn'
    adult = True

    url = 'http://www.sexylosers.com/'
    stripUrl = url + '%s.html'

    # Relative image source below comics/ (case-insensitive match).
    imageSearch = compile(
        r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"',
        IGNORECASE
    )
    # Link to /<three digits>.<ext> followed by the "<<" marker.
    prevSearch = compile(
        r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<',
        IGNORECASE
    )
    # The start page is taken from the "Latest SL Comic" link on `url`.
    starter = indirectStarter(
        url,
        compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<page name>-<image name>', both without path and extension."""
        def stem(link):
            # Last path component, cut at its first dot.
            return link.split('/')[-1].split('.')[0]

        return stem(pageUrl) + '-' + stem(imageUrl)
2013-04-28 17:58:38 +00:00
# XXX site has been hacked
class _ShadowGirls(_BasicScraper):
    """Scraper settings for the comic at http://www.shadowgirlscomic.com/."""

    help = 'Index format: custom'
    description = u"It's like H.P. Lovecraft meets the Gilmore Girls!"

    url = 'http://www.shadowgirlscomic.com/'
    stripUrl = url + 'comics/%s'
    firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome'

    # Any image source containing /comics/.
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/comics/[^"]*)')
    )
    # Any link, with "navi-prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'([^"]*)', after='navi-prev')
    )
    # The start page is taken from a link containing /comics/ on `url`.
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'([^"]*/comics/[^"]+)'))
    )
class Sheldon(_BasicScraper):
    """Scraper settings for the comic at http://www.sheldoncomics.com/."""

    help = 'Index format: yymmdd'
    description = u'The story of a software company tycoon billionaire ten-year-old, his grampa, his duck, his pug and a lizard.'

    url = 'http://www.sheldoncomics.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '011130'

    # Images served from the cdn host's strips/ directory.
    imageSearch = compile(
        tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)')
    )
    # Link to <url>archive/<digits>.html; "sidenav-prev" is passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, after="sidenav-prev")
    )
2013-07-04 18:20:26 +00:00
class ShermansLagoon(_BasicScraper):
    """Scraper settings for the comic at http://shermanslagoon.com/.

    Strip pages live below ``comics/`` and are named ``monthname-day-year``.
    """

    description = u"Sherman's Lagoon by Jim Toomey"
    url = 'http://shermanslagoon.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comics/%s'
    # No leading slash here: stripUrl already ends in "comics/", and a
    # doubled slash could never equal a URL matched by prevSearch.
    firstStripUrl = stripUrl % 'december-29-2003/'
    # Image served through the kingfeatures content script; the dot in
    # "content.php" is escaped so it matches a literal '.' only.
    imageSearch = compile(tagre("img", "src", r'(http://safr\.kingfeatures\.com/idn/etv/zone/xml/content\.php\?file=[^"]+)'))
    # Link to <url>comics/<name>/ followed by the text "&laquo; previous".
    prevSearch = compile(tagre("a", "href", r'(%scomics/[^"]+/)' % rurl) + '&laquo; previous')
    starter = bounceStarter(url,
        compile(tagre("a", "href", r'(%scomics/[^"]+/)' % rurl, after="next")))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return 'year-monthname-day' built from the page URL.

        The second to last path component of pageUrl must consist of exactly
        three parts separated by '-', otherwise ValueError is raised.
        """
        # name is monthname-day-year
        month, day, year = pageUrl.split('/')[-2].split('-')
        return "%s-%s-%s" % (year, month, day)
2012-12-08 20:30:51 +00:00
class Shivae(_BasicScraper):
    """Scraper settings for the comic at http://shivae.net/."""

    help = 'Index format: yyyy/mm/dd/stripname'

    url = 'http://shivae.net/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'blog/%s/'
    firstStripUrl = stripUrl % '2007/09/21/09212007'

    # Image sources below <url>wp-content/blogs.dir/n/files/n/n/.
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl)
    )
    # Link below <url>blog/, with "navi-prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="navi-prev")
    )
2012-12-12 16:41:29 +00:00
# XXX disallowed by robots.txt
class _Shortpacked(_BasicScraper):
    """Scraper settings for the comic at http://www.shortpacked.com/."""

    help = 'Index format: yyyy/comic/book-nn/mm-name1/name2'

    url = 'http://www.shortpacked.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Link of the form <url><digits>/comic/..., with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev")
    )
2012-06-20 19:58:13 +00:00
class SinFest(_BasicScraper):
    """Scraper settings for the comic at http://www.sinfest.net/."""

    description = u'Strip dealing with contemporary issues and religion. Created by Tatsuya Ishida.'
    name = 'KeenSpot/SinFest'
    url = 'http://www.sinfest.net/'
    stripUrl = url + 'archive_page.php?comicID=%s'
    help = 'Index format: n (unpadded)'

    # Captures the /comikaze/comics/... part of an image source.
    imageSearch = compile(r'<img src=".+?(/comikaze/comics/.+?)"')
    # Archive page link followed later by "prev_a".  The dot in
    # "archive_page.php" is escaped so it matches a literal '.' only.
    prevSearch = compile(r'(/archive_page\.php\?comicID=.+?)".+?prev_a')
2013-04-05 05:20:50 +00:00
# XXX disallowed by robots.txt
2013-04-05 16:47:51 +00:00
class _Sketchesnatched(_BasicScraper):
    """Scraper settings for the blog at http://sketchesnatched.blogspot.com/."""

    description = u"Artwork by Massimo Carnevale"
    help = 'Index format: yyyy-mm-ddThh:mm:ss'

    url = 'http://sketchesnatched.blogspot.com/'
    # "%%2B" yields a literal "%2B" once the index has been interpolated.
    stripUrl = url + 'search?updated-max=%s%%2B01:00&max-results=1'
    firstStripUrl = stripUrl % '2011-01-27T08:32:00'

    # Single-quoted meta content pointing to a blogspot image host.
    imageSearch = compile(
        tagre(
            "meta", "content", r"(http://\d+\.bp\.blogspot\.com/[^']+)",
            after=r'image_url', quote="'"
        )
    )
    # Single-quoted search link on any blogspot top level domain.
    prevSearch = compile(
        tagre(
            "a", "href", r"(http://sketchesnatched\.blogspot\.[a-z]+/search[^']+)",
            before=r"blog-pager-older-link", quote="'"
        )
    )
class SkinDeep(_BasicScraper):
    """Scraper settings for the comic at http://www.skindeepcomic.com/."""

    help = 'Index format: custom'

    url = 'http://www.skindeepcomic.com/'
    stripUrl = url + 'archive/%s/'

    # Image directly inside a span whose class starts with "webcomic-object".
    imageSearch = compile(
        r'<span class="webcomic-object[^>]*><img src="([^"]*)"'
    )
    # Any link, with "previous-webcomic-link" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)', after="previous-webcomic-link")
    )
2012-06-20 19:58:13 +00:00
class SlightlyDamned(_BasicScraper):
    """Scraper settings for the comic at http://www.sdamned.com/."""

    help = 'Index format: yyyy/mm/number'

    url = 'http://www.sdamned.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2004/03/03142004'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Any link below the site URL, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
2012-06-20 19:58:13 +00:00
class SluggyFreelance(_BasicScraper):
    """Scraper settings for the comic at http://www.sluggy.com/."""

    help = 'Index format: yymmdd'
    multipleImagesPerStrip = True

    url = 'http://www.sluggy.com/'
    stripUrl = url + 'comics/archives/daily/%s'

    # Image sources starting with /images/comics/.
    imageSearch = compile(
        r'<img src="(/images/comics/.+?)"'
    )
    # Link wrapping the "seek-prev" icon span.
    prevSearch = compile(
        r'<a href="(.+?)"[^>]+?><span class="ui-icon ui-icon-seek-prev">'
    )
2013-03-06 19:21:10 +00:00
class SMBC(_BasicScraper):
    """Scraper settings for the comic at http://www.smbc-comics.com/."""

    help = 'Index format: nnnn'
    description = u"Saturday Morning Breakfast Cereal"

    url = 'http://www.smbc-comics.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'

    # Single-quoted image source: eight digits, an optional suffix and a
    # three character extension below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r"(%scomics/\d{8}(?:\w2?|-\d)?\.\w{3})\s*" % rurl, quote="'")
    )
    # Link ending in "#comic" (the fragment is not captured).
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)#comic', after="backRollover")
    )

    def shouldSkipUrl(self, url, data):
        """Skip promo or missing update pages."""
        skipped_ids = (
            '2865', '2653', '2424', '2226',
            '2069', '1895', '1896', '1589',
        )
        return any(url == self.stripUrl % strip_id for strip_id in skipped_ids)
2013-03-06 19:21:10 +00:00
2013-04-03 18:30:16 +00:00
class SnowFlakes(_BasicScraper):
    """Scraper settings for the comic at http://www.snowflakescomic.com/.

    Strip URLs carry two values, ``id`` and ``sl``; getStripIndexUrl uses the
    first character of the index as ``sl``.
    """

    description = u'Snowflakes - A comic by James Ashby, Chris Jones and Zach Weiner.'
    url = 'http://www.snowflakescomic.com/'
    stripUrl = url + '?id=%s&sl=%s'
    firstStripUrl = stripUrl % ('103', '1')
    endOfLife = True
    # Images are referenced either relatively or with the full site URL.
    # The dots of the host name are escaped so they match a literal '.'.
    imageSearch = (
        compile(tagre("img", "src", r'(comics/[^"]+)')),
        compile(tagre("img", "src", r'(http://www\.snowflakescomic\.com/comics/[^"]+)')),
    )
    # Unquoted link followed by the "prior" navigation image.
    prevSearch = compile(
        tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
        tagre("img", "src", r'images/nav_prior-ON\.gif')
    )
    help = 'Index format: number'

    @classmethod
    def starter(cls):
        """Return the fixed start URL (id 530, sl 5)."""
        return cls.stripUrl % ('530', '5')

    def getStripIndexUrl(self, index):
        """Return the strip URL for index, with its first character as sl."""
        return self.stripUrl % (index, index[0])

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Use strip index number for image name.

        The number is read from the ``id=`` value of pageUrl and the file
        extension from imageUrl.
        """
        strip_id = int(compile(r'id=(\d+)').search(pageUrl).group(1))
        extension = imageUrl.rsplit('.', 1)[1]
        return "SnowFlakes-%d.%s" % (strip_id, extension)

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return url in (
            self.stripUrl % ('279', '2'),  # no comic
            self.stripUrl % ('278', '2'),  # no comic
            self.stripUrl % ('277', '2'),  # no comic
            self.stripUrl % ('276', '2'),  # no comic
            self.stripUrl % ('275', '2'),  # no comic
            self.stripUrl % ('214', '2'),  # no comic
        )
class SnowFlame(_BasicScraper):
    """Scraper settings for the comic at http://www.snowflamecomic.com/.

    Strip URLs have the form ``?comic=snowflame-<chapter>-<page>``.
    """

    description = u'The fan-comic series featuring "The Man Powered by Cocaine"'
    url = 'http://www.snowflamecomic.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?comic=snowflame-%s-%s'
    firstStripUrl = stripUrl % ('01', '01')
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame "))
    # Link following the span with class "mininav-prev".
    prevSearch = compile(tagre("span", "class", "mininav-prev") +
        tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
    starter = bounceStarter(url,
        compile(tagre("span", "class", "mininav-next") +
            tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
    help = 'Index format: chapter-page'

    def getStripIndexUrl(self, index):
        """Return the strip URL for an index of the form 'chapter-page'.

        Raises TypeError when index does not consist of exactly two parts
        separated by '-'.
        """
        # The % operator only unpacks tuples; passing the list returned by
        # split() directly would count as a single argument and fail.
        return self.stripUrl % tuple(index.split('-'))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<chapter>-<page>-<image file name>'.

        Chapter and page are read from the ``snowflame-...`` part of pageUrl.
        """
        filename = imageUrl.rsplit('/', 1)[1]
        match = compile(r'snowflame-([^-]+)-([^-]+)').search(pageUrl)
        return "%s-%s-%s" % (match.group(1), match.group(2), filename)
2012-06-20 19:58:13 +00:00
class SodiumEyes(_BasicScraper):
    """Scraper settings for the comic at http://sodiumeyes.com/."""

    help = 'Index format: yyyy/mm/dd/stripname'

    url = 'http://sodiumeyes.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/11/08/damning-evidence'

    # Unquoted image source below <url>comic/, ending at the first space.
    imageSearch = compile(
        tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote="")
    )
    # Any link below the site URL, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")
    )
2012-06-20 19:58:13 +00:00
2012-12-08 20:30:51 +00:00
class Sorcery101(_BasicScraper):
    """Scraper settings for http://www.sorcery101.net/sorcery-101/."""

    help = 'Index format: stripname'
    description = u'Welcome to the site of Kel McDonald, professional comic illustrator and writer.'

    baseUrl = 'http://www.sorcery101.net/'
    url = baseUrl + 'sorcery-101/'
    # Regex-escaped copy of baseUrl (not url) for the patterns below.
    rurl = escape(baseUrl)
    stripUrl = url + '%s/'

    # Image sources below <baseUrl>wp-content/uploads/n/n/.
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)
    )
    # Link below <baseUrl>sorcery-101/, with "previous-" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl, after="previous-")
    )
2013-02-06 21:08:36 +00:00
class SpaceTrawler(_BasicScraper):
    """Scraper settings for the comic at http://spacetrawler.com/."""

    help = 'Index format: yyyy/mm/dd/stripname'

    url = 'http://spacetrawler.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Link of the form <url>n/n/n/<name>, with "navi-prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev")
    )
2013-04-10 16:36:33 +00:00
class Spamusement(_BasicScraper):
    """Scraper settings for the comic at http://spamusement.com/."""

    description = u'Spamusement! Poorly-drawn cartoons inspired by actual spam subject lines!'
    url = 'http://spamusement.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php/comics/view/%s'
    # Image source of the form <url>gfx/<digits>.<ext> (case-insensitive).
    imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
    # Link to a comic view page.  The dot in "index.php" is escaped so it
    # matches a literal '.' only.
    prevSearch = compile(r'<a href="(%sindex\.php/comics/view/.+?)">' % rurl, IGNORECASE)
    help = 'Index format: n (unpadded)'
    # The same link pattern is used to find the start page on `url`.
    starter = indirectStarter(url, prevSearch)
2012-06-20 19:58:13 +00:00
class SpareParts(_BasicScraper):
    """Scraper settings for the comic at http://www.sparepartscomics.com/."""

    help = 'Index format: yyyymmdd'
    description = u'Spare Parts by Terrence and Isabel Marks!'

    baseUrl = 'http://www.sparepartscomics.com/'
    # The start URL is pinned to one fixed date.
    url = baseUrl + 'comics/?date=20080328'
    stripUrl = baseUrl + 'comics/index.php?date=%s'
    firstStripUrl = stripUrl % '20031022'

    # Absolute image sources below the site's comics/ directory.
    imageSearch = compile(
        tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)')
    )
    # Single-quoted relative link followed by the text "Previous Comic".
    prevSearch = compile(
        tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic"
    )
2012-06-20 19:58:13 +00:00
2013-01-29 20:52:26 +00:00
class Spinnerette(_BasicScraper):
    """Scraper settings for the comic at http://www.spinnyverse.com/."""

    help = 'Index format: number'

    url = 'http://www.spinnyverse.com/'
    # Regex-escaped copy of url; not referenced by the patterns in this class.
    rurl = escape(url)
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '2010/02/09/02092010'

    # Relative image source below comics/, with "comic" passed as `after`.
    imageSearch = compile(
        tagre("img", "src", r'(comics/[^"]+)', after="comic")
    )
    # Relative /index.php?id=... link, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(/index\.php\?id=[^"]+)', after="prev")
    )
2012-12-08 20:30:51 +00:00
class SPQRBlues(_BasicScraper):
    """Scraper settings for the comic at http://spqrblues.com/IV/."""

    help = 'Index format: number'
    description = u"You can skip the next comic if you'd like to pass over the rest of this (very mildly) mature theme. I've tried to clarify the legalities as pointed out in the comments."

    url = 'http://spqrblues.com/IV/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '1467'

    # Image sources of the form <url>comics/<digits>.png.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.png)' % rurl)
    )
    # Links of the form <url>?p=<digits>, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
class StandStillStaySilent(_BasicScraper):
    """Scraper settings for http://www.sssscomic.com/comic.php."""

    description = u'"Stand Still. Stay Silent" is a post-apocalyptic adventure story with a rather light tone and careless pace.'
    help = 'Index Format: number'

    url = 'http://www.sssscomic.com/comic.php'
    # Regex-escaped copy of url; not referenced by the patterns in this class.
    rurl = escape(url)
    stripUrl = url + '?page=%s'
    firstStripUrl = stripUrl % '1'

    # Relative image source below comicpages/; "comicnormal" is `before`.
    imageSearch = compile(
        tagre("img", "src", r'(comicpages/[^"]+)', before="comicnormal")
    )
    # Single-quoted link followed by the div with id "navprev".
    prevSearch = compile(
        tagre("a", "href", r"([^']+)", quote="'") +
        tagre("div", "id", r'navprev')
    )
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _StationV3(_BasicScraper):
    """Scraper settings for the comic at http://www.stationv3.com/."""

    help = 'Index format: yyyymmdd'

    url = 'http://www.stationv3.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'd/%s.html'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Link to <url>d/<digits>.html followed by the "previous" button image.
    prevSearch = compile(
        tagre("a", "href", r'(%sd/\d+\.html)' % rurl) +
        tagre("img", "src", r'http://www\.stationv3\.com/images/previous\.gif')
    )
class StickyDillyBuns(_BasicScraper):
    """Scraper settings for the comic at http://www.stickydillybuns.com/."""

    help = 'Index format: name'

    url = 'http://www.stickydillybuns.com/'
    stripUrl = url + 'strips-sdb/%s'
    firstStripUrl = stripUrl % 'awesome_leading_man'

    # Any image source containing /comics/.
    imageSearch = compile(
        tagre("img", "src", r'([^"]*/comics/[^"]+)')
    )
    # Links containing /strips-sdb/; "cn[id]prev" is passed as `before`.
    prevSearch = compile(
        tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev")
    )
2012-06-20 19:58:13 +00:00
class Stubble(_BasicScraper):
    """Scraper settings for the comic at http://stubblecomics.com/."""

    help = 'Index format: number'

    url = 'http://stubblecomics.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '4'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Links of the form <url>?p=<digits>, with "navi-prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")
    )
2012-06-20 19:58:13 +00:00
2013-04-03 18:30:29 +00:00
class StuffNoOneToldMe(_BasicScraper):
    """Scraper settings for the comic at http://www.snotm.com/."""

    help = 'Index format: yyyy/mm/stripname'
    description = u"Everyday's life advices in the shape of witty and humorous cartoons."
    multipleImagesPerStrip = True

    url = 'http://www.snotm.com/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010/05/01'

    # Pattern for post links; shared by starter and prevSearch.
    olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", olderHref, quote="'"))
    )
    # Images are hosted on three different services; each alternative also
    # requires specific markup right after the image tag.
    imageSearch = (
        compile(
            tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
            r"(?:</a>|<br />)"
        ),
        compile(
            tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') +
            r"(?:(?:&nbsp;)?</a>|<span |<br />)"
        ),
        compile(
            tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') +
            r"</a>"
        ),
    )
    prevSearch = compile(
        tagre("a", "href", olderHref, quote="'", before="older-link")
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Use page URL to construct meaningful image name.

        The result is '<year>-<month>-<page name without extension>-<image
        file name>', taken from the last three path parts of pageUrl and the
        last path part of imageUrl.
        """
        _, year, month, page = pageUrl.rsplit('/', 3)
        page_stem = page.rsplit('.', 1)[0]
        _, image_file = imageUrl.rsplit('/', 1)
        return '%s-%s-%s-%s' % (year, month, page_stem, image_file)

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        skipped = (
            self.stripUrl % '2012/08/self-rant',  # no comic
            self.stripUrl % '2012/06/if-you-wonder-where-ive-been',  # video
            self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to',  # video
            self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo',  # no comic
            self.stripUrl % '2010/11/ear-infection',  # no comic
        )
        return url in skipped
2012-06-20 19:58:13 +00:00
class StrawberryDeathCake(_BasicScraper):
    """Scraper settings for the comic at http://strawberrydeathcake.com/."""

    help = 'Index format: stripname'
    description = u"Update2 I'm alive and still working on the comic, but progress has been slow. I'm inching my way through sketches. Update-A little break from the comic."

    url = 'http://strawberrydeathcake.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + 'archive/%s/'

    # Image sources below <url>wp-content/webcomic/.
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl)
    )
    # Link below <url>archive/, with "previous" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")
    )
2012-06-20 19:58:13 +00:00
class SuburbanTribe(_BasicScraper):
    """Scraper settings for the comic at http://www.pixelwhip.com/."""

    help = 'Index format: nnnn'

    url = 'http://www.pixelwhip.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Links of the form <url>?p=<digits>, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )
class SomethingPositive(_BasicScraper):
    """Scraper settings for the comic at http://www.somethingpositive.net/."""

    help = 'Index format: mmddyyyy'

    url = 'http://www.somethingpositive.net/'
    stripUrl = url + 'sp%s.shtml'

    # Either a numbered sp<digits>.png image or the fixed twither.gif.
    imageSearch = (
        compile(tagre("img", "src", r'(sp\d+\.png)')),
        compile(tagre("img", "src", r'(twither\.gif)')),
    )
    # Relative sp<digits>.shtml link followed by either the "previous"
    # button image or the plain text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(sp\d+\.shtml)') +
        "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)"
    )
class StarCrossdDestiny(_BasicScraper):
    """Scraper settings for the comic at http://www.starcrossd.net/."""

    help = 'Index format: nnnnnnnn'
    description = u'Furturistic fantasy. A group of outcasts fight to survive in a world that shuns them as freaks.'

    baseUrl = 'http://www.starcrossd.net/'
    # Regex-escaped copy of baseUrl for use in prevSearch.
    rurl = escape(baseUrl)
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'

    # Images below ch1/, strips/ or book2/, with or without "www.".
    imageSearch = compile(
        tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)')
    )
    # Archive link (optionally below ch1/) whose tag ends in ">prev".
    prevSearch = compile(
        r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl,
        IGNORECASE
    )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<directory>-<file name without extension>' for imageUrl.

        URLs without 'ch1' get 'strips' or 'images' replaced by 'book1';
        URLs with 'ch1' get a 'strips/' part removed.
        """
        if 'ch1' not in imageUrl:
            # At first all images were stored in a strips/ directory but that
            # was changed with the introduction of book2
            imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
        elif 'strips' in imageUrl:
            imageUrl = imageUrl.replace('strips/', '')
        directory, filename = imageUrl.split('/')[-2:]
        return directory + '-' + splitext(filename)[0]
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _StrangeCandy(_BasicScraper):
    """Scraper settings for the comic at http://www.strangecandy.net/."""

    help = 'Index format: yyyyddmm'

    url = 'http://www.strangecandy.net/'
    stripUrl = url + 'd/%s.html'

    # Relative image source of the form /comics/<digits>.jpg.
    imageSearch = compile(
        tagre("img", "src", r'(/comics/\d+\.jpg)')
    )
    # Relative /d/<digits>.html link followed by the image whose alt text is
    # "Previous comic".
    prevSearch = compile(
        tagre("a", "href", r'(/d/\d+\.html)') +
        tagre("img", "alt", "Previous comic")
    )
2013-02-13 16:53:11 +00:00
class SupernormalStep(_BasicScraper):
    """Scraper settings for the comic at http://supernormalstep.com/."""

    help = 'Index format: number'
    description = u'Supernormal Step - Magic, Face Punching, and a Robot or Two'

    url = 'http://supernormalstep.com/'
    # Regex-escaped copy of url for use inside the patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '8'

    # Image sources below <url>comics/.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # Links of the form <url>?p=<digits>, with "prev" passed as `after`.
    prevSearch = compile(
        tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")
    )