# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam
from re import compile, escape, MULTILINE, IGNORECASE, sub
from os.path import splitext
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, bounceStarter
from ..util import tagre, getPageContent
class SabrinaOnline(_BasicScraper):
description = u'Skunks, computers and porn'
url = 'http://sabrina-online.com/'
imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
tagre("img", "src", "b_back.gif"))
help = 'Index format: n (unpadded)'
adult = True
multipleImagesPerStrip = True
@classmethod
def starter(cls):
"""Pick last one in a list of archive pages."""
archive = cls.url + 'archive.html'
data = getPageContent(archive, cls.session)[0]
search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
archivepages = search.findall(data)
return cls.url + archivepages[-1]
class SailorsunOrg(_BasicScraper):
url = 'http://sailorsun.org/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '21'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
rurl = escape(url)
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: n (unpadded)'
class SamAndFuzzy(_BasicScraper):
description = u"Serial about a cab driver and his bear-like friend by Sam Logan. Offers a reader's guide, forum, and frequently asked questions."
url = 'http://www.samandfuzzy.com/'
stripUrl = 'http://samandfuzzy.com/%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(r'(/comics/.+?)" alt')
prevSearch = compile(r'"><<', IGNORECASE)
help = 'Index format: nnn'
starter = indirectStarter(url,
compile(r'SEXY LOSERS Latest SL Comic \(#\d+\)', IGNORECASE))
@classmethod
def namer(cls, imageUrl, pageUrl):
index = pageUrl.split('/')[-1].split('.')[0]
title = imageUrl.split('/')[-1].split('.')[0]
return index + '-' + title
# XXX site has been hacked
class _ShadowGirls(_BasicScraper):
description = u"It's like H.P. Lovecraft meets the Gilmore Girls!"
url = 'http://www.shadowgirlscomic.com/'
stripUrl = url + 'comics/%s'
firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='navi-prev'))
help = 'Index format: custom'
starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]*/comics/[^"]+)')))
class Sheldon(_BasicScraper):
description = u'The story of a software company tycoon billionaire ten-year-old, his grampa, his duck, his pug and a lizard.'
url = 'http://www.sheldoncomics.com/'
stripUrl = url + 'archive/%s.html'
firstStripUrl = stripUrl % '011130'
imageSearch = compile(tagre("img", "src", r'(/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)', after="sidenav-prev"))
help = 'Index format: yymmdd'
class Shivae(_BasicScraper):
url = 'http://shivae.net/'
rurl = escape(url)
stripUrl = url + 'blog/%s/'
firstStripUrl = stripUrl % '2007/09/21/09212007'
imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="Previous"))
help = 'Index format: yyyy/mm/dd/stripname'
# XXX disallowed by robots.txt
class _Shortpacked(_BasicScraper):
url = 'http://www.shortpacked.com/'
rurl = escape(url)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/comic/book-nn/mm-name1/name2'
class SinFest(_BasicScraper):
description = u'Strip dealing with contemporary issues and religion. Created by Tatsuya Ishida.'
name = 'KeenSpot/SinFest'
url = 'http://www.sinfest.net/'
stripUrl = url + 'archive_page.php?comicID=%s'
imageSearch = compile(r']+?>')
help = 'Index format: yymmdd'
class SMBC(_BasicScraper):
description = u"Saturday Morning Breakfast Cereal"
url = 'http://www.smbc-comics.com/'
rurl = escape(url)
stripUrl = url + 'index.php?db=comics&id=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r"(%scomics/\d{8}(?:\w2?|-\d)?\.\w{3})\s*" % rurl, quote="'"))
prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
help = 'Index format: nnnn'
def shouldSkipUrl(self, url):
"""Skip promo or missing update pages."""
return url in (
self.stripUrl % '2865',
self.stripUrl % '2653',
self.stripUrl % '2424',
self.stripUrl % '2226',
self.stripUrl % '2069',
self.stripUrl % '1895',
self.stripUrl % '1896',
self.stripUrl % '1589',
)
class SnowFlakes(_BasicScraper):
description = u'Snowflakes - A comic by James Ashby, Chris Jones and Zach Weiner.'
url = 'http://www.snowflakescomic.com/'
stripUrl = url + '?id=%s&sl=%s'
firstStripUrl = stripUrl % ('103', '1')
imageSearch = (
compile(tagre("img", "src", r'(comics/[^"]+)')),
compile(tagre("img", "src", r'(http://www.snowflakescomic.com/comics/[^"]+)')),
)
prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
tagre("img", "src", r'images/nav_prior-ON\.gif'))
help = 'Index format: number'
@classmethod
def starter(cls):
return cls.stripUrl % ('530', '5')
def getStripIndexUrl(self, index):
return self.stripUrl % (index, index[0])
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use strip index number for image name."""
index = int(compile(r'id=(\d+)').search(pageUrl).group(1))
ext = imageUrl.rsplit('.', 1)[1]
return "SnowFlakes-%d.%s" % (index, ext)
def shouldSkipUrl(self, url):
"""Skip pages without images."""
return url in (
self.stripUrl % ('279', '2'), # no comic
self.stripUrl % ('278', '2'), # no comic
self.stripUrl % ('277', '2'), # no comic
self.stripUrl % ('276', '2'), # no comic
self.stripUrl % ('275', '2'), # no comic
self.stripUrl % ('214', '2'), # no comic
)
class SnowFlame(_BasicScraper):
description = u'The fan-comic series featuring "The Man Powered by Cocaine"'
url = 'http://www.snowflamecomic.com/'
rurl = escape(url)
stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01')
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame the fan made"))
prevSearch = compile(tagre("span", "class", "mininav-prev") +
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
starter = bounceStarter(url,
compile(tagre("span", "class", "mininav-next") +
tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
help = 'Index format: chapter-page'
def getStripIndexUrl(self, index):
return self.stripUrl % index.split('-')
@classmethod
def namer(cls, imageUrl, pageUrl):
prefix, filename = imageUrl.rsplit('/', 1)
ro = compile(r'snowflame-([^-]+)-([^-]+)')
mo = ro.search(pageUrl)
chapter = mo.group(1)
page = mo.group(2)
return "%s-%s-%s" % (chapter, page, filename)
class SodiumEyes(_BasicScraper):
url = 'http://sodiumeyes.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote=""))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class Sorcery101(_BasicScraper):
description = u'Welcome to the site of Kel McDonald, professional comic illustrator and writer.'
baseUrl = 'http://www.sorcery101.net/'
url = baseUrl + 'sorcery-101/'
rurl = escape(baseUrl)
stripUrl = url + '%s/'
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl, after="previous-"))
help = 'Index format: stripname'
class SpaceTrawler(_BasicScraper):
url = 'http://spacetrawler.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class Spamusement(_BasicScraper):
description = u'Spamusement! Poorly-drawn cartoons inspired by actual spam subject lines!'
url = 'http://spamusement.com/'
rurl = escape(url)
stripUrl = url + 'index.php/comics/view/%s'
imageSearch = compile(r'' % rurl, IGNORECASE)
help = 'Index format: n (unpadded)'
starter = indirectStarter(url, prevSearch)
class SpareParts(_BasicScraper):
description = u'Spare Parts by Terrence and Isabel Marks!'
baseUrl = 'http://www.sparepartscomics.com/'
url = baseUrl + 'comics/?date=20080328'
stripUrl = baseUrl + 'comics/index.php?date=%s'
firstStripUrl = stripUrl % '20031022'
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic")
help = 'Index format: yyyymmdd'
class Spinnerette(_BasicScraper):
url = 'http://www.spinnyverse.com/'
rurl = escape(url)
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2010/02/09/02092010'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="Previous Comic"))
help = 'Index format: number'
class SPQRBlues(_BasicScraper):
description = u"You can skip the next comic if you'd like to pass over the rest of this (very mildly) mature theme. I've tried to clarify the legalities as pointed out in the comments."
url = 'http://spqrblues.com/IV/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '1467'
imageSearch = compile(tagre("img", "src", r'(%scomics/\d+\.png)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: number'
# XXX disallowed by robots.txt
class _StationV3(_BasicScraper):
url = 'http://www.stationv3.com/'
rurl = escape(url)
stripUrl = url + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sd/\d+\.html)' % rurl) +
tagre("img", "src", r'http://www\.stationv3\.com/images/previous\.gif'))
help = 'Index format: yyyymmdd'
class StickyDillyBuns(_BasicScraper):
url = 'http://www.stickydillybuns.com/'
stripUrl = url + 'strips-sdb/%s'
firstStripUrl = stripUrl % 'awesome_leading_man'
imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev"))
help = 'Index format: name'
class Stubble(_BasicScraper):
url = 'http://stubblecomics.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '4'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
help = 'Index format: number'
class StuffNoOneToldMe(_BasicScraper):
description = u"Everyday's life advices in the shape of witty and humorous cartoons."
url = 'http://www.snotm.com/'
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01'
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
starter = indirectStarter(url,
compile(tagre("a", "href", olderHref, quote="'")))
imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:|
)"),
compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + r"(?:(?: )?|)"),
compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r""),
)
prevSearch = compile(tagre("a", "href", olderHref, quote="'", before="older-link"))
multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/stripname'
@classmethod
def namer(cls, imageUrl, pageUrl):
"""Use page URL to construct meaningful image name."""
parts, year, month, stripname = pageUrl.rsplit('/', 3)
stripname = stripname.rsplit('.', 1)[0]
parts, imagename = imageUrl.rsplit('/', 1)
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
def shouldSkipUrl(self, url):
"""Skip pages without images."""
return url in (
self.stripUrl % '2012/08/self-rant', # no comic
self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video
self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video
self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic
self.stripUrl % '2010/11/ear-infection', # no comic
)
class StrawberryDeathCake(_BasicScraper):
description = u"Update2 I'm alive and still working on the comic, but progress has been slow. I'm inching my way through sketches. Update-A little break from the comic."
url = 'http://strawberrydeathcake.com/'
rurl = escape(url)
stripUrl = url + 'archive/%s/'
imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
help = 'Index format: stripname'
class SuburbanTribe(_BasicScraper):
url = 'http://www.pixelwhip.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: nnnn'
class SomethingPositive(_BasicScraper):
url = 'http://www.somethingpositive.net/'
stripUrl = url + 'sp%s.shtml'
imageSearch = (
compile(tagre("img", "src", r'(sp\d+\.png)')),
compile(tagre("img", "src", r'(twither\.gif)')),
)
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') +
"(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
help = 'Index format: mmddyyyy'
class StarCrossdDestiny(_BasicScraper):
description = u'Furturistic fantasy. A group of outcasts fight to survive in a world that shuns them as freaks.'
baseUrl = 'http://www.starcrossd.net/'
rurl = escape(baseUrl)
url = baseUrl + 'comic.html'
stripUrl = baseUrl + 'archives/%s.html'
firstStripUrl = stripUrl % '00000001'
imageSearch = compile(tagre("img", "src", r'(%s(?:ch1|strips|book2)/[^"]+)' % rurl))
prevSearch = compile(r']*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
help = 'Index format: nnnnnnnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
if imageUrl.find('ch1') == -1:
# At first all images were stored in a strips/ directory but that was changed with the introduction of book2
imageUrl = sub('(?:strips)|(?:images)','book1',imageUrl)
elif not imageUrl.find('strips') == -1:
imageUrl = imageUrl.replace('strips/','')
directory, filename = imageUrl.split('/')[-2:]
filename, extension = splitext(filename)
return directory + '-' + filename
# XXX disallowed by robots.txt
class _StrangeCandy(_BasicScraper):
url = 'http://www.strangecandy.net/'
stripUrl = url + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
help = 'Index format: yyyyddmm'
class SupernormalStep(_BasicScraper):
description = u'Supernormal Step - Magic, Face Punching, and a Robot or Two'
url = 'http://supernormalstep.com/'
rurl = escape(url)
stripUrl = url + '?p=%s'
firstStripUrl = stripUrl % '8'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
help = 'Index format: number'