2012-06-20 20:41:04 +00:00
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2014-01-05 15:50:57 +00:00
# Copyright (C) 2012-2014 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2013-11-07 20:22:49 +00:00
from re import compile , escape , IGNORECASE , sub
2012-06-20 19:58:13 +00:00
from os . path import splitext
2012-10-11 10:03:12 +00:00
from . . scraper import _BasicScraper
2013-03-06 19:00:30 +00:00
from . . helpers import indirectStarter , bounceStarter
2013-04-25 19:14:32 +00:00
from . . util import tagre , getPageContent
class SabrinaOnline(_BasicScraper):
    # Scraper settings for http://sabrina-online.com/.
    # There is no stripUrl here; the start page is looked up by starter().
    description = u'Skunks, computers and porn'
    url = 'http://sabrina-online.com/'
    imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)'))
    # A link to an NNNN-NN.html page directly followed by the back-button image.
    prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") +
                         tagre("img", "src", "b_back.gif"))
    help = 'Index format: n (unpadded)'
    adult = True
    multipleImagesPerStrip = True

    @classmethod
    def starter(cls):
        """Pick last one in a list of archive pages.

        Fetches ``archive.html`` below ``cls.url`` and returns ``cls.url``
        joined with the last NNNN-NN.html link found in it. Raises
        IndexError when the archive page contains no such link.
        """
        archive = cls.url + 'archive.html'
        data = getPageContent(archive, cls.session)[0]
        search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)"))
        archivepages = search.findall(data)
        return cls.url + archivepages[-1]
2012-06-20 19:58:13 +00:00
class SailorsunOrg(_BasicScraper):
    # Scraper settings for http://sailorsun.org/.
    url = 'http://sailorsun.org/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '21'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: n (unpadded)'
class SamAndFuzzy(_BasicScraper):
    # Scraper settings for Sam and Fuzzy.
    # Note that url uses the www. host while stripUrl does not.
    description = u"Serial about a cab driver and his bear-like friend by Sam Logan. Offers a reader's guide, forum, and frequently asked questions."
    url = 'http://www.samandfuzzy.com/'
    stripUrl = 'http://samandfuzzy.com/%s'
    firstStripUrl = stripUrl % '1'
    # Plain regexes (no tagre helper): capture the /comics/ path that is
    # followed by an alt attribute, and the href in front of the prev image.
    imageSearch = compile(r'(/comics/.+?)" alt')
    prevSearch = compile(r'"><a href="(.+?)"><img src="imgint/nav_prev.gif"')
    help = 'Index format: nnnn'
2013-02-13 16:53:11 +00:00
class SandraAndWoo(_BasicScraper):
    # Scraper settings for http://www.sandraandwoo.com/ (English edition).
    description = u'Sandra and Woo: a webcomic about friendship, life and the art of (not) eating squirrels, featuring the girl Sandra and her pet raccoon Woo.'
    url = 'http://www.sandraandwoo.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-sandra-and-woo'
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
2013-03-19 19:54:16 +00:00
class SandraAndWooGerman(_BasicScraper):
    # Scraper settings for the German edition under /woode/ (lang = 'de').
    description = u'Sandra und Woo: ein Webcomic \xfcber Freundschaft, das Leben und die Kunst (keine) Eichh\xf6rnchen zu essen; mit dem M\xe4dchen Sandra und ihrem Waschb\xe4ren Woo in den Hauptrollen'
    url = 'http://www.sandraandwoo.com/woode/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/10/19/ein-ausgefuchster-waschbar'
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
    lang = 'de'
2013-04-20 16:51:06 +00:00
class SandraOnTheRocks(_BasicScraper):
    # Scraper settings for http://www.sandraontherocks.com/.
    url = 'http://www.sandraontherocks.com/'
    stripUrl = url + 'strips-sotr/%s'
    firstStripUrl = stripUrl % 'start_by_running'
    imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sotr/[^"]+)', before="cn[id]prev"))
    help = 'Index format: name'
2013-03-19 19:54:16 +00:00
2013-12-10 18:50:21 +00:00
class ScandinaviaAndTheWorld(_BasicScraper):
    # Scraper settings for http://satwcomic.com/.
    description = u'Scandinavia and the World'
    url = 'http://satwcomic.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
    imageSearch = compile(tagre("img", "src", r'(%sart/[^"/]+)' % rurl))
    # The whitespace separator is a raw string so that "\s" is not treated
    # as an (invalid) string escape; the resulting pattern is unchanged.
    prevSearch = compile(tagre("a", "href", r'(%s[^"/]+)' % rurl) + r"\s*" +
                         tagre('span', 'class', 'spritePrevious'))
    help = 'Index format: stripname'
2012-06-20 19:58:13 +00:00
class ScaryGoRound(_BasicScraper):
    # Scraper settings for http://www.scarygoround.com/.
    url = 'http://www.scarygoround.com/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20090918'
    imageSearch = compile(tagre("img", "src", r'(strips/\d+\.png)'))
    # A ?date= link whose tag is immediately followed by the text "Previous".
    prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + "Previous")
    help = 'Index format: n (unpadded)'
2013-02-06 21:08:36 +00:00
class ScenesFromAMultiverse(_BasicScraper):
    # Scraper settings for http://amultiverse.com/.
    description = u'SFAM Guest Month wraps up today with a contribution by Meredith Gran of Octopus Pie that is sure to tickle and delight even the grumpiest of codgers.'
    url = 'http://amultiverse.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/06/14/parenthood'
    # Two alternatives: the image directly inside the "comic" div, or the
    # image wrapped in a link inside that div.
    imageSearch = (
        compile(tagre("div", "id", "comic") + r"\s*" +
                tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre("div", "id", "comic") + r"\s*" + tagre("a", "href", r'[^"]*') +
                tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)),
    )
    prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
2012-11-20 17:53:53 +00:00
class SchlockMercenary(_BasicScraper):
    # Scraper settings for http://www.schlockmercenary.com/.
    description = u'2 days ago ... Travel the galaxy. Meet new and fascinating life-forms.'
    url = 'http://www.schlockmercenary.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '2000-06-12'
    # Images are matched on the static. host, not on the main site host.
    imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
    multipleImagesPerStrip = True
    prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous"))
    help = 'Index format: yyyy-mm-dd'
2012-11-20 17:53:53 +00:00
2012-06-20 19:58:13 +00:00
class SchoolBites(_BasicScraper):
    # Scraper settings for http://schoolbites.net/.
    url = 'http://schoolbites.net/'
    stripUrl = url + 'd/%s.html'
    # Images are matched on the cdn. host, not on the main site host.
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.schoolbites\.net/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://schoolbites\.net/d/\d+\.html)', after="prev"))
    help = 'Index format: yyyymmdd'
2013-03-25 18:48:32 +00:00
class Schuelert(_BasicScraper):
    # Scraper settings for http://www.schuelert.de/ (lang = 'de').
    url = 'http://www.schuelert.de/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php?paged=%s'
    firstStripUrl = stripUrl % '5'
    imageSearch = compile(tagre("img", "src", r"(%swp-content/[^']+)" % rurl, quote="'"))
    # NOTE(review): the link text is matched as a literal "«" character;
    # confirm whether the page emits the character or an HTML entity.
    prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?paged=\d+)' % rurl) + "«")
    multipleImagesPerStrip = True
    help = 'Index format: none'
    lang = 'de'
2013-04-09 17:38:16 +00:00
class Science(_BasicScraper):
    # Scraper settings for http://sci-ence.org/.
    url = 'http://sci-ence.org/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+/)' % rurl, after="prev"))
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    help = 'Index format: stripname'
    description = u'A comic about science, technology, skepticism, geekery, video games, atheism, and more.'
2013-01-29 20:23:32 +00:00
class SequentialArt(_BasicScraper):
    # Scraper settings for http://www.collectedcurios.com/sequentialart.php.
    url = 'http://www.collectedcurios.com/sequentialart.php'
    stripUrl = url + '?s=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip"))
    # The image name is a raw string so that "\." is not treated as an
    # (invalid) string escape; the resulting pattern is unchanged.
    prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') +
                         tagre("img", "src", r"Nav_BackOne\.gif"))
    help = 'Index format: name'
2013-03-06 19:21:10 +00:00
class SexyLosers(_BasicScraper):
    # Scraper settings for http://www.sexylosers.com/ (flagged adult).
    adult = True
    url = 'http://www.sexylosers.com/'
    stripUrl = url + '%s.html'
    # Plain case-insensitive regexes written against the site's raw markup.
    imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
    prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
    help = 'Index format: nnn'
    starter = indirectStarter(url,
        compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<page>-<image>' built from the two URL basenames.

        Each basename is the last path segment cut at its first dot.
        """
        index = pageUrl.split('/')[-1].split('.')[0]
        title = imageUrl.split('/')[-1].split('.')[0]
        return index + '-' + title
2013-04-28 17:58:38 +00:00
# XXX site has been hacked
class _ShadowGirls(_BasicScraper):
    # Scraper settings for http://www.shadowgirlscomic.com/.
    description = u"It's like H.P. Lovecraft meets the Gilmore Girls!"
    url = 'http://www.shadowgirlscomic.com/'
    stripUrl = url + 'comics/%s'
    firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome'
    imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r'([^"]*)', after='navi-prev'))
    help = 'Index format: custom'
    # The start page is taken from a /comics/ link found on the front page.
    starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]*/comics/[^"]+)')))
2012-11-20 17:53:53 +00:00
class Sheldon(_BasicScraper):
    # Scraper settings for http://www.sheldoncomics.com/.
    description = u'The story of a software company tycoon billionaire ten-year-old, his grampa, his duck, his pug and a lizard.'
    url = 'http://www.sheldoncomics.com/'
    # Regex-escaped site URL, interpolated into prevSearch below.
    rurl = escape(url)
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '011130'
    # Images are matched on the cdn. host, not on the main site host.
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, after="sidenav-prev"))
    help = 'Index format: yymmdd'
2012-11-20 17:53:53 +00:00
2013-07-04 18:20:26 +00:00
class ShermansLagoon(_BasicScraper):
    # Scraper settings for http://shermanslagoon.com/.
    description = u"Sherman's Lagoon by Jim Toomey"
    url = 'http://shermanslagoon.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'comics/%s'
    # stripUrl already ends in "comics/", so the index must not start with
    # a slash; a leading slash yields ".../comics//december-29-2003/", which
    # differs from the page URLs captured by prevSearch.
    firstStripUrl = stripUrl % 'december-29-2003/'
    imageSearch = compile(tagre("img", "src", r'(http://safr\.kingfeatures\.com/idn/etv/zone/xml/content.php\?file=[^"]+)'))
    # NOTE(review): the link text is matched as a literal "«" character;
    # confirm whether the page emits the character or an HTML entity.
    prevSearch = compile(tagre("a", "href", r'(%scomics/[^"]+/)' % rurl) + '« previous')
    starter = bounceStarter(url,
        compile(tagre("a", "href", r'(%scomics/[^"]+/)' % rurl, after="next")))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return 'year-month-day' taken from the page URL.

        The second-to-last '/'-separated segment of pageUrl must have the
        form monthname-day-year; any other number of '-' parts raises
        ValueError from the unpacking.
        """
        name = pageUrl.split('/')[-2]
        # name is monthname-day-year
        month, day, year = name.split('-')
        return "%s-%s-%s" % (year, month, day)
2012-12-08 20:30:51 +00:00
class Shivae(_BasicScraper):
    # Scraper settings for http://shivae.net/.
    url = 'http://shivae.net/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'blog/%s/'
    firstStripUrl = stripUrl % '2007/09/21/09212007'
    imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="navi-prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
2012-12-12 16:41:29 +00:00
# XXX disallowed by robots.txt
class _Shortpacked(_BasicScraper):
    # Scraper settings for http://www.shortpacked.com/.
    url = 'http://www.shortpacked.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/comic/book-nn/mm-name1/name2'
2012-11-20 17:53:53 +00:00
2012-06-20 19:58:13 +00:00
class SinFest(_BasicScraper):
    # Scraper settings for http://www.sinfest.net/.
    # Sets an explicit name attribute ('KeenSpot/SinFest').
    description = u'Strip dealing with contemporary issues and religion. Created by Tatsuya Ishida.'
    name = 'KeenSpot/SinFest'
    url = 'http://www.sinfest.net/'
    stripUrl = url + 'archive_page.php?comicID=%s'
    # Plain regexes (no tagre helper).
    imageSearch = compile(r'<img src=".+?(/comikaze/comics/.+?)"')
    prevSearch = compile(r'(/archive_page.php\?comicID=.+?)".+?prev_a')
    help = 'Index format: n (unpadded)'
2013-04-05 05:20:50 +00:00
# XXX disallowed by robots.txt
2013-04-05 16:47:51 +00:00
class _Sketchesnatched(_BasicScraper):
    # Scraper settings for http://sketchesnatched.blogspot.com/.
    url = 'http://sketchesnatched.blogspot.com/'
    # "%%" is a literal percent sign, so the formatted URL contains "%2B01:00".
    stripUrl = url + 'search?updated-max=%s%%2B01:00&max-results=1'
    firstStripUrl = stripUrl % '2011-01-27T08:32:00'
    imageSearch = compile(tagre("meta", "content", r"(http://\d+\.bp\.blogspot\.com/[^']+)",
                                after=r'image_url', quote="'"))
    # The top-level domain is left open ([a-z]+) in the older-posts link.
    prevSearch = compile(tagre("a", "href", r"(http://sketchesnatched\.blogspot\.[a-z]+/search[^']+)",
                               before=r"blog-pager-older-link", quote="'"))
    help = 'Index format: yyyy-mm-ddThh:mm:ss'
    description = u"Artwork by Massimo Carnevale"
2013-02-07 22:02:54 +00:00
class SkinDeep(_BasicScraper):
    # Scraper settings for http://www.skindeepcomic.com/.
    url = 'http://www.skindeepcomic.com/'
    stripUrl = url + 'archive/%s/'
    # The image is the first <img> inside a span whose class starts with
    # "webcomic-object".
    imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="previous-webcomic-link"))
    help = 'Index format: custom'
2012-06-20 19:58:13 +00:00
class SlightlyDamned(_BasicScraper):
    # Scraper settings for http://www.sdamned.com/.
    url = 'http://www.sdamned.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2004/03/03142004'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/number'
2012-06-20 19:58:13 +00:00
class SluggyFreelance(_BasicScraper):
    # Scraper settings for http://www.sluggy.com/.
    url = 'http://www.sluggy.com/'
    stripUrl = url + 'comics/archives/daily/%s'
    # Plain regexes (no tagre helper).
    imageSearch = compile(r'<img src="(/images/comics/.+?)"')
    prevSearch = compile(r'<a href="(.+?)"[^>]+?><span class="ui-icon ui-icon-seek-prev">')
    multipleImagesPerStrip = True
    help = 'Index format: yymmdd'
2013-03-06 19:21:10 +00:00
class SMBC(_BasicScraper):
    # Scraper settings for http://www.smbc-comics.com/.
    description = u"Saturday Morning Breakfast Cereal"
    url = 'http://www.smbc-comics.com/'
    # Regex-escaped site URL, interpolated into imageSearch below.
    rurl = escape(url)
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'
    # File names are eight digits, an optional suffix (a word character
    # optionally followed by "2", or "-<digit>") and a 3-character extension.
    imageSearch = compile(tagre("img", "src", r"(%scomics/\d{8}(?:\w2?|-\d)?\.\w{3})\s*" % rurl, quote="'"))
    # Only the part of the href before "#comic" is captured.
    prevSearch = compile(tagre("a", "href", r'([^"]+)#comic', after="backRollover"))
    help = 'Index format: nnnn'

    def shouldSkipUrl(self, url):
        """Skip promo or missing update pages.

        Returns True when url equals one of the strip URLs listed below.
        """
        return url in (
            self.stripUrl % '2865',
            self.stripUrl % '2653',
            self.stripUrl % '2424',
            self.stripUrl % '2226',
            self.stripUrl % '2069',
            self.stripUrl % '1895',
            self.stripUrl % '1896',
            self.stripUrl % '1589',
        )
2013-03-06 19:21:10 +00:00
2013-04-03 18:30:16 +00:00
class SnowFlakes(_BasicScraper):
    # Scraper settings for http://www.snowflakescomic.com/ (endOfLife is set).
    # Strip URLs carry two parameters, id and sl.
    description = u'Snowflakes - A comic by James Ashby, Chris Jones and Zach Weiner.'
    url = 'http://www.snowflakescomic.com/'
    stripUrl = url + '?id=%s&sl=%s'
    firstStripUrl = stripUrl % ('103', '1')
    endOfLife = True
    # Image sources appear both relative and absolute, hence two patterns.
    imageSearch = (
        compile(tagre("img", "src", r'(comics/[^"]+)')),
        compile(tagre("img", "src", r'(http://www.snowflakescomic.com/comics/[^"]+)')),
    )
    prevSearch = compile(tagre("a", "href", r'(/\?id=\d+\&sl=\d)', quote="") +
                         tagre("img", "src", r'images/nav_prior-ON\.gif'))
    help = 'Index format: number'

    @classmethod
    def starter(cls):
        """Return the fixed strip URL for id 530, sl 5."""
        return cls.stripUrl % ('530', '5')

    def getStripIndexUrl(self, index):
        """Return the strip URL for index.

        The sl parameter is filled with the first character of index.
        """
        return self.stripUrl % (index, index[0])

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Use strip index number for image name.

        The number is the id= value in pageUrl; the extension is whatever
        follows the last dot of imageUrl.
        """
        index = int(compile(r'id=(\d+)').search(pageUrl).group(1))
        ext = imageUrl.rsplit('.', 1)[1]
        return "SnowFlakes-%d.%s" % (index, ext)

    def shouldSkipUrl(self, url):
        """Skip pages without images."""
        return url in (
            self.stripUrl % ('279', '2'),  # no comic
            self.stripUrl % ('278', '2'),  # no comic
            self.stripUrl % ('277', '2'),  # no comic
            self.stripUrl % ('276', '2'),  # no comic
            self.stripUrl % ('275', '2'),  # no comic
            self.stripUrl % ('214', '2'),  # no comic
        )
2013-03-06 19:00:30 +00:00
class SnowFlame(_BasicScraper):
    # Scraper settings for http://www.snowflamecomic.com/.
    # Strip URLs carry a chapter and a page number.
    description = u'The fan-comic series featuring "The Man Powered by Cocaine"'
    url = 'http://www.snowflamecomic.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?comic=snowflame-%s-%s'
    firstStripUrl = stripUrl % ('01', '01')
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, after="Snow[Ff]lame"))
    prevSearch = compile(tagre("span", "class", "mininav-prev") +
                         tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl))
    starter = bounceStarter(url,
        compile(tagre("span", "class", "mininav-next") +
                tagre("a", "href", r'(%s\?comic=snowflame[^"]+)' % rurl)))
    help = 'Index format: chapter-page'

    def getStripIndexUrl(self, index):
        """Return the strip URL for an index of the form 'chapter-page'."""
        # The % operator only spreads a *tuple* over several placeholders;
        # a list counts as one value and raises TypeError for the two %s.
        return self.stripUrl % tuple(index.split('-'))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<chapter>-<page>-<image file name>'.

        Chapter and page come from the 'snowflame-X-Y' part of pageUrl;
        the file name is the last path segment of imageUrl.
        """
        prefix, filename = imageUrl.rsplit('/', 1)
        ro = compile(r'snowflame-([^-]+)-([^-]+)')
        mo = ro.search(pageUrl)
        chapter = mo.group(1)
        page = mo.group(2)
        return "%s-%s-%s" % (chapter, page, filename)
2012-06-20 19:58:13 +00:00
class SodiumEyes(_BasicScraper):
    # Scraper settings for http://sodiumeyes.com/.
    url = 'http://sodiumeyes.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/11/08/damning-evidence'
    # quote="" with a value pattern that stops at the first space.
    imageSearch = compile(tagre("img", "src", r'(%scomic/[^ ]+)' % rurl, quote=""))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
2012-06-20 19:58:13 +00:00
2012-12-08 20:30:51 +00:00
class Sorcery101(_BasicScraper):
    # Scraper settings for the sorcery-101/ section of www.sorcery101.net.
    description = u'Welcome to the site of Kel McDonald, professional comic illustrator and writer.'
    baseUrl = 'http://www.sorcery101.net/'
    url = baseUrl + 'sorcery-101/'
    # Escaped from baseUrl (not url): images are matched under wp-content/
    # at the site root, while prev links are matched under sorcery-101/.
    rurl = escape(baseUrl)
    stripUrl = url + '%s/'
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%ssorcery-101/[^"]+)' % rurl, after="previous-"))
    help = 'Index format: stripname'
2013-02-06 21:08:36 +00:00
class SpaceTrawler(_BasicScraper):
    # Scraper settings for http://spacetrawler.com/.
    url = 'http://spacetrawler.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2010/01/01/spacetrawler-4'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
2013-04-10 16:36:33 +00:00
class Spamusement(_BasicScraper):
    # Scraper settings for http://spamusement.com/.
    description = u'Spamusement! Poorly-drawn cartoons inspired by actual spam subject lines!'
    url = 'http://spamusement.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php/comics/view/%s'
    # Plain case-insensitive regexes (no tagre helper).
    imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
    prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl, IGNORECASE)
    help = 'Index format: n (unpadded)'
    # The start page is found with the same pattern as the previous link.
    starter = indirectStarter(url, prevSearch)
2012-06-20 19:58:13 +00:00
class SpareParts(_BasicScraper):
    # Scraper settings for http://www.sparepartscomics.com/.
    description = u'Spare Parts by Terrence and Isabel Marks!'
    baseUrl = 'http://www.sparepartscomics.com/'
    # url is pinned to one dated page rather than the site root.
    url = baseUrl + 'comics/?date=20080328'
    stripUrl = baseUrl + 'comics/index.php?date=%s'
    firstStripUrl = stripUrl % '20031022'
    imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic")
    help = 'Index format: yyyymmdd'
2012-06-20 19:58:13 +00:00
2013-01-29 20:52:26 +00:00
class Spinnerette(_BasicScraper):
    # Scraper settings for http://www.spinnyverse.com/.
    url = 'http://www.spinnyverse.com/'
    # Regex-escaped site URL; not used by the patterns in this class.
    rurl = escape(url)
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '2010/02/09/02092010'
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)', after="comic"))
    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=[^"]+)', after="prev"))
    help = 'Index format: number'
2012-12-08 20:30:51 +00:00
class SPQRBlues(_BasicScraper):
    # Scraper settings for http://spqrblues.com/IV/.
    description = u"You can skip the next comic if you'd like to pass over the rest of this (very mildly) mature theme. I've tried to clarify the legalities as pointed out in the comments."
    url = 'http://spqrblues.com/IV/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '1467'
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+\.png)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: number'
2014-01-08 03:47:58 +00:00
class StandStillStaySilent(_BasicScraper):
    # Scraper settings for http://www.sssscomic.com/comic.php.
    url = 'http://www.sssscomic.com/comic.php'
    # Regex-escaped site URL; not used by the patterns in this class.
    rurl = escape(url)
    stripUrl = url + '?page=%s'
    firstStripUrl = stripUrl % '1'
    # All three tag patterns are built with quote="'".
    imageSearch = compile(tagre("img", "src", r"(comicpages/[^']+)", before="comicnormal", quote="'"))
    prevSearch = compile(tagre("a", "href", r"([^']+)", quote="'") + tagre("div", "id", r'navprev', quote="'"))
    help = 'Index Format: number'
    description = u'"Stand Still. Stay Silent" is a post-apocalyptic adventure story with a rather light tone and careless pace.'
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _StationV3(_BasicScraper):
    # Scraper settings for http://www.stationv3.com/.
    url = 'http://www.stationv3.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # A d/<digits>.html link directly followed by the "previous" button image.
    prevSearch = compile(tagre("a", "href", r'(%sd/\d+\.html)' % rurl) +
                         tagre("img", "src", r'http://www\.stationv3\.com/images/previous\.gif'))
    help = 'Index format: yyyymmdd'
2013-03-03 21:03:27 +00:00
class StickyDillyBuns(_BasicScraper):
    # Scraper settings for http://www.stickydillybuns.com/.
    url = 'http://www.stickydillybuns.com/'
    stripUrl = url + 'strips-sdb/%s'
    firstStripUrl = stripUrl % 'awesome_leading_man'
    imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev"))
    help = 'Index format: name'
2012-06-20 19:58:13 +00:00
class Stubble(_BasicScraper):
    # Scraper settings for http://stubblecomics.com/.
    url = 'http://stubblecomics.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '4'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
2013-04-03 18:30:29 +00:00
class StuffNoOneToldMe(_BasicScraper):
    # Scraper settings for http://www.snotm.com/.
    # Images are matched on three different image hosts.
    description = u"Everyday's life advices in the shape of witty and humorous cartoons."
    url = 'http://www.snotm.com/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010/05/01'
    # Shared by starter and prevSearch: a post URL of the form
    # <site>/<digits>/<digits>/<name>.html.
    olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
    starter = indirectStarter(url,
        compile(tagre("a", "href", olderHref, quote="'")))
    # One pattern per image host; each also requires specific markup
    # directly after the image tag.
    # NOTE(review): the whitespace inside "(?: )?" is kept exactly as it
    # appears in the file; confirm whether it should be a non-breaking
    # space or an HTML entity.
    imageSearch = (
        compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:</a>|<br />)"),
        compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + r"(?:(?: )?</a>|<span |<br />)"),
        compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r"</a>"),
    )
    prevSearch = compile(tagre("a", "href", olderHref, quote="'", before="older-link"))
    multipleImagesPerStrip = True
    help = 'Index format: yyyy/mm/stripname'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Use page URL to construct meaningful image name.

        Returns '<year>-<month>-<stripname>-<image file name>', where the
        first three parts are the last three path segments of pageUrl
        (stripname without its extension).
        """
        parts, year, month, stripname = pageUrl.rsplit('/', 3)
        stripname = stripname.rsplit('.', 1)[0]
        parts, imagename = imageUrl.rsplit('/', 1)
        return '%s-%s-%s-%s' % (year, month, stripname, imagename)

    def shouldSkipUrl(self, url):
        """Skip pages without images."""
        return url in (
            self.stripUrl % '2012/08/self-rant',  # no comic
            self.stripUrl % '2012/06/if-you-wonder-where-ive-been',  # video
            self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to',  # video
            self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo',  # no comic
            self.stripUrl % '2010/11/ear-infection',  # no comic
        )
2012-06-20 19:58:13 +00:00
class StrawberryDeathCake(_BasicScraper):
    # Scraper settings for http://strawberrydeathcake.com/.
    description = u"Update2 I'm alive and still working on the comic, but progress has been slow. I'm inching my way through sketches. Update-A little break from the comic."
    url = 'http://strawberrydeathcake.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + 'archive/%s/'
    imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
    help = 'Index format: stripname'
2012-06-20 19:58:13 +00:00
class SuburbanTribe(_BasicScraper):
    # Scraper settings for http://www.pixelwhip.com/.
    url = 'http://www.pixelwhip.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: nnnn'
class SomethingPositive(_BasicScraper):
    # Scraper settings for http://www.somethingpositive.net/.
    url = 'http://www.somethingpositive.net/'
    stripUrl = url + 'sp%s.shtml'
    # Either a numbered sp<digits>.png strip or the image twither.gif.
    imageSearch = (
        compile(tagre("img", "src", r'(sp\d+\.png)')),
        compile(tagre("img", "src", r'(twither\.gif)')),
    )
    # The link must be followed by the "previous" button image or by the
    # text "Previous".
    prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') +
                         "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
    help = 'Index format: mmddyyyy'
class StarCrossdDestiny(_BasicScraper):
    # Scraper settings for http://www.starcrossd.net/.
    description = u'Furturistic fantasy. A group of outcasts fight to survive in a world that shuns them as freaks.'
    baseUrl = 'http://www.starcrossd.net/'
    # Regex-escaped base URL, interpolated into prevSearch below.
    rurl = escape(baseUrl)
    url = baseUrl + 'comic.html'
    stripUrl = baseUrl + 'archives/%s.html'
    firstStripUrl = stripUrl % '00000001'
    imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
    prevSearch = compile(r'<a href="(%s(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev' % rurl, IGNORECASE)
    help = 'Index format: nnnnnnnn'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return '<directory>-<file name without extension>' for imageUrl.

        Before the split the URL is normalised: without 'ch1' in it, every
        'strips' or 'images' becomes 'book1'; with 'ch1' and 'strips' in it,
        the 'strips/' component is removed.
        """
        if 'ch1' not in imageUrl:
            # At first all images were stored in a strips/ directory but
            # that was changed with the introduction of book2
            imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
        elif 'strips' in imageUrl:
            imageUrl = imageUrl.replace('strips/', '')
        directory, filename = imageUrl.split('/')[-2:]
        filename = splitext(filename)[0]
        return directory + '-' + filename
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _StrangeCandy(_BasicScraper):
    # Scraper settings for http://www.strangecandy.net/.
    url = 'http://www.strangecandy.net/'
    stripUrl = url + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
    # A /d/<digits>.html link directly followed by an image whose alt text
    # is "Previous comic".
    prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
    help = 'Index format: yyyyddmm'
2013-02-13 16:53:11 +00:00
class SupernormalStep(_BasicScraper):
    # Scraper settings for http://supernormalstep.com/.
    description = u'Supernormal Step - Magic, Face Punching, and a Robot or Two'
    url = 'http://supernormalstep.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '8'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: number'