# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam

from re import compile, escape

from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre


# Caggage (http://caggagecomic.com/): strip pages are <url>archives/<index>,
# and index 77 is recorded as the first strip.
class Caggage(_BasicScraper):
    url = 'http://caggagecomic.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '77'
    # img/src values under <url>comics/.
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # a/href values of the form <url>archives/<digits>, qualified by "prev".
    prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev"))
    help = 'Index format: number'
# CampComic (http://campcomic.com/comic/): strip pages are <url><index>,
# and index 6 is recorded as the first strip.
class CampComic(_BasicScraper):
    url = 'http://campcomic.com/comic/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '6'
    # Images are matched on a fixed CDN host rather than on the comic's own URL.
    imageSearch = compile(tagre("img", "src", r'(http://hw1\.pa-cdn\.com/camp/assets/img/katie/comics/[^"]+)'))
    # a/href values below the comic URL, qualified by the "btn btnPrev" text.
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev"))
    help = 'Index Format: number'
    description = u'Camp Weedonwantcha is a place where kids get dropped off for the summer and are never picked up again.'
# CaptainSNES (http://www.captainsnes.com/): strip pages are <url><index>/,
# where the index has the yyyy/mm/dd/nnn-stripname form given in `help`.
class CaptainSNES(_BasicScraper):
    description = u'Captain SNES'
    url = 'http://www.captainsnes.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2001/07/10/the-mistake'
    # The src value is matched up to a single quote; tagre is told quote="'".
    imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'"))
    # An a/href below the site URL immediately followed by a span of class "prev".
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("span", "class", "prev"))
    multipleImagesPerStrip = True
    help = 'Index format: yyyy/mm/dd/nnn-stripname'
# Carciphona (http://carciphona.com/): strip pages are
# <url>view.php?page=<p>&chapter=<c>. The start page is found indirectly
# through latestSearch instead of a fixed URL.
class Carciphona(_BasicScraper):
    description = u'Fantasy webcomic by Shilin. In an era where magic is forbidden, a sorceress struggles to restore her once peaceful life.'
    url = 'http://carciphona.com/'
    stripUrl = url + 'view.php?page=%s&chapter=%s'
    # The image path is taken from a div's inline style (background-image:url(...)).
    imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)'))
    prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea"))
    # A view.php link immediately followed by a span of class "linkslast".
    latestSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)') +
                           tagre("span", "class", "linkslast"))
    help = 'Index format: None'
    starter = indirectStarter(url, latestSearch)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image name from the last two path segments of imageUrl.

        Returns "volume_<second-to-last segment>_page_<last segment>".
        pageUrl is not used.
        """
        ip = imageUrl.split('/')
        return "volume_%s_page_%s" % (ip[-2], ip[-1])
# CaseyAndAndy (http://www.galactanet.com/comic/): strip pages are
# <url>view.php?strip=<number>, and strip 1 is recorded as the first.
class CaseyAndAndy(_BasicScraper):
    description = u'Casey and Andy'
    url = 'http://www.galactanet.com/comic/'
    stripUrl = url + 'view.php?strip=%s'
    firstStripUrl = stripUrl % '1'
    # Relative image names of the form Strip<digits>.gif.
    imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)'))
    # A view.php link immediately followed by the previous.gif image.
    prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)')
                         + tagre("img", "src", r'previous\.gif'))
    help = 'Index format: number'
# CasuallyKayla (http://casuallykayla.com/): strip pages are <url>?p=<index>,
# and index 89 is recorded as the first strip.
class CasuallyKayla(_BasicScraper):
    description = u'Casually Kayla: Keeping it as Casual as possible'
    url = 'http://casuallykayla.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '89'
    imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)'))
    # A div of class "nav-previous" immediately followed by the link to capture.
    prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)'))
    help = 'Index format: nnn'
# Catalyst (http://catalyst.spiderforest.com/): strip pages are
# <baseUrl>comic.php?comic_id=<number>. `url` is pinned to comic_id=415.
class Catalyst(_BasicScraper):
    baseUrl = "http://catalyst.spiderforest.com/"
    # Regex-escaped copy of baseUrl, so the site prefix matches literally.
    rurl = escape(baseUrl)
    url = baseUrl + "comic.php?comic_id=415"
    stripUrl = baseUrl + "comic.php?comic_id=%s"
    firstStripUrl = stripUrl % '1'
    # The site prefix is optional, so relative "comics/..." paths are accepted too.
    imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl))
    # The link must directly follow a literal "<center>" tag.
    prevSearch = compile("<center>" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl))
    help = 'Index format: number'
# CatAndGirl (http://catandgirl.com/): strip pages are <url>?p=<index>,
# and index 1602 is recorded as the first strip.
class CatAndGirl(_BasicScraper):
    description = u'Cat and Girl'
    url = 'http://catandgirl.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '1602'
    imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl))
    # A link whose text (anything without "<") ends in "Previous</a>".
    prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous</a>")
    help = 'Index format: n (unpadded)'

    def shouldSkipUrl(self, url):
        """Skip pages without images.

        Returns True only for the strip pages listed below.
        """
        return url in (
            self.stripUrl % '4299',
        )
# Catena (http://catenamanor.com/): strip pages are <url><index>/, where the
# index has the yyyy/mm/dd/<name> form given in `help`.
class Catena(_BasicScraper):
    url = 'http://catenamanor.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2003/06/17/the-start-of-it-all'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Any link whose tag also carries rel="prev".
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
    help = 'Index format: yyyy/mm/dd/<name>'
# CatsAndCameras (http://catsncameras.com/cnc/): strip pages are
# <url>?comic=<stripname>.
class CatsAndCameras(_BasicScraper):
    description = u'Just when you thought it was safe to go to the photographer'
    url = 'http://catsncameras.com/cnc/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '?comic=%s'
    # Images under <url>wp-content/uploads/<digits>/<digits>/.
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
    # A span of class "mininav-prev" immediately followed by the link to capture.
    prevSearch = compile(tagre("span", "class", r'mininav-prev') +
                         tagre("a", "href", r'(%s[^"]+)' % rurl))
    help = 'Index format: stripname'
# ChainsawSuit (http://chainsawsuit.com/): strip pages are <url><index>/,
# where the index has the yyyy/mm/dd/stripname form given in `help`.
class ChainsawSuit(_BasicScraper):
    description = u'internet humor, fresh-cut'
    url = 'http://chainsawsuit.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2008/03/12/strip-338'
    # Images under <url>wp-content/uploads/<digits>/<digits>/.
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
    # A dated comic link immediately followed by an img whose alt is "previous".
    prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/\d+/\d+/[^"]+)' % rurl) +
                         tagre("img", "alt", r'previous'))
    help = 'Index format: yyyy/mm/dd/stripname'
# Champ2010 (http://jedcollins.com/champ2010/): strip pages are
# <baseUrl><index>.html.
class Champ2010(_BasicScraper):
    description = u'Champ2010 - an almost daily journal comic from jed collins who is not drinking this year. webcomic'
    baseUrl = 'http://jedcollins.com/champ2010/'
    # Regex-escaped copy of baseUrl, so the site prefix matches literally.
    rurl = escape(baseUrl)
    # the latest URL is hard coded since the comic is discontinued
    url = baseUrl + 'champ-12-30-10.html'
    stripUrl = baseUrl + '%s.html'
    firstStripUrl = stripUrl % 'champ1-1-10-fuck'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous"))
    help = 'Index format: yy-dd-mm'
# ChannelAte (http://www.channelate.com/): strip pages are <url><index>/,
# where the index has the yyyy/mm/dd/name form given in `help`.
class ChannelAte(_BasicScraper):
    description = u'Comics and Cartoons by Ryan Hudson'
    url = 'http://www.channelate.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    # Image file names start with three dash-separated number groups.
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    # Links of the form <url><digits>/<digits>/<digits>/..., qualified by "prev".
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/name'
# ChasingTheSunset (http://www.fantasycomic.com/): strip pages are
# <url>index.php?p=c<n>, and n=1 is recorded as the first strip.
class ChasingTheSunset(_BasicScraper):
    description = u'Chasing the Sunset | Fantasy Webcomic | Elves, Pixies and a blue dragon with orange stripes.'
    url = 'http://www.fantasycomic.com/'
    stripUrl = url + 'index.php?p=c%s'
    firstStripUrl = stripUrl % '1'
    # A /cmsimg/ path ending at a quote, later followed on the same line by "comic-img".
    imageSearch = compile(r'(/cmsimg/.+?)".+?comic-img')
    # A link wrapping either of the two "prev" arrow images.
    # NOTE(review): the pattern expects a space between title="" and ">";
    # confirm that against the site's markup.
    prevSearch = compile(r'<a href="(.+?)" title="" ><img src="(images/eye-prev.png|images/cn-prev.png)"')
    help = 'Index format: n'
# CheckerboardNightmare (http://www.checkerboardnightmare.com/): strip pages
# are <url>d/<yyyymmdd>.shtml, and 20001110 is recorded as the first strip.
class CheckerboardNightmare(_BasicScraper):
    description = u'Checkerboard Nightmare by Kristofer Straub - A Webcomics Institution'
    url = 'http://www.checkerboardnightmare.com/'
    stripUrl = url + 'd/%s.shtml'
    firstStripUrl = stripUrl % '20001110'
    imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
    # A /d/<digits>.html or .shtml link followed, inside the next tag, by one
    # of the two navigation image names.
    prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)') + r"[^>]+/images/(?:nav_02|previous_day)\.gif")
    help = 'Index format: yyyymmdd'
# Chester5000XYV (http://jessfink.com/Chester5000XYV/): strip pages are
# <url>?p=<index>, and index 34 is recorded as the first strip.
class Chester5000XYV(_BasicScraper):
    url = 'http://jessfink.com/Chester5000XYV/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '34'
    imageSearch = compile(tagre("img", "src", r'(http://jessfink\.com/Chester5000XYV/comics/[^"]+)'))
    # Hand-written pattern: a link immediately wrapping <span class="prev">.
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
    help = 'Index format: nnn'
# Chisuji (http://www.chisuji.com/): strip pages are <url><index>/, where the
# index has the yyyy/mm/dd/strip-name form given in `help`.
class Chisuji(_BasicScraper):
    url = 'http://www.chisuji.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/05/02/chisujiposter01'
    # Hand-written patterns; the dots in the host name are unescaped and so
    # match any character.
    imageSearch = compile(r'<img src="(http://www.chisuji.com/comics/.+?)"')
    prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.chisuji.com/.+?)">')
    help = 'Index format: yyyy/mm/dd/strip-name'
# ChugworthAcademy (http://chugworth.com/): strip pages are <url>?p=<n>,
# and n=12 is recorded as the first strip.
class ChugworthAcademy(_BasicScraper):
    url = 'http://chugworth.com/'
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '12'
    # The comic image is the img whose alt text starts with "Comic".
    imageSearch = compile(r'<img src="(.+?)" alt="Comic')
    # A ?p=<1-4 digits> link whose tag later carries title="Previous".
    prevSearch = compile(r'<a href="(http://chugworth.com/\?p=\d{1,4})"[^>]+?title="Previous">')
    help = 'Index format: n (unpadded)'
# ChugworthAcademyArchive (http://chugworth.com/archive/): strip pages are
# ?strip_id=<n>. `url` is pinned to strip_id=422 and the first strip is id 0.
class ChugworthAcademyArchive(_BasicScraper):
    url = 'http://chugworth.com/archive/?strip_id=422'
    stripUrl = 'http://chugworth.com/archive/?strip_id=%s'
    firstStripUrl = stripUrl % '0'
    # The src attribute is unquoted in this pattern.
    imageSearch = compile(r'<img src=(comics/\d+.+?.\w{1,4})')
    # Single-quoted link immediately wrapping the images/previous.gif image.
    prevSearch = compile(r'<a href=\'(.+?)\'><img src=\'images/previous.gif')
    help = 'Index format: nnn'
# CigarroAndCerveja (http://www.cigarro.ca/): strip pages are <url>?p=<index>.
class CigarroAndCerveja(_BasicScraper):
    description = u'Cigarro & Cerveja'
    url = 'http://www.cigarro.ca/'
    stripUrl = url + '?p=%s'
    # A /comics/ path terminated by a single quote.
    imageSearch = compile(r"(/comics/.+?)'")
    # A /?p= path whose closing quote and ">" are directly followed by "&laq".
    prevSearch = compile(r'(/\?p=.+?)">&laq')
    help = 'Index format: non'
# Collar6 (http://collar6.com/): strip pages are <url>archive/<name>.
class Collar6(_BasicScraper):
    description = u'Collar 6'
    url = 'http://collar6.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + 'archive/%s'
    firstStripUrl = stripUrl % 'collar-6-187'
    imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous"))
    help = 'Index format: <name>'
# Comedity (http://www.comedity.com/): strip pages are
# <url>index.php?strip_id=<n>, and n=1 is recorded as the first strip.
class Comedity(_BasicScraper):
    description = u'Comedity 2.0'
    url = 'http://www.comedity.com/'
    stripUrl = url + 'index.php?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(r'<img src="(Comedity_files/.+?)"')
    # A strip link, optional spaces, then an img whose alt starts with "Prior Strip".
    prevSearch = compile(r'<a href="(/?index.php\?strip_id=\d+?)"> *<img alt=\"Prior Strip')
    help = 'Index format: n (no padding)'
# Commissioned (http://www.commissionedcomic.com/): strip pages are
# <url>?p=<n>, and n=139 is recorded as the first strip.
class Commissioned(_BasicScraper):
    url = 'http://www.commissionedcomic.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '?p=%s'
    firstStripUrl = stripUrl % '139'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev"))
    help = 'Index format: n'
# CompanyY (http://company-y.com/): strip pages are <url><index>/, where the
# index has the yyyy/mm/dd/strip-name form given in `help`.
class CompanyY(_BasicScraper):
    description = u'Company-Y'
    url = 'http://company-y.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/08/14/coming-soon'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # A div of class "nav-previous" immediately followed by the link to capture.
    prevSearch = compile(tagre("div", "class", r"nav-previous") +
                         tagre("a", "href", r'(%s[^"]+)' % rurl))
    help = 'Index format: yyyy/mm/dd/strip-name'
# Concession (http://concessioncomic.com/): strip pages are
# <url>index.php?pid=<number>, and 20060701 is recorded as the first strip.
class Concession(_BasicScraper):
    url = 'http://concessioncomic.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + 'index.php?pid=%s'
    firstStripUrl = stripUrl % '20060701'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl, after="Comic"))
    prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?pid=\d+)' % rurl, after="nav-prev"))
    help = 'Index format: number'
# CoolCatStudio (http://www.coolcatstudio.com/): strip pages are
# <url>strips-cat/<index>, and the first strip uses the literal index "first".
class CoolCatStudio(_BasicScraper):
    url = 'http://www.coolcatstudio.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + 'strips-cat/%s'
    firstStripUrl = stripUrl % 'first'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%sstrips-cat/[^"]+)' % rurl, before="prev"))
    help = 'Index format: ccsyyyymmdd'
# CorydonCafe (http://corydoncafe.com/): strip pages are <url><index>.php.
# The start page is found indirectly through a "./<digits>/..." link on `url`.
class CorydonCafe(_BasicScraper):
    description = u'Corydon Cafe humorous online comic archive of abstruse awesomeness created by a starving artist'
    url = 'http://corydoncafe.com/'
    starter = indirectStarter(url,
                              compile(tagre("a", "href", r'(\./\d+/[^"]+)')))
    stripUrl = url + '%s.php'
    # Both patterns match single-quoted attribute values (quote="'").
    imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'"))
    prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'"))
    help = 'Index format: yyyy/stripname'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the page.

        Returns the last path segment of pageUrl, cut at its first dot.
        imageUrl is not used.
        """
        return pageUrl.split('/')[-1].split('.')[0]
# CourtingDisaster (http://www.courting-disaster.com/): strip pages are
# <url>archive/<yyyymmdd>.html, and 20050112 is recorded as the first strip.
class CourtingDisaster(_BasicScraper):
    description = u'Courting Disaster by Brad Guigar - A Daily Webcomic'
    url = 'http://www.courting-disaster.com/'
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '20050112'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # An archive link immediately followed by the /images/previous.gif image.
    prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/previous\.gif'))
    help = 'Index format: yyyymmdd'
# CowboyJedi (http://www.cowboyjedi.com/): strip pages are <url><index>/,
# where the index has the yyyy/mm/dd/strip-name form given in `help`.
class CowboyJedi(_BasicScraper):
    description = u'A Long Time Ago In A Webcomic Updated Weekly...'
    url = 'http://www.cowboyjedi.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/08/10/a-new-webcomic'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev"))
    help = 'Index format: yyyy/mm/dd/strip-name'
# CrapIDrewOnMyLunchBreak (http://crap.jinwicked.com/): strip pages are
# <url><index>/, where the index has the yyyy/mm/dd/name form given in `help`.
class CrapIDrewOnMyLunchBreak(_BasicScraper):
    description = u'A semi-biographical web comic about the struggles and occasional humour of daily life, pets, friends, and more. Currently completing the missing archive comics with your help.'
    url = 'http://crap.jinwicked.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2003/07/30/jin-and-josh-decide-to-move'
    imageSearch = compile(tagre("img", "src", r'(http://crap\.jinwicked\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/name'
# CtrlAltDel (http://www.cad-comic.com/cad/): strip pages are <url><yyyymmdd>.
# Images are matched on the v.cdn.cad-comic.com host.
class CtrlAltDel(_BasicScraper):
    url = 'http://www.cad-comic.com/cad/'
    stripUrl = url + '%s'
    imageSearch = compile(tagre("img", "src", r'(http://v\.cdn\.cad-comic\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="nav-back"))
    help = 'Index format: yyyymmdd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random junk from image names.

        Takes the last path segment of imageUrl, drops everything from its
        last '-' onward, and re-appends the extension found after its last
        '.'. pageUrl is not used.
        """
        imgname = imageUrl.split('/')[-1]
        imgbase = imgname.rsplit('-', 1)[0]
        imgext = imgname.rsplit('.', 1)[1]
        return '%s.%s' % (imgbase, imgext)
# CtrlAltDelSillies: the CtrlAltDel scraper pointed at the /sillies/ section.
# Only name, url and stripUrl are overridden; the rest is inherited.
class CtrlAltDelSillies(CtrlAltDel):
    name = 'CtrlAltDel/Sillies'
    url = 'http://www.cad-comic.com/sillies/'
    stripUrl = url + '%s'
# CrimsonDark (http://www.davidcsimon.com/crimsondark/): strip pages are
# <url>index.php?view=comic&strip_id=<n>, and n=1 is recorded as the first strip.
class CrimsonDark(_BasicScraper):
    description = u'A Sci-Fi webcomic set in space in the distant future.'
    url = 'http://www.davidcsimon.com/crimsondark/'
    stripUrl = url + 'index.php?view=comic&strip_id=%s'
    firstStripUrl = stripUrl % '1'
    # Any src value containing "strips/".
    imageSearch = compile(r'src="(.+?strips/.+?)"')
    # Accepts single- or double-quoted attributes; the link must immediately
    # wrap the active_prev.png theme image.
    prevSearch = compile(r'<a href=[\'"](/crimsondark/index\.php\?view=comic&strip_id=\d+)[\'"]><img src=[\'"]themes/cdtheme/images/active_prev.png[\'"]')
    help = 'Index format: n (unpadded)'
# CraftedFables: `url` is http://www.craftedfables.com/, but strip pages and
# both search patterns are built on baseUrl (http://www.caf-fiends.net/).
class CraftedFables(_BasicScraper):
    description = u'Caf-Fiends'
    url = 'http://www.craftedfables.com/'
    baseUrl = 'http://www.caf-fiends.net/'
    # Regex-escaped copy of baseUrl, so the host prefix matches literally.
    rurl = escape(baseUrl)
    stripUrl = baseUrl + 'craftedfables/?p=%s'
    imageSearch = compile(tagre("img", "src", r'(%scraftedfables/comics/[^"]+)' % rurl))
    # A craftedfables/ link immediately followed by a span of class "prev".
    prevSearch = compile(tagre("a", "href", r'(%scraftedfables/[^"]+)' % rurl) +
                         tagre("span", "class", r"prev"))
    help = 'Index format: nnn'
# CucumberQuest (http://cucumber.gigidigi.com/): strip pages are
# <url>cq/<stripname>/. The start page is found indirectly from the
# window.location redirect in <url>recent.html.
class CucumberQuest(_BasicScraper):
    description = u'Cucumber Quest'
    url = 'http://cucumber.gigidigi.com/'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    stripUrl = url + 'cq/%s/'
    firstStripUrl = stripUrl % 'page-1'
    starter = indirectStarter(url + 'recent.html',
                              compile(r'window\.location="(/cq/[^"]+/)"'))
    # Three alternative patterns: upload file names starting with digits,
    # with "ch<digits>", or with "bonus".
    imageSearch = (
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
    )
    prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
    help = 'Index format: stripname'
# Curtailed (http://curtailedcomic.com/): strip pages are <url><index>/,
# where the index has the yyyy/mm/dd/stripname form given in `help`.
class Curtailed(_BasicScraper):
    description = u''
    url = 'http://curtailedcomic.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2012/04/08/sneeze'
    # Regex-escaped copy of url, so the site prefix matches literally.
    rurl = escape(url)
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
    # Links whose path starts with a four-digit component, qualified by "navi-prev".
    prevSearch = compile(tagre("a", "href", r'(%s\d{4}/[^"]*)' % rurl, after="navi-prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
# Curvy (http://www.c.urvy.org/): strip pages are <url>?date=<yyyymmdd>,
# and 20080329 is recorded as the first strip.
class Curvy(_BasicScraper):
    description = u'An erotic sci-fi adventure comic for adults.'
    url = 'http://www.c.urvy.org/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20080329'
    imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)'))
    # A ?date= link immediately followed by the prev.png navigation image.
    # The image patterns are raw strings so "\." is a regex escape and not an
    # invalid string escape sequence.
    prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') +
                         tagre("img", "src", r"/nav/prev\.png"))
    help = 'Index format: yyyymmdd'
    # The starter is given the "next" link pattern (same shape, next.png).
    starter = bounceStarter(url,
                            compile(tagre("a", "href", r'(/\?date=\d+)') +
                                    tagre("img", "src", r"/nav/next\.png")))

    def shouldSkipUrl(self, url):
        """Skip pages without images.

        Returns True only for the strip pages listed below.
        """
        return url in (
            self.stripUrl % '20130402',
        )
# CyanideAndHappiness (http://www.explosm.net/comics/): strip pages are
# <url><n>/, and n=15 is recorded as the first strip.
class CyanideAndHappiness(_BasicScraper):
    url = 'http://www.explosm.net/comics/'
    # The starter is given the "next" link pattern.
    starter = bounceStarter(url, compile(tagre("a", "href", r"(/comics/\d+/)", before="next")))
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '15'
    # Images under /db/files/ on explosm.net, with or without the www. prefix.
    imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?explosm\.net/db/files/[^"]+)', before="a daily webcomic"))
    prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
    help = 'Index format: n (unpadded)'

    def shouldSkipUrl(self, url):
        """Skip pages without images.

        Returns True only for the strip pages listed below.
        """
        return url in (
            self.stripUrl % "3082",
            self.stripUrl % "3360",  # video
            self.stripUrl % "3367",  # video
            self.stripUrl % "3382",  # video
            self.stripUrl % "3421",  # video
        )

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image "<strip number>_<image file name>".

        The strip number is the second-to-last path segment of pageUrl, and
        the file name is the last path segment of imageUrl.
        """
        imgname = imageUrl.split('/')[-1]
        # only get the first 100 chars for the image name
        imgname = imgname[:100]
        imgnum = pageUrl.split('/')[-2]
        return '%s_%s' % (imgnum, imgname)