2012-06-20 20:41:04 +00:00
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-02-05 18:51:46 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2013-04-10 16:19:11 +00:00
from re import compile , escape
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
from . . scraper import _BasicScraper
from . . helpers import indirectStarter
2012-11-20 17:53:53 +00:00
from . . util import tagre
2012-06-20 19:58:13 +00:00
class Galaxion(_BasicScraper):
    # Scraper settings for the Galaxion comic at galaxioncomics.com.
    description = u'Galaxion - Life. Love. Hyperspace.'
    url = 'http://galaxioncomics.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1-comic/the-story-so-far/the-story-so-far'
    # NOTE(review): tagre comes from ..util; it presumably builds a pattern
    # for the given tag/attribute/value -- confirm against its definition.
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
2012-06-20 19:58:13 +00:00
class Garanos(_BasicScraper):
    # Scraper settings for the Garanos comic at garanos.alexheberling.com.
    description = u'Garanos - A dramatic fantasy webcomic with a dash of adventure, gothic horror, and romance for flavor.'
    baseUrl = 'http://garanos.alexheberling.com/'
    # Regex-escaped base URL, interpolated into the search patterns below.
    rurl = escape(baseUrl)
    # Page 1 is the entry point handed to indirectStarter together with a
    # pattern for the "navi-last" link.
    # NOTE(review): indirectStarter presumably follows that link to reach
    # the newest strip -- confirm in ..helpers.
    url = baseUrl + 'pages/page-1/'
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
                      after="navi-last")))
    # Strip page template; %s takes the index described in `help`.
    stripUrl = baseUrl + 'pages/page-%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%spages/[^"]+)' % rurl, after="prev"))
    help = 'Index format: n (unpadded)'
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
class GastroPhobia(_BasicScraper):
    # Scraper settings for the GastroPhobia comic at www.gastrophobia.com.
    description = u'Regularly updated comic about a single mom barbarian in Ancient Greece.'
    url = 'http://www.gastrophobia.com/'
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + 'index.php?date=%s'
    firstStripUrl = stripUrl % '2008-07-30'
    # The negative lookahead rejects an image tag that is immediately
    # followed by "<br>".
    imageSearch = compile(
        r'<img src="(http://gastrophobia.com/comix/[^"]+)"[^>]*>(?!<br>)')
    # The previous-strip link is the anchor wrapping the pix/prev.gif image.
    prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev.gif"')
    help = 'Index format: yyyy-mm-dd'
class Geeks(_BasicScraper):
    # Scraper settings for the Geeks comic at sevenfloorsdown.com/geeks.
    description = u'Geeks Trying To Be Funny'
    url = 'http://sevenfloorsdown.com/geeks/'
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '10'
    # The image tag on this site quotes its src with single quotes.
    imageSearch = compile(
        r'<img src=\'(http://sevenfloorsdown.com/geeks/comics/.+?)\'')
    # The previous-strip link is identified by its visible link text.
    prevSearch = compile(r'<a href="(.+?)">« Previous')
    help = 'Index format: nnn'
2012-06-20 19:58:13 +00:00
2013-03-11 21:51:45 +00:00
class GeeksNextDoor(_BasicScraper):
    # Scraper settings for the Geeks Next Door comic at
    # www.geeksnextcomic.com.
    description = u'Geeks Next Door'
    url = 'http://www.geeksnextcomic.com/'
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010-10-04'
    # NOTE(review): tagre comes from ..util; it presumably builds a pattern
    # for the given tag/attribute/value -- confirm against its definition.
    imageSearch = compile(tagre("img", "src", r'(images/GND\d+[^"]+)'))
    # A dated .html link directly followed by the nav_prev.png image.
    prevSearch = compile(
        tagre("a", "href", r'(\d+-\d+-\d+\.html)') +
        tagre("img", "src", r'images/nav_prev\.png'))
    help = 'Index format: yyyy-mm-dd'
2013-04-25 18:54:48 +00:00
# disallowed by robots.txt
class _GeneralProtectionFault(_BasicScraper):
    # Scraper settings for General Protection Fault at www.gpf-comics.com.
    description = u'General Protection Fault'
    url = 'http://www.gpf-comics.com/'
    # Regex-escaped site URL, interpolated into the prevSearch pattern.
    rurl = escape(url)
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + 'archive/%s'
    firstStripUrl = stripUrl % '1998/11/02'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]*)'))
    # A site link directly followed by the image whose alt text is
    # "Previous Comic".
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl) +
        tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random stuff from filename.

        Takes the last path component of imageUrl and returns its first
        11 characters joined with its last 4 characters. pageUrl is not
        used.
        """
        imageName = imageUrl.split('/')[-1]
        return imageName[:11] + imageName[-4:]
2012-06-20 19:58:13 +00:00
class GirlsWithSlingshots(_BasicScraper):
    # Scraper settings for Girls With Slingshots at
    # www.girlswithslingshots.com.
    url = 'http://www.girlswithslingshots.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + 'comic/gws%s/'
    firstStripUrl = stripUrl % '1'
    # Two alternative image patterns: one for images under the site URL and
    # one for images under the cdn. host.
    # NOTE(review): how _BasicScraper handles a tuple of patterns is not
    # visible here -- confirm in ..scraper.
    imageSearch = (
        compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
    )
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="prev"))
    help = 'Index format: nnn'
2013-03-06 19:21:10 +00:00
class GlassHalfEmpty(_BasicScraper):
    # Scraper settings for A Glass Half Empty at www.defectivity.com/ghe.
    description = u'A Glass Half Empty cartoon by Dan Markowitz'
    url = 'http://www.defectivity.com/ghe/index.php'
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'
    # Matches any src attribute whose value starts with "comics/".
    imageSearch = compile(r'src="(comics/.+?)"')
    # A "?strip_id=N" link directly followed by the onback.jpg arrow image.
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
    help = 'Index format: nnn'
2012-11-20 17:53:53 +00:00
class GleefulNihilism(_BasicScraper):
    # Scraper settings for Gleeful Nihilism at gleefulnihilism.com.
    description = u'pointless comics with a sideways grin'
    url = 'http://gleefulnihilism.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + 'comics/%s/'
    firstStripUrl = stripUrl % '2008/10/20/amoeba'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # A comics link whose tag is directly followed by the text "Previous".
    prevSearch = compile(
        tagre("a", "href", r'(%scomics/[^"]+)' % rurl) + 'Previous')
    help = 'Index format: yyyy/mm/dd/stripname'
2012-06-20 19:58:13 +00:00
class Goats(_BasicScraper):
    # Scraper settings for the Goats comic at www.goats.com.
    description = u'goats: the comic strip | by jonathan rosenberg | new comics every mon-wed-fri'
    url = 'http://www.goats.com/'
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + 'archive/%s.html'
    firstStripUrl = stripUrl % '970401'
    # Captures a site-relative image path starting with /comix/.
    imageSearch = compile(r'<img.+?src="(/comix/.+?)"')
    # The previous-strip link is the "go back" button; its target is a
    # six-digit archive page.
    prevSearch = compile(
        r'<a href="(/archive/\d{6}.html)" class="button" title="go back">')
    help = 'Index format: yymmdd'
2013-04-09 17:37:24 +00:00
class GoblinsComic(_BasicScraper):
    # Scraper settings for the Goblins comic at www.goblinscomic.com.
    description = u'Goblins'
    url = 'http://www.goblinscomic.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '06252005'
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/)' % rurl, after="prev"))
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.[^"]+)' % rurl))
    # The first strip index 06252005 can only be read as month 06, day 25,
    # so the index is month-first.
    help = 'Index format: mmddyyyy'
2012-06-20 19:58:13 +00:00
class GoneWithTheBlastwave(_BasicScraper):
    # Scraper settings for Gone with the Blastwave at
    # www.blastwave-comic.com.
    description = u'Gone with the Blastwave - Type E webcomic.'
    # Page of strip number 1; handed to indirectStarter together with a
    # pattern for the "latest" navigation image link.
    # NOTE(review): indirectStarter presumably follows that link to reach
    # the newest strip -- confirm in ..helpers.
    url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
    starter = indirectStarter(
        url,
        compile(r'href="(index.php\?p=comic&nro=\d+)">'
                r'<img src="images/page/default/latest'))
    # Drop the trailing "1" from url so the strip number can be substituted.
    stripUrl = url[:-1] + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
    prevSearch = compile(
        r'href="(index.php\?p=comic&nro=\d+)">'
        r'<img src="images/page/default/previous')
    help = 'Index format: n'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the strip number from pageUrl, zero-padded to two digits.

        The number is read from the "nro=<digits>" part of pageUrl;
        imageUrl is not used. Raises AttributeError if pageUrl contains
        no "nro=<digits>" part, because the search then returns None.
        """
        return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
2013-01-29 20:42:10 +00:00
class GrrlPower(_BasicScraper):
    # Scraper settings for the Grrl Power comic at www.grrlpowercomic.com.
    description = u'Grrl Power - A webcomic about superheroines.'
    url = 'http://www.grrlpowercomic.com/'
    # Regex-escaped site URL, interpolated into the search patterns below.
    rurl = escape(url)
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '48'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
class GunnerkrigCourt(_BasicScraper):
    # Scraper settings for Gunnerkrigg Court at www.gunnerkrigg.com.
    description = u'Gunnerkrigg Court is a science-fantasy webcomic created by Tom Siddell. It is updated online three days a week.'
    url = 'http://www.gunnerkrigg.com/'
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # A "?p=N" link directly followed by the prev_a.jpg navigation image.
    # The image pattern is a raw string so that "\." reaches the regex
    # engine without being an invalid string escape sequence.
    prevSearch = compile(
        tagre("a", "href", r'(\?p=\d+)') +
        tagre("img", "src",
              r"http://www\.gunnerkrigg\.com/images/prev_a\.jpg"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
class Gunshow(_BasicScraper):
    # Scraper settings for the Gunshow comic at gunshowcomic.com.
    description = u"Ah there we go! Color! BUT ALSO I WANTED TO SHOW YOU: GUNSHOW VOLUME 4 IS OUT! IT'S HERE! Get a copy today!"
    url = 'http://gunshowcomic.com/'
    # Strip page template; %s takes the index described in `help`.
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(
        tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
    # NOTE(review): presumably tells _BasicScraper that one page may hold
    # several comic images -- confirm in ..scraper.
    multipleImagesPerStrip = True
    # Any link directly followed by the small "previous" menu image.
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') +
        tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
    help = 'Index format: n'
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
class GUComics(_BasicScraper):
    # Scraper settings for GU Comics at www.gucomics.com.
    description = u'From a gaming news perspective, I detest April Fools Day. No "legitimate" source of news should ever post fake news without a disclaimer.'
    url = 'http://www.gucomics.com/'
    # Strip page template; %s takes the date index described in `help`.
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '20000710'
    # Image paths look like /comics/<4 digits>/gu_<rest>.
    imageSearch = compile(
        tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
    # A numeric site-relative link directly followed by the prev.png image.
    prevSearch = compile(
        tagre("a", "href", r'(/\d+)') +
        tagre("img", "src", r'/images/nav/prev\.png'))
    help = 'Index format: yyyymmdd'