2012-06-20 20:41:04 +00:00
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-02-05 18:51:46 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2013-04-10 16:19:11 +00:00
from re import compile , escape
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
from . . scraper import _BasicScraper
from . . helpers import indirectStarter
2012-11-20 17:53:53 +00:00
from . . util import tagre
2012-06-20 19:58:13 +00:00
2013-12-10 18:50:21 +00:00
# Scraper settings for the English edition of "Gaia" on sandraandwoo.com.
# The class only declares attributes; how they are consumed is defined by
# _BasicScraper, which is not visible in this file.
class Gaia(_BasicScraper):
    # Typographic quotes are written as \u escapes so the literal does not
    # depend on the file's declared iso-8859-1 encoding (they had degraded
    # to '?' placeholders).
    description = (
        u'Gaia, a story about the nature of reality, and the answer to '
        u'Lilith\u2019s simple, meek, world-shattering question: '
        u'\u201cWill you come along?\u201d'
    )
    url = 'http://www.sandraandwoo.com/gaia/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    # stripUrl already appends the trailing '/'; passing an index that ends
    # in '/' produced '.../welcome-to-gaia//'.
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-gaia'
    # Captures <base>comics/<digits>-<digits>-<digits>-<rest> from an img src.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    # Captures <base>n/n/n/<slug>/ from an anchor href.
    # NOTE(review): after="prev" presumably requires "prev" later in the
    # same tag -- confirm against util.tagre.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
# Scraper settings for the German edition of "Gaia" on sandraandwoo.com
# (lang = 'de'). Mirrors the English Gaia class with a different base URL.
class GaiaGerman(_BasicScraper):
    # Umlauts and quotes are written as escapes; the literal had degraded to
    # placeholder characters.
    # NOTE(review): the quote pair (\u201e ... \u201c) is a reconstruction of
    # the two '?' placeholders -- confirm against the site text.
    description = (
        u'Gaia, eine Geschichte \xfcber das Wesen der Wirklichkeit und die '
        u'Antwort auf Liliths einfache, bescheidene, weltersch\xfctternde '
        u'Frage: \u201eKommt ihr mit?\u201c'
    )
    url = 'http://www.sandraandwoo.com/gaiade/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    # stripUrl already appends the trailing '/'; passing an index that ends
    # in '/' produced '.../welcome-to-gaia//'.
    firstStripUrl = stripUrl % '2000/01/01/welcome-to-gaia'
    # Captures <base>comics/<digits>-<digits>-<digits>-<rest> from an img src.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl))
    # Captures <base>n/n/n/<slug>/ from an anchor href.
    # NOTE(review): after="prev" presumably requires "prev" later in the
    # same tag -- confirm against util.tagre.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/number-stripname'
    lang = 'de'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Galaxion" (galaxioncomics.com).
class Galaxion(_BasicScraper):
    description = u'Galaxion - Life. Love. Hyperspace.'
    url = 'http://galaxioncomics.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1-comic/the-story-so-far/the-story-so-far'
    # Captures any img src located under <base>comics/.
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Captures any anchor href on this site.
    # NOTE(review): after="prev" presumably restricts this to the
    # previous-strip link -- confirm against util.tagre.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Garanos" (garanos.alexheberling.com).
class Garanos(_BasicScraper):
    description = (
        u'Garanos - A dramatic fantasy webcomic with a dash of adventure, '
        u'gothic horror, and romance for flavor.'
    )
    baseUrl = 'http://garanos.alexheberling.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(baseUrl)
    # The entry URL is page 1; `starter` is built from it plus a pattern for
    # a pages/ link tagged "nav-last".
    # NOTE(review): presumably indirectStarter follows that link to reach
    # the newest strip -- confirm in helpers.indirectStarter.
    url = baseUrl + 'pages/page-1/'
    starter = indirectStarter(
        url,
        compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
                      after="nav-last")))
    stripUrl = baseUrl + 'pages/page-%s'
    # Captures img src under <base>wp-content/uploads/sites/<n>/<n>/<n>/.
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%spages/[^"]+)' % rurl, after="prev"))
    help = 'Index format: n (unpadded)'
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
# Scraper settings for "Gastrophobia" (gastrophobia.com).
# Uses hand-written regexes against the raw page markup.
class GastroPhobia(_BasicScraper):
    description = (
        u'Regularly updated comic about a single mom barbarian in '
        u'Ancient Greece.'
    )
    url = 'http://www.gastrophobia.com/'
    stripUrl = url + 'index.php?date=%s'
    firstStripUrl = stripUrl % '2008-07-30'
    # Captures the comix/ image URL; the negative lookahead rejects an <img>
    # tag that is immediately followed by <br>. Literal dots are escaped so
    # they no longer match arbitrary characters.
    imageSearch = compile(
        r'<img src="(http://gastrophobia\.com/comix/[^"]+)"[^>]*>(?!<br>)')
    # Captures the href of the anchor that wraps the pix/prev.gif button.
    prevSearch = compile(r'<a href="(.+?)"><img src="pix/prev\.gif"')
    help = 'Index format: yyyy-mm-dd'
# Scraper settings for "Geeks" (sevenfloorsdown.com/geeks/).
# Uses hand-written regexes against the raw page markup.
class Geeks(_BasicScraper):
    description = u'Geeks Trying To Be Funny'
    url = 'http://sevenfloorsdown.com/geeks/'
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '10'
    # The page quotes the src attribute with single quotes, so the pattern is
    # a double-quoted raw string (no backslash-quote needed). Literal dots in
    # the host name are escaped.
    imageSearch = compile(
        r"<img src='(http://sevenfloorsdown\.com/geeks/comics/.+?)'")
    # Captures the href of the link whose text starts with the left
    # guillemet followed by " Previous".
    # NOTE(review): the guillemet may have been an HTML entity (&laquo;)
    # before this file was extracted -- confirm against the live markup.
    prevSearch = compile(r'<a href="(.+?)">« Previous')
    help = 'Index format: nnn'
2012-06-20 19:58:13 +00:00
2013-03-11 21:51:45 +00:00
# Scraper settings for "Geeks Next Door" (geeksnextcomic.com).
class GeeksNextDoor(_BasicScraper):
    description = u'Geeks Next Door'
    url = 'http://www.geeksnextcomic.com/'
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010-10-04'
    # Captures relative img src values of the form images/GND<digits>...
    imageSearch = compile(tagre("img", "src", r'(images/GND\d+[^"]+)'))
    # An anchor to a <n>-<n>-<n>.html page directly followed by the
    # nav_prev.png image; only the href is captured.
    prevSearch = compile(
        tagre("a", "href", r'(\d+-\d+-\d+\.html)') +
        tagre("img", "src", r'images/nav_prev\.png'))
    help = 'Index format: yyyy-mm-dd'
2013-04-25 19:06:20 +00:00
# 403 error when getting image files, disable for now
2013-04-25 18:54:48 +00:00
# Scraper settings for "General Protection Fault" (gpf-comics.com).
class _GeneralProtectionFault(_BasicScraper):
    description = u'General Protection Fault'
    url = 'http://www.gpf-comics.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'archive/%s'
    firstStripUrl = stripUrl % '1998/11/02'
    # Captures site-relative img src values under /comics/.
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]*)'))
    # An on-site anchor directly followed by the image whose alt text is
    # "Previous Comic"; only the href is captured.
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl) +
        tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Remove random stuff from filename.

        Takes the last path component of ``imageUrl`` and returns its first
        11 characters joined with its last 4 characters, dropping whatever
        lies in between. ``pageUrl`` is not used.
        """
        imageName = imageUrl.split('/')[-1]
        return imageName[:11] + imageName[-4:]
2013-04-25 18:58:24 +00:00
# Scraper settings for "Girl Genius" (girlgeniusonline.com).
# Every tagre call passes quote="'", and the patterns exclude "'" rather
# than '"' inside attribute values.
class GirlGenius(_BasicScraper):
    description = u'Adventure, Romance, Mad Science!'
    baseUrl = 'http://www.girlgeniusonline.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(baseUrl)
    url = baseUrl + 'comic.php'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20021104'
    # Captures img src values under <base>ggmain/strips/.
    imageSearch = compile(
        tagre("img", "src", r"(%sggmain/strips/[^']*)" % rurl, quote="'"))
    # An on-site anchor directly followed by the image whose alt text is
    # "The Previous Comic"; only the href is captured.
    prevSearch = compile(
        tagre("a", "href", r"(%s[^']+)" % rurl, quote="'") +
        tagre("img", "alt", "The Previous Comic", quote="'"))
    help = 'Index format: yyyymmdd'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Girls With Slingshots" (girlswithslingshots.com).
class GirlsWithSlingshots(_BasicScraper):
    url = 'http://www.girlswithslingshots.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/gws%s/'
    firstStripUrl = stripUrl % '1'
    # Two alternative image patterns: one for the main host and one for the
    # cdn. host.
    # NOTE(review): presumably _BasicScraper accepts a tuple of patterns
    # here -- confirm in scraper.py.
    imageSearch = (
        compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
    )
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="prev"))
    help = 'Index format: nnn'
2013-03-06 19:21:10 +00:00
# Scraper settings for "A Glass Half Empty" (defectivity.com/ghe/).
class GlassHalfEmpty(_BasicScraper):
    description = u'A Glass Half Empty cartoon by Dan Markowitz'
    url = 'http://www.defectivity.com/ghe/index.php'
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'
    # Captures any src attribute value that starts with comics/ (the tag
    # name is not constrained).
    imageSearch = compile(r'src="(comics/.+?)"')
    # A "?strip_id=<digits>" anchor directly followed by the onback.jpg
    # arrow image; only the href is captured.
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
    help = 'Index format: nnn'
2012-11-20 17:53:53 +00:00
# Scraper settings for "Gleeful Nihilism" (gleefulnihilism.com).
class GleefulNihilism(_BasicScraper):
    description = u'pointless comics with a sideways grin'
    url = 'http://gleefulnihilism.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'amoeba'
    # Captures img src under <base>wp-content/uploads/<n>/<n>/.
    imageSearch = compile(
        tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
    # An on-site comic/ anchor whose tag is immediately followed by a single
    # left angle quotation mark.
    # NOTE(review): that character is not encodable in the iso-8859-1
    # encoding this file declares; it may have been an HTML entity
    # (&lsaquo;) before extraction -- confirm against the live markup.
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/[^"]+)' % rurl) + '‹')
    help = 'Index format: stripname'
2012-06-20 19:58:13 +00:00
2013-04-09 17:37:24 +00:00
# Scraper settings for "Goblins" (goblinscomic.org).
class GoblinsComic(_BasicScraper):
    description = u'Goblins'
    url = 'http://www.goblinscomic.org/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '06252005'
    # Captures on-site hrefs made only of digits and dashes plus a '/'.
    prevSearch = compile(
        tagre("a", "href", r'(%s[-\d]+/)' % rurl, after="prev"))
    # Captures img src of the form <base>comics/<digits>.<ext>.
    imageSearch = compile(
        tagre("img", "src", r'(%scomics/\d+\.[^"]+)' % rurl))
    # The first index above is 06252005; "25" cannot be a month, so the
    # index is month-day-year, not ddmmyyyy as the help text used to say.
    help = 'Index format: mmddyyyy'
2013-07-04 09:08:16 +00:00
# Scraper settings for "Go Get a Roomie!" (gogetaroomie.com).
class GoGetARoomie(_BasicScraper):
    description = u"Go Get a Roomie!"
    url = 'http://www.gogetaroomie.com/index.php'
    stripUrl = url + '?id=%s'
    firstStripUrl = stripUrl % '1'
    # Captures relative img src values under comics/.
    imageSearch = compile(tagre('img', 'src', r'(comics/[^"]+)'))
    # Captures the href of any anchor.
    # NOTE(review): after='rel="prev"' presumably requires that attribute
    # later in the same tag -- confirm against util.tagre.
    prevSearch = compile(
        tagre('a', 'href', r'([^"]+)', after='rel="prev"'))
    help = 'Index format: nnn'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Gone with the Blastwave" (blastwave-comic.com).
# Uses hand-written regexes against the raw page markup.
# NOTE(review): the '&' in the patterns below may have been '&amp;' before
# this file was extracted -- confirm against the live markup.
class GoneWithTheBlastwave(_BasicScraper):
    description = u'Gone with the Blastwave - Type E webcomic.'
    url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
    # `starter` is built from the first-strip URL plus a pattern for the
    # link that wraps the "latest" navigation image.
    # NOTE(review): presumably indirectStarter follows that link to reach
    # the newest strip -- confirm in helpers.indirectStarter.
    starter = indirectStarter(
        url,
        compile(r'href="(index\.php\?p=comic&nro=\d+)">'
                r'<img src="images/page/default/latest'))
    # Drop the trailing "1" from url so the strip number can be substituted.
    stripUrl = url[:-1] + '%s'
    firstStripUrl = stripUrl % '1'
    # Captures the trailing /comics/... part of an img src value.
    imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
    # Captures the href of the link that wraps the "previous" navigation
    # image.
    prevSearch = compile(
        r'href="(index\.php\?p=comic&nro=\d+)">'
        r'<img src="images/page/default/previous')
    help = 'Index format: n'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the strip number found in the page URL.

        Extracts the digits following ``nro=`` in ``pageUrl`` and returns
        them as a string zero-padded to at least two digits. ``imageUrl``
        is not used. Raises ``AttributeError`` when ``pageUrl`` contains no
        ``nro=<digits>`` part, because the search then yields ``None``.
        """
        stripNumber = compile(r'nro=(\d+)').search(pageUrl).group(1)
        return '%02d' % int(stripNumber)
2013-01-29 20:42:10 +00:00
# Scraper settings for "Grrl Power" (grrlpowercomic.com).
class GrrlPower(_BasicScraper):
    description = u'Grrl Power - A webcomic about superheroines.'
    url = 'http://www.grrlpowercomic.com/'
    # Regex-escaped copy of the base URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '48'
    # Captures any img src located under <base>comics/.
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Captures <base>archives/<digits> from an anchor href.
    # NOTE(review): after="navi-prev" presumably requires that text later
    # in the same tag -- confirm against util.tagre.
    prevSearch = compile(
        tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Gunnerkrigg Court" (gunnerkrigg.com).
# No firstStripUrl is declared for this comic.
class GunnerkrigCourt(_BasicScraper):
    description = (
        u'Gunnerkrigg Court is a science-fantasy webcomic created by Tom '
        u'Siddell. It is updated online three days a week.'
    )
    url = 'http://www.gunnerkrigg.com/'
    stripUrl = url + '?p=%s'
    # Captures site-relative img src values under /comics/.
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # A "?p=<digits>" anchor directly followed by the prev_a.jpg image; only
    # the href is captured. The image pattern is a raw string: it contains
    # "\." sequences, which are invalid escapes in a plain string literal.
    prevSearch = compile(
        tagre("a", "href", r'(\?p=\d+)') +
        tagre("img", "src",
              r"http://www\.gunnerkrigg\.com/images/prev_a\.jpg"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
# Scraper settings for "Gunshow" (gunshowcomic.com).
class Gunshow(_BasicScraper):
    description = (
        u"Ah there we go! Color! BUT ALSO I WANTED TO SHOW YOU: GUNSHOW "
        u"VOLUME 4 IS OUT! IT'S HERE! Get a copy today!"
    )
    url = 'http://gunshowcomic.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    # Captures absolute img src values under gunshowcomic.com/comics/.
    imageSearch = compile(
        tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
    # NOTE(review): presumably tells _BasicScraper that one page may hold
    # several matching images -- confirm in scraper.py.
    multipleImagesPerStrip = True
    # Any anchor directly followed by the menu/small/previous.gif image;
    # only the href is captured.
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') +
        tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
    help = 'Index format: n'
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
# Scraper settings for "GU Comics" (gucomics.com).
class GUComics(_BasicScraper):
    description = (
        u'From a gaming news perspective, I detest April Fools Day. No '
        u'"legitimate" source of news should ever post fake news without a '
        u'disclaimer.'
    )
    url = 'http://www.gucomics.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '20000710'
    # Captures site-relative img src of the form /comics/<4 digits>/gu_...
    imageSearch = compile(
        tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)'))
    # A "/<digits>" anchor directly followed by the nav/prev.png image; only
    # the href is captured.
    prevSearch = compile(
        tagre("a", "href", r'(/\d+)') +
        tagre("img", "src", r'/images/nav/prev\.png'))
    help = 'Index format: yyyymmdd'