2012-06-20 20:41:04 +00:00
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-01-28 17:52:26 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-11-21 20:57:26 +00:00
2013-04-10 16:19:11 +00:00
from re import compile , escape
2012-06-20 19:58:13 +00:00
2012-10-11 10:03:12 +00:00
from . . scraper import _BasicScraper
2013-03-18 17:15:19 +00:00
from . . helpers import indirectStarter , bounceStarter
2012-12-04 06:02:40 +00:00
from . . util import tagre
2012-06-20 19:58:13 +00:00
2012-12-07 23:45:18 +00:00
class DailyDose(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``starter`` is built with ``indirectStarter`` from ``url`` and a link
    pattern; ``help`` describes the expected index format.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://dailydoseofcomics.com/ '
    stripUrl = url + ' %s / '
    starter = indirectStarter(
        url,
        compile(tagre(" a ", " href ",
                      r' (http://dailydoseofcomics \ .com/[^ " ]+) ',
                      after=" preview ")))
    imageSearch = compile(
        tagre(" img ", " src ", r' ([^ " ]+) ',
              before=" align(?:none|center) "))
    prevSearch = compile(
        tagre(" a ", " href ",
              r' (http://dailydoseofcomics \ .com/[^ " ]+) ',
              after=" prev "))
    help = ' Index format: stripname '
2012-06-20 19:58:13 +00:00
2013-04-10 16:19:38 +00:00
class DamnLol(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Two image patterns are tried (``imageSearch`` is a tuple), the start
    page comes from ``bounceStarter``, and ``namer`` derives file names
    from the page URL.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    description = ' Funny pictures from the internet. Thousands of them. '
    help = ' Index format: stripname-number '
    url = ' http://www.damnlol.com/ '
    # Regex-escaped form of the base URL, interpolated into the patterns.
    rurl = escape(url)
    stripUrl = url + ' %s .html '
    starter = bounceStarter(
        url,
        compile(tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl,
                      after=" next ")))
    imageSearch = (
        compile(tagre(" img ", " src ", r' ( %s i/[^ " ]+) ' % rurl)),
        compile(tagre(" img ", " src ", r' ( %s pics/[^ " ]+) ' % rurl)),
    )
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl, after=" prev "))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return a file name built from the page URL and image extension.

        The last path component of ``pageUrl`` (minus its final five
        characters) is split once from the right into a strip name and a
        number; the result places the number first.
        """
        extension = imageUrl.rsplit(' . ', 1)[1]
        page_name = pageUrl.rsplit(' / ', 1)[1]
        page_name = page_name[:-5]
        stripname, number = page_name.rsplit(' - ', 1)
        return ' %s - %s . %s ' % (number, stripname, extension)
2012-12-08 20:30:51 +00:00
class Damonk(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``prevSearch`` is the concatenation of an anchor pattern and an image
    pattern, both produced by ``tagre``.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.damonk.com/ '
    stripUrl = url + ' d/ %s .html '
    firstStripUrl = stripUrl % ' 20060522 '
    help = ' Index format: yyyymmdd '
    imageSearch = compile(tagre(" img ", " src ", r' (/comics/[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' (/d/ \ d+ \ .html) ')
        + tagre(" img ", " src ", r' /images/previous_day \ .gif '))
2012-12-13 20:05:27 +00:00
# XXX disallowed /search by robots.txt
class _DandyAndCompany(_BasicScraper):
    """Scraper definition for the site at ``url``.

    There is no per-strip URL template (``stripUrl`` is ``None``) and
    ``multipleImagesPerStrip`` is enabled.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dandyandcompany.com/ '
    stripUrl = None
    help = ' Index format: none '
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre(" a ", " href ",
              r' (http:// \ d+ \ .bp \ .blogspot \ .com/[^ " ]+) ',
              after=" imageanchor "))
    prevSearch = compile(
        tagre(" a ", " href ", r" ([^ ' ]+) ",
              quote=" ' ", after=" Older Posts "))
2012-06-20 19:58:13 +00:00
2013-03-03 20:31:44 +00:00
class DangerouslyChloe(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Strip pages are addressed by name through ``stripUrl``.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dangerouslychloe.com/ '
    stripUrl = url + ' strips-dc/ %s '
    firstStripUrl = stripUrl % ' chapter_1_-_that_damned_girl '
    help = ' Index format: name '
    imageSearch = compile(
        tagre(" img ", " src ", r' ([^ " ]*/comics/[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' ([^ " ]*/strips-dc/[^ " ]+) ',
              before=" cn[id]prevt "))
2012-06-20 19:58:13 +00:00
class DarkWings(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns embed the regex-escaped base URL (``rurl``).
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.flowerlarkstudios.com/dark-wings/ '
    rurl = escape(url)
    stripUrl = url + ' %s / '
    firstStripUrl = stripUrl % ' 2008/05/31/page-i '
    description = u" Dark Wings - You Can ' t Reach Heaven on Broken Wings "
    help = ' Index format: yyyy/mm/dd/page-nn-mm '
    imageSearch = compile(
        tagre(" img ", " src ", r' ( %s comics/[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl,
              after=" navi-prev "))
2012-06-20 19:58:13 +00:00
2013-03-21 17:33:16 +00:00
class DasLebenIstKeinPonyhof(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``multipleImagesPerStrip`` is enabled and ``lang`` is set explicitly.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://sarahburrini.com/wordpress/ '
    rurl = escape(url)
    stripUrl = url + ' comic/ %s / '
    firstStripUrl = stripUrl % ' mein-erster-webcomic '
    lang = ' de '
    help = ' Index format: stripname '
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre(" img ", " src ",
              r' ( %s wp-content/uploads/ \ d+/ \ d+/ \ d+- \ d+- \ d+[^ " ]+) '
              % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s comic/[^ " ]+) ' % rurl,
              after=" navi-prev "))
2013-03-06 19:00:30 +00:00
class DeadWinter(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``prevSearch`` is an anchor pattern from ``tagre`` followed by a
    literal text fragment.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://deadwinter.cc/ '
    stripUrl = url + ' page/ %s '
    firstStripUrl = stripUrl % ' 1 '
    description = u' d e a d . w i n t e r '
    help = ' Index format: number '
    imageSearch = compile(
        tagre(" img ", " src ", r" (/static/page/strip/ \ d+[^ ' ]+) ",
              quote=" ' "))
    prevSearch = compile(
        tagre(" a ", " href ", r' (/page/ \ d+) ') + " Previous ")
2012-06-20 19:58:13 +00:00
class DeathToTheExtremist(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns are hand-written regular expressions rather than
    ``tagre`` output.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dtecomic.com/ '
    stripUrl = url + ' ?n= %s '
    firstStripUrl = stripUrl % ' 1 '
    description = u' Death To The Extremist '
    help = ' Index format: nnn '
    imageSearch = compile(r' " (comics/.*?) " ')
    prevSearch = compile(
        r' </a> <a href= " ( \ ?n=.*?) " ><.+?/aprev.gif " ')
class DeepFried(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns embed the regex-escaped base URL (``rurl``).
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.whatisdeepfried.com/ '
    rurl = escape(url)
    stripUrl = url + ' %s / '
    firstStripUrl = stripUrl % ' 2001/09/16/new-world-out-of-order '
    description = u' Deep Fried-The home of Weapon Brown, Clarissa and Beepo '
    help = ' Index format: none '
    imageSearch = compile(
        tagre(" img ", " src ", r' ( %s comics/[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl, after=" prev "))
2012-06-20 19:58:13 +00:00
2013-03-19 19:45:59 +00:00
class DemolitionSquad(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``starter`` is built with ``indirectStarter``; ``prevUrlModifier``
    trims the previous-page URL at its first separator match.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.demolitionsquad.de/ '
    stripUrl = url + ' comicstrips/einzelansicht/article/ %s / '
    firstStripUrl = stripUrl % ' videospiele-hentai-master '
    lang = ' de '
    help = ' Index format: stripname '
    description = u' Demolitionsquad.de ist die erste deutsche Videospiel-Webcomic-Seite nach amerikanischen Vorbild und noch viel mehr als das. Auf Demolitionsquad.de findet der wissbegierige, spielebegeisterte Nutzer Comicstrips zu aktuellen Videospielen die ihm die Wartezeit auf den kommenden Top-Titel weiter ves \xfc ssen. '
    starter = indirectStarter(
        url,
        compile(tagre(
            " a ", " href ",
            r' (no_cache/comicstrips/einzelansicht/archive/[^ " ]+) ')))
    imageSearch = compile(
        tagre(" img ", " src ", r' (uploads/pics/[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ",
              r' (comicstrips/einzelansicht/article/[^ " ]+) ')
        + tagre(" img ", " src ",
                r' fileadmin/templates/images/button_back.gif '))

    def prevUrlModifier(self, url):
        """Return the part of ``url`` that precedes the first separator."""
        # Everything after the separator (the CGI parameters) is dropped.
        pieces = url.split(' ? ')
        return pieces[0]
2013-03-21 17:33:16 +00:00
class DerTodUndDasMaedchen(_BasicScraper):
    """Scraper definition for the site at ``url``.

    The ``tagre`` calls pass an explicit ``quote`` argument, and
    ``prevSearch`` appends a literal text fragment to the anchor pattern.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.cartoontomb.de/deutsch/tod2.php '
    stripUrl = url + ' ?bild= %s .jpg '
    firstStripUrl = stripUrl % ' 00_01_01 '
    lang = ' de '
    help = ' Index format: nn_nn_nn '
    imageSearch = compile(
        tagre(" img ", " src ", r" ( \ . \ ./images/tod/teil2/[^ ' ]+) ",
              quote=" ' "))
    prevSearch = compile(
        tagre(" a ", " href ", r" (/deutsch/tod2 \ .php \ ?bild=[^ ' ]+) ",
              quote=" ' ")
        + " zurück ")
2013-03-06 19:21:10 +00:00
class DieselSweeties(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``namer`` turns the numeric stem of the image URL into a zero-padded
    file name.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dieselsweeties.com/ '
    stripUrl = url + ' archive/ %s '
    firstStripUrl = stripUrl % ' 1 '
    description = u' diesel sweeties : robot webcomic & geeky music t-shirts '
    help = ' Index format: n (unpadded) '
    imageSearch = compile(
        tagre(" img ", " src ", r' (/hstrips/[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' (/archive/ \ d+) ')
        + tagre(" img ", " src ",
                r' (?:http://www \ .dieselsweeties \ .com/ximages/blackbackarrow160.png|/ximages/prev \ .gif) '))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return a file name derived from the integer stem of ``imageUrl``.

        Raises ``ValueError`` (from ``int``) when the stem is not numeric.
        """
        last_segment = imageUrl.split(' / ')[-1]
        stem = last_segment.split(' . ')[0]
        index = int(stem)
        return ' sw %02d ' % (index,)
class Dilbert(_BasicScraper):
    """Scraper definition for the site at ``url``.

    The start page comes from ``bounceStarter``; ``namer`` combines a page
    URL component with the image extension.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    description = ' A comic featuring satirical office humor about a white-collar, micromanaged office featuring the engineer Dilbert as the title character. '
    help = ' Index format: yyyy-mm-dd '
    url = ' http://dilbert.com/ '
    stripUrl = url + ' %s / '
    firstStripUrl = stripUrl % ' 1989-04-16 '
    starter = bounceStarter(
        url,
        compile(tagre(" a ", " href ", r' (/ \ d+- \ d+- \ d+/) ',
                      after=" STR_Next ")))
    imageSearch = compile(
        tagre(" img ", " src ",
              r' (/dyn/str_strip/[^ " ]+ \ .strip \ .zoom \ .gif) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' (/ \ d+- \ d+- \ d+/) ',
              after=" STR_Prev "))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return ``name . ext`` built from the page URL and image URL.

        ``name`` is the second piece of ``pageUrl`` after splitting at
        most twice from the right; ``ext`` is whatever follows the last
        separator in ``imageUrl``.
        """
        page_pieces = pageUrl.rsplit(" / ", 2)
        image_pieces = imageUrl.rsplit(" . ", 1)
        return " %s . %s " % (page_pieces[1], image_pieces[1])
2013-03-06 19:21:10 +00:00
2012-12-07 23:45:18 +00:00
class DMFA(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``multipleImagesPerStrip`` is enabled; ``prevSearch`` joins an anchor
    pattern with an image pattern.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.missmab.com/ '
    stripUrl = url + ' Comics/Vol_ %s .php '
    firstStripUrl = stripUrl % ' 001 '
    help = ' Index format: nnn (normally, some specials) '
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre(" img ", " src ", r' ((?:Comics/|Vol)[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' ((?:Comics/)?Vol[^ " ]+) ')
        + tagre(" img ", " src ", r' (?:../)?Images/comicprev \ .gif '))
2012-06-20 19:58:13 +00:00
class DoemainOfOurOwn(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns are hand-written regular expressions over the
    raw page markup.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.doemain.com/ '
    stripUrl = url + ' index.cgi/ %s '
    help = ' Index format: yyyy-mm-dd '
    imageSearch = compile(
        r" <img border= ' 0 ' width= ' \ d+ ' height= ' \ d+ ' src= ' (/strips/ \ d {4} / \ d {6} -[^ \ ' ]+) ' ")
    prevSearch = compile(
        r' <a href= " (/index \ .cgi/ \ d {4} - \ d {2} - \ d {2} ) " ><img width= " \ d+ " height= " \ d+ " border= " \ d+ " alt= " Previous Strip " ')
2013-03-15 06:04:19 +00:00
class DogHouseDiaries(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns embed the regex-escaped base URL (``rurl``).
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://thedoghousediaries.com/ '
    rurl = escape(url)
    stripUrl = url + ' %s '
    firstStripUrl = stripUrl % ' 4827 '
    help = ' Index format: number '
    imageSearch = compile(
        tagre(" img ", " src ", r' ( %s comics/[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s \ d+) ' % rurl,
              after=" previous-comic "))
2013-03-06 19:21:10 +00:00
class DominicDeegan(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``imageSearch`` comes from ``tagre``; ``prevSearch`` is a hand-written
    regular expression.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dominic-deegan.com/ '
    stripUrl = url + ' view.php?date= %s '
    firstStripUrl = stripUrl % ' 2002-05-21 '
    help = ' Index format: yyyy-mm-dd '
    imageSearch = compile(tagre(" img ", " src ", r' (comics/[^ " ]+) '))
    prevSearch = compile(r' " (view.php \ ?date=[^ " ]+) " .+?prev21 ')
class DorkTower(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``prevSearch`` is an anchor pattern from ``tagre`` followed by a
    literal text fragment.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dorktower.com/ '
    rurl = escape(url)
    stripUrl = url + ' %s / '
    firstStripUrl = stripUrl % ' 1997/01/01/shadis-magazine-strip-1 '
    description = u' The Place for All Things Dork '
    help = ' Index format: yyyy/mm/dd/stripname-dd-mm-yy '
    imageSearch = compile(
        tagre(" img ", " src ",
              r' ( %s files/ \ d+/ \ d+/[^ " ]+ \ .gif) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl) + " Previous ")
2012-06-20 19:58:13 +00:00
class Dracula(_BasicScraper):
    """Scraper definition for the site at ``url``.

    ``imageSearch`` comes from ``tagre``; ``prevSearch`` is a hand-written
    regular expression.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://draculacomic.net/ '
    stripUrl = url + ' comic.php?comicID= %s '
    firstStripUrl = stripUrl % ' 0 '
    help = ' Index format: nnn '
    imageSearch = compile(tagre(" img ", " src ", r' (comics/[^ " ]+) '))
    prevSearch = compile(
        r' <a class= " archivelink " href= " (.+?) " >« Prev</a> ')
class DreamKeepersPrelude(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns are hand-written regular expressions.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dreamkeeperscomic.com/Prelude.php '
    stripUrl = url + ' ?pg= %s '
    help = ' Index format: n '
    imageSearch = compile(r' (images/PreludeNew/.+?) " ')
    prevSearch = compile(r' (Prelude.php \ ?pg=.+?) " ')
class DresdenCodak(_BasicScraper):
    """Scraper definition for the site at ``url``.

    There is no per-strip URL template (``stripUrl`` is ``None``);
    ``starter`` is built with ``indirectStarter`` from two concatenated
    ``tagre`` patterns.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://dresdencodak.com/ '
    rurl = escape(url)
    stripUrl = None
    firstStripUrl = url + ' 2007/02/08/pom/ '
    description = u' Dresden Codak '
    starter = indirectStarter(
        url,
        compile(tagre(" div ", " id ", " preview ")
                + tagre(" a ", " href ",
                        r' ( %s \ d+/ \ d+/ \ d+/[^ " ]+) ' % rurl)))
    imageSearch = compile(
        tagre(" img ", " src ", r' ( %s comics/[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s [^ " ]+) ' % rurl)
        + tagre(" img ", " src ", r" %s m_prev2? \ .png " % rurl,
                quote=" "))
2012-06-20 19:58:13 +00:00
2012-11-26 06:13:32 +00:00
2013-04-10 21:57:09 +00:00
class DrFun(_BasicScraper):
    """Scraper definition rooted at ``baseUrl``.

    ``url`` and ``stripUrl`` are both derived from ``baseUrl``;
    ``multipleImagesPerStrip`` is enabled and images are matched on anchor
    ``href`` values.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    description = ' A series of bizarre one-panel gags. Topics range from the mundane to the obscure. '
    help = ' Index format: nnnnn '
    baseUrl = ' http://www.ibiblio.org/Dave/ '
    url = baseUrl + ' ar00502.htm '
    stripUrl = baseUrl + ' ar %s .htm '
    firstStripUrl = stripUrl % ' 00001 '
    multipleImagesPerStrip = True
    imageSearch = compile(
        tagre(" a ", " href ", r' (Dr-Fun/df \ d+/df[^ " ]+) '))
    prevSearch = compile(
        tagre(" a ", " href ", r' ([^ " ]+) ') + ' Previous Week, ')
2013-04-10 21:57:09 +00:00
2013-03-25 18:47:44 +00:00
class DrMcNinja(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns embed the regex-escaped base URL (``rurl``).
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://drmcninja.com/ '
    rurl = escape(url)
    stripUrl = url + ' archives/comic/ %s / '
    firstStripUrl = stripUrl % ' 0p1 '
    description = u' The Adventures of Dr. McNinja '
    help = ' Index format: episode number and page '
    imageSearch = compile(
        tagre(" img ", " src ",
              r' ( %s comics/ \ d+- \ d+- \ d+[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s archives/comic/[^ " ]+) ' % rurl,
              after=" prev "))
2013-03-06 19:21:10 +00:00
class Drowtales(_BasicScraper):
    """Scraper definition rooted at ``baseUrl``.

    ``rurl`` is the regex-escaped ``baseUrl`` (not ``url``), and
    ``imageSearch`` is a tuple of two patterns.
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    baseUrl = ' http://www.drowtales.com/ '
    rurl = escape(baseUrl)
    url = baseUrl + ' mainarchive.php '
    stripUrl = url + ' ?sid= %s '
    firstStripUrl = stripUrl % ' 4192 '
    help = ' Index format: number '
    imageSearch = (
        compile(tagre(" img ", " src ",
                      r' ( %s mainarchive/[^ " ]+) ' % rurl)),
        # NOTE(review): as written, this pattern appears to leave one
        # group unclosed -- verify the literal before relying on it.
        compile(r' background-image:url \ ((mainarchive/[^ \ )]+center \ .jpg) '),
    )
    prevSearch = compile(
        tagre(" a ", " href ", r' ( \ ?sid= \ d+) ',
              before=" link_prev_top "))
2013-02-06 21:08:36 +00:00
# XXX disallowed by robots.txt
class _DumbingOfAge(_BasicScraper):
    """Scraper definition for the site at ``url``.

    Both search patterns embed the regex-escaped base URL (``rurl``).
    """
    # NOTE(review): string literals are kept verbatim, including their
    # padding spaces; confirm them against the upstream file.
    url = ' http://www.dumbingofage.com/ '
    rurl = escape(url)
    stripUrl = url + ' %s / '
    help = ' Index format: yyyy/comic/book-num/seriesname/stripname '
    imageSearch = compile(
        tagre(" img ", " src ",
              r' ( %s comics/ \ d+- \ d+- \ d+[^ " ]+) ' % rurl))
    prevSearch = compile(
        tagre(" a ", " href ", r' ( %s \ d+/[^ " ]+) ' % rurl,
              after=" prev "))