2012-06-20 20:41:04 +00:00
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2013-02-05 18:51:46 +00:00
# Copyright (C) 2012-2013 Bastian Kleineidam
2012-06-20 19:58:13 +00:00
2013-04-10 16:19:11 +00:00
from re import compile , escape , IGNORECASE
2012-10-11 10:03:12 +00:00
from . . scraper import _BasicScraper
from . . helpers import indirectStarter
2012-11-21 20:57:26 +00:00
from . . util import tagre
2012-06-20 19:58:13 +00:00
2013-02-06 21:27:40 +00:00
class TheBrads(_BasicScraper):
    # Scraper settings for the "The Brads" archive on bradcolbow.com.
    description = u'ArchiveFirst World Problems Comic - By Brad Colbow'
    url = 'http://bradcolbow.com/archive/C4/'
    # A strip page is the archive URL followed by "<index>/".
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'P125'
    # Matches image URLs on s3.amazonaws.com whose file name starts with
    # "the-brads" or "thebrads" followed by "-" or "_".
    imageSearch = compile(tagre("img", "src", r'(http://s3\.amazonaws\.com/the_brads/the-?brads[-_][^"]+)'))
    # NOTE(review): presumably `before="prev"` requires "prev" to appear in
    # the tag ahead of the href attribute -- confirm against util.tagre.
    prevSearch = compile(tagre("a", "href", r'(http://bradcolbow\.com/archive/C4/[^"]+)', before="prev"))
    multipleImagesPerStrip = True
    help = 'Index format: a letter and a number'
2012-12-08 20:30:51 +00:00
class TheDevilsPanties(_BasicScraper):
    # Scraper settings for thedevilspanties.com.
    description = u"It's not Satanic Porn"
    url = 'http://thedevilspanties.com/'
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '300'
    # Comic images are served from the cdn.thedevilspanties.com host.
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)'))
    # The previous link is a site-relative "/archives/<number>" href.
    prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', after="Previous"))
    help = 'Index format: number'
2012-06-20 19:58:13 +00:00
class TheNoob(_BasicScraper):
    # Scraper settings for thenoobcomic.com; strips are addressed through
    # the "pos" query parameter of index.php.
    url = 'http://www.thenoobcomic.com/index.php'
    stripUrl = url + '?pos=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
    # The previous link is a bare query string ("?pos=<n>").
    prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
    help = 'Index format: nnnn'
class TheOrderOfTheStick(_BasicScraper):
    # Scraper settings for The Order of the Stick on giantitp.com.
    baseUrl = 'http://www.giantitp.com/'
    url = baseUrl + 'comics/oots0863.html'
    stripUrl = baseUrl + 'comics/oots%s.html'
    firstStripUrl = stripUrl % '0001'
    imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
    # The previous link is the anchor wrapping the "ComicNav_Back" image.
    prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
    # Strip numbers are four digits wide (see firstStripUrl and the \d{4}
    # patterns), so the index has to be zero-padded.
    help = 'Index format: nnnn'
    # Start from the first strip link found on the base page.
    # NOTE(review): exact indirectStarter semantics live in helpers -- confirm.
    starter = indirectStarter(baseUrl, compile(r'<A href="(/comics/oots\d{4}\.html)"'))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the page file name without its 5-character suffix.

        The last path component of ``pageUrl`` is taken and its final five
        characters (".html" for strip pages) are cut off, e.g.
        ".../comics/oots0001.html" -> "oots0001". ``imageUrl`` is unused.
        """
        return pageUrl.rsplit('/', 1)[-1][:-5]
2012-06-20 19:58:13 +00:00
class TheParkingLotIsFull(_BasicScraper):
    # Scraper settings for The Parking Lot Is Full; archive pages are
    # named after a four-digit number ("arch1998.htm", "arch2002.htm").
    baseUrl = 'http://plif.courageunfettered.com/'
    url = baseUrl + 'archive/arch2002.htm'
    stripUrl = baseUrl + 'archive/arch%s.htm'
    firstStripUrl = stripUrl % '1998'
    imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
    multipleImagesPerStrip = True
    # NOTE(review): the whitespace between "\d{4}" and "-" could not be
    # recovered unambiguously from the damaged source -- verify against the
    # live archive page markup.
    prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
    # The index is the four-digit archive number used in stripUrl.
    help = 'Index format: nnnn'
class TheWotch(_BasicScraper):
    # Scraper settings for thewotch.com; strips are addressed by date.
    url = 'http://www.thewotch.com/'
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '2002-11-21'
    # The page markup uses single-quoted attributes, hence the
    # double-quoted raw strings here.
    imageSearch = compile(r"<img.+?src='(comics/.+?)'")
    prevSearch = compile(r"<link rel='Previous' href='(/\?date=\d+-\d+-\d+)'")
    help = 'Index format: yyyy-mm-dd'
2013-02-06 21:08:36 +00:00
class ThisIsIndexed(_BasicScraper):
    # Scraper settings for thisisindexed.com.
    url = 'http://thisisindexed.com/'
    # Regex-escaped copy of the URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'page/%s'
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/card[^"]+)' % rurl))
    multipleImagesPerStrip = True
    # A "nav-previous" div immediately followed by the link to the older
    # page; only the ".../page/<n>/" part of the href is captured.
    prevSearch = compile(tagre("div", "class", "nav-previous") +
                         tagre("a", "href", r'(%spage/\d+/)[^"]*' % rurl))
    help = 'Index format: number'
2012-12-08 20:30:51 +00:00
class ThunderAndLightning(_BasicScraper):
    # Scraper settings for talcomic.com.
    url = 'http://www.talcomic.com/wp/'
    # Regex-escaped copy of the URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    help = 'Index format: yyyy/mm/dd/page-nn'

    @classmethod
    def starter(cls):
        """Return the start URL: the site URL plus the "?latestcomic" query."""
        return cls.url + '?latestcomic'
2012-12-08 20:30:51 +00:00
2012-11-21 20:57:26 +00:00
class TinyKittenTeeth(_BasicScraper):
    # Scraper settings for tinykittenteeth.com.
    url = 'http://www.tinykittenteeth.com/'
    # Regex-escaped copy of the URL for embedding in the image pattern.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/01/26/gene-kelly'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Any href is accepted as long as the tag also matches "Previous".
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
    help = 'Index format: yyyy/mm/dd/stripname (unpadded)'
2012-11-21 20:57:26 +00:00
2012-06-20 19:58:13 +00:00
2013-02-06 21:08:36 +00:00
class ToonHole(_BasicScraper):
    # Scraper settings for toonhole.com.
    url = 'http://www.toonhole.com/'
    # Regex-escaped copy of the URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
    # Image file names start with three dash-separated numbers.
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/stripname'

    def shouldSkipUrl(self, url):
        """Return True if ``url`` is one of the strip pages to be skipped.

        Currently only the "if-game-of-thrones-was-animated" page is listed.
        """
        return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
2013-03-07 17:22:24 +00:00
2013-02-06 21:08:36 +00:00
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _TwoLumps(_BasicScraper):
    # Scraper settings for twolumps.net; strip pages are "/d/<index>.html".
    url = 'http://www.twolumps.net/'
    stripUrl = url + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev"))
    help = 'Index format: yyyymmdd'
2012-06-20 19:58:13 +00:00
class TwoTwoOneFour(_BasicScraper):
    # Scraper settings for the "2214 classic" gallery on nitrocosm.com.
    description = u'Artwork, comics, graphic novels, music, articles, and various silliness by Troy McQuinn'
    url = 'http://www.nitrocosm.com/go/2214_classic/'
    # Regex-escaped copy of the URL for embedding in the previous-link pattern.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
    # Images come from the content.nitrocosm.com host.
    imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/[^"]+)', before="gallery_display"))
    prevSearch = compile(tagre("a", "href", r'(%s\d+/)' % rurl, after="Previous"))
    help = 'Index format: n (unpadded)'
class TheWhiteboard(_BasicScraper):
    # Scraper settings for the-whiteboard.com; strip pages are
    # "auto<index>.html" where the index starts with "twb" or "wb".
    description = u'The Whiteboard, a somewhat paintball-related webcomic by "Doc" Nickel'
    url = 'http://www.the-whiteboard.com/'
    stripUrl = url + 'auto%s.html'
    # Both patterns are case-insensitive.
    imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE)
    # NOTE(review): the character before "<a" was damaged in the source; it
    # may have been a non-breaking space / "&nbsp;" rather than a plain
    # space -- verify against the live page markup.
    prevSearch = compile(r' <a href="(.+?)">previous</a>', IGNORECASE)
    help = 'Index format: twb or wb + n eg. twb1000'
class HMHigh(_BasicScraper):
    # Scraper settings for the HMHigh comic on thefallenangel.co.uk.
    description = u'Welcome to the website of professional artist and illustrator, Angel Smith and of Fallen Angel Media Ltd; An independent art, design & publishing house and event management company based in Bristol, England'
    # Explicit scraper name, set instead of relying on the class name.
    name = 'TheFallenAngel/HMHigh'
    baseUrl = 'http://www.thefallenangel.co.uk/'
    url = baseUrl + 'hmhigh/'
    # Regex-escaped copy of the *base* URL (not `url`): the patterns below
    # append their own path after it.
    rurl = escape(baseUrl)
    stripUrl = url + '?id=%s'
    imageSearch = compile(r'<img src="(%shmhigh/img/comic/.+?)"' % rurl)
    prevSearch = compile(r'<a href="(%s.+?)" title=".+?">Prev</a>' % rurl)
    help = 'Index format: nnn'
class TheOuterQuarter(_BasicScraper):
    # Scraper settings for theouterquarter.com.
    url = 'http://theouterquarter.com/'
    # Regex-escaped copy of the URL for embedding in the image pattern.
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'oq-the-first-take/4'
    imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
    prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
    # NOTE(review): "nnn" does not match the shape of the firstStripUrl
    # index ("oq-the-first-take/4") -- confirm the intended index format.
    help = 'Index format: nnn'
2012-12-08 20:30:51 +00:00
2013-02-06 21:08:36 +00:00
class ThreePanelSoul(_BasicScraper):
    # Scraper settings for threepanelsoul.com.
    url = 'http://threepanelsoul.com/'
    # Regex-escaped copy of the URL for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2006/05/11/a-test-comic'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    # Previous links have three numeric path segments before the strip name.
    prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
2012-12-08 20:30:51 +00:00
class TracyAndTristan(_BasicScraper):
    # Scraper settings for tandt.thecomicseries.com.
    url = 'http://tandt.thecomicseries.com/'
    # Regex-escaped copy of the URL for embedding in the image pattern.
    rurl = escape(url)
    stripUrl = url + 'comics/%s'
    imageSearch = compile(tagre("img", "src", r'(%simages/comics/[^"]+)' % rurl))
    # The previous link is a site-relative "/comics/<number>" href.
    prevSearch = compile(tagre("a", "href", r'(/comics/\d+)', after="prev"))
    help = 'Index format: number'