diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 82e3666e2..70e12fec4 100644 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -1,5 +1,7 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012 Bastian Kleineidam + import urllib2 import os import locale diff --git a/dosagelib/configuration.py b/dosagelib/configuration.py index 06dd4a7c5..a1fd67a4f 100644 --- a/dosagelib/configuration.py +++ b/dosagelib/configuration.py @@ -1,3 +1,4 @@ +# Copyright (C) 2012 Bastian Kleineidam """ Define basic configuration data like version or application name. """ diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index 64f16d8c1..075fd2212 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -25,21 +25,16 @@ def regexNamer(regex): return _namer -def constStarter(latestUrl): - """Start from constant URL.""" - @staticmethod - def _starter(): - return latestUrl - return _starter - - def bounceStarter(latestUrl, nextSearch): """Get start URL by "bouncing" back and forth one time.""" @classmethod def _starter(cls): url = fetchUrl(latestUrl, cls.prevSearch) - if url: - url = fetchUrl(url, nextSearch) + if not url: + raise ValueError("could not find prevSearch pattern %r in %s" % (cls.prevSearch.pattern, latestUrl)) + url = fetchUrl(url, nextSearch) + if not url: + raise ValueError("could not find nextSearch pattern %r in %s" % (nextSearch.pattern, latestUrl)) return url return _starter @@ -48,7 +43,10 @@ def indirectStarter(baseUrl, latestSearch): """Get start URL by indirection.""" @staticmethod def _starter(): - return fetchUrl(baseUrl, latestSearch) + url = fetchUrl(baseUrl, latestSearch) + if not url: + raise ValueError("could not find latestSearch pattern %r in %s" % (latestSearch.pattern, baseUrl)) + return url return _starter diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index ba5358474..7ea4e6040 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -1,5 +1,7 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012 Bastian Kleineidam + from re import compile, MULTILINE from ..util import tagre from ..scraper import _BasicScraper diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 2e778f38e..5b523d27c 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -1,5 +1,7 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012 Bastian Kleineidam + from re import compile from ..util import tagre diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index 58e218cee..8ea739223 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -1,51 +1,28 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012 Bastian Kleineidam + from re import compile from ..scraper import _BasicScraper -from ..helpers import constStarter, bounceStarter +from ..helpers import bounceStarter, indirectStarter from ..util import tagre, getQueryParams -class CalvinAndHobbes(_BasicScraper): - starter = bounceStarter('http://www.gocomics.com/calvinandhobbes/', - compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Next feature")) - stripUrl = 'http://www.gocomics.com/calvinandhobbes/%s' - imageSearch = compile(tagre("img", "src", "(http://assets\.amuniversal\.com/[a-f0-9]+)")) - prevSearch = compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Previous feature") - help = 'Index format: yyyy/mm/dd' - - @classmethod - def namer(cls, imageUrl, pageUrl): - prefix, year, month, day = pageUrl.rsplit('/', 3) - return "%s%s%s.gif" % (year, month, day) - - -class CandyCartoon(_BasicScraper): - latestUrl = 'http://www.candycartoon.com/' - stripUrl = latestUrl + 'archives/%s.html' - imageSearch = compile(r'[^prev') - help = 'Index format: nnnnnn' - - - class CaptainSNES(_BasicScraper): - latestUrl = 'http://captainsnes.com/' - stripUrl = latestUrl + '?date=%s' - imageSearch = compile(r'') - help = 'Index format: yyyymmdd' - + latestUrl = 'http://www.captainsnes.com/' + stripUrl = latestUrl + '%s/' + imageSearch = compile(r"') + help = 'Index format: yyyy/mm/dd/nnn-stripname' class CaribbeanBlue(_BasicScraper): latestUrl = 'http://cblue.katbox.net/' - stripUrl = latestUrl + 'index.php?strip_id=%s' - imageSearch = compile(r'="(.+?strips/.+?)"') - prevSearch = compile(r'') + stripUrl = latestUrl + '?p=%s' + imageSearch = compile(tagre("img", "src", r'(http://www\.commissionedcomic\.com/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://www\.commissionedcomic\.com/\?p=\d+)', after="prev")) help = 'Index format: n' - class CoolCatStudio(_BasicScraper): latestUrl = 'http://www.coolcatstudio.com/' stripUrl = latestUrl + 'strips-cat/ccs%s' - imageSearch = compile(tagre("img", "src", r'(http://www.coolcatstudio.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="cniprevt")) + imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="prev")) help = 'Index format: yyyymmdd' - class CourtingDisaster(_BasicScraper): latestUrl = 'http://www.courting-disaster.com/' stripUrl = latestUrl + 'archive/%s.html' - imageSearch = compile(r'(/comics/.+?)"') - prevSearch = compile(r']+?>') + imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/previous\.gif')) help = 'Index format: yyyymmdd' - class CrapIDrewOnMyLunchBreak(_BasicScraper): latestUrl = 'http://crap.jinwicked.com/' stripUrl = latestUrl + '%s' @@ -168,7 +121,6 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper): help = 'Index format: yyyy/mm/dd/name' - class CtrlAltDel(_BasicScraper): latestUrl = 'http://www.cad-comic.com/cad/' stripUrl = latestUrl + '%s' @@ -186,34 +138,31 @@ class CtrlAltDelSillies(CtrlAltDel): class Curvy(_BasicScraper): latestUrl = 'http://www.c.urvy.org/' stripUrl = latestUrl + '?date=%s' - imageSearch = compile(r'(/c/.+?)"') - prevSearch = compile(r'(/\?date=.+?)"><< Previous page') + imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') + tagre("img", "src", "/nav/prev\.png")) help = 'Index format: yyyymmdd' def cloneManga(name, shortName, lastStrip=None): url = 'http://manga.clone-army.org' baseUrl = '%s/%s.php' % (url, shortName) - stripUrl = baseUrl + '?page=%s' - if lastStrip is None: - starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"next\.gif"))) - else: - starter = constStarter(stripUrl % lastStrip) def namer(self, imageUrl, pageUrl): return '%03d' % int(getQueryParams(pageUrl)['page'][0]) - return type('CloneManga_%s' % name, - (_BasicScraper,), - dict( - name='CloneManga/' + name, - starter=starter, - stripUrl=stripUrl, - imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")), - prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")), - help='Index format: n', - namer=namer) + attrs = dict( + name='CloneManga/' + name, + stripUrl = baseUrl + '?page=%s', + imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")), + prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")), + help='Index format: n', + namer=namer, ) + if lastStrip is None: + attrs['starter'] = indirectStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"last\.gif"))) + else: + attrs['latestUrl'] = attrs['stripUrl'] % lastStrip + return type('CloneManga_%s' % name, (_BasicScraper,), attrs) anm = cloneManga('AprilAndMay', 'anm') @@ -233,148 +182,14 @@ class CatAndGirl(_BasicScraper): help = 'Index format: n (unpadded)' -def comicsDotCom(name, section): - latestUrl = 'http://www.gocomics.com/%s' % name - - @classmethod - def namer(cls, imageUrl, pageUrl): - prefix, year, month, day = pageUrl.split('/', 3) - return "%s_%s%s%s.gif" % (name, year, month, day) - - return type('GoComicsDotCom_%s' % name, - (_BasicScraper,), - dict( - name='GoComicsDotCom/' + name, - stripUrl=latestUrl + '/%s', - imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')), - prevSearch=compile(tagre("a", "href", "(/%s/\d+/\d+/\d+)")+"Previous"), - help='Index format: yyyy/mm/dd', - namer=namer) - ) - -# http://www.gocomics.com/features -# XXX - -# http://www.gocomics.com/explore/editorial_list -# XXX - -# http://www.gocomics.com/explore/sherpa_list -# XXX - -acaseinpoint = comicsDotCom('acaseinpoint', 'comics') -agnes = comicsDotCom('agnes', 'creators') -alleyoop = comicsDotCom('alleyoop', 'comics') -andycapp = comicsDotCom('andycapp', 'creators') -arlonjanis = comicsDotCom('arlonjanis', 'comics') -ballardst = comicsDotCom('ballardst', 'creators') -barkeaterlake = comicsDotCom('barkeaterlake', 'comics') -bc = comicsDotCom('bc', 'creators') -ben = comicsDotCom('ben', 'comics') -betty = comicsDotCom('betty', 'comics') -bignate = comicsDotCom('bignate', 'comics') -bonanas = comicsDotCom('bonanas', 'wash') -bornloser = comicsDotCom('bornloser', 'comics') -buckets = comicsDotCom('buckets', 'comics') -candorville = comicsDotCom('candorville', 'wash') -cheapthrills = comicsDotCom('cheapthrills', 'wash') -chickweed = comicsDotCom('chickweed', 'comics') -committed = comicsDotCom('committed', 'comics') -dilbert = comicsDotCom('dilbert', 'comics') -drabble = comicsDotCom('drabble', 'comics') -fatcats = comicsDotCom('fatcats', 'comics') -ferdnand = comicsDotCom('ferdnand', 'comics') -flightdeck = comicsDotCom('flightdeck', 'creators') -floandfriends = comicsDotCom('floandfriends', 'creators') -franknernest = comicsDotCom('franknernest', 'comics') -frazz = comicsDotCom('frazz', 'comics') -geech = comicsDotCom('geech', 'comics') -genepool = comicsDotCom('genepool', 'wash') -getfuzzy = comicsDotCom('getfuzzy', 'comics') -gofish = comicsDotCom('gofish', 'comics') -graffiti = comicsDotCom('graffiti', 'comics') -grandave = comicsDotCom('grandave', 'comics') -grizzwells = comicsDotCom('grizzwells', 'comics') -heathcliff = comicsDotCom('heathcliff', 'creators') -hedge = comicsDotCom('hedge', 'comics') -herbnjamaal = comicsDotCom('herbnjamaal', 'creators') -herman = comicsDotCom('herman', 'comics') -humblestumble = comicsDotCom('humblestumble', 'comics') -janesworld = comicsDotCom('janesworld', 'comics') -jumpstart = comicsDotCom('jumpstart', 'comics') -kitncarlyle = comicsDotCom('kitncarlyle', 'comics') -liberty = comicsDotCom('liberty', 'creators') -lilabner = comicsDotCom('lilabner', 'comics') -luann = comicsDotCom('luann', 'comics') -marmaduke = comicsDotCom('marmaduke', 'comics') -meg = comicsDotCom('meg', 'comics') -moderatelyconfused = comicsDotCom('moderatelyconfused', 'comics') -momma = comicsDotCom('momma', 'creators') -monty = comicsDotCom('monty', 'comics') -motley = comicsDotCom('motley', 'comics') -nancy = comicsDotCom('nancy', 'comics') -naturalselection = comicsDotCom('naturalselection', 'creators') -offthemark = comicsDotCom('offthemark', 'comics') -onebighappy = comicsDotCom('onebighappy', 'creators') -othercoast = comicsDotCom('othercoast', 'creators') -pcnpixel = comicsDotCom('pcnpixel', 'wash') -peanuts = comicsDotCom('peanuts', 'comics') -pearls = comicsDotCom('pearls', 'comics') -pibgorn = comicsDotCom('pibgorn', 'comics') -pickles = comicsDotCom('pickles', 'wash') -raisingduncan = comicsDotCom('raisingduncan', 'comics') -reality = comicsDotCom('reality', 'comics') -redandrover = comicsDotCom('redandrover', 'wash') -ripleys = comicsDotCom('ripleys', 'comics') -roseisrose = comicsDotCom('roseisrose', 'comics') -rubes = comicsDotCom('rubes', 'creators') -rudypark = comicsDotCom('rudypark', 'comics') -shirleynson = comicsDotCom('shirleynson', 'comics') -soup2nutz = comicsDotCom('soup2nutz', 'comics') -speedbump = comicsDotCom('speedbump', 'creators') -spotthefrog = comicsDotCom('spotthefrog', 'comics') -strangebrew = comicsDotCom('strangebrew', 'creators') -sunshineclub = comicsDotCom('sunshineclub', 'comics') -tarzan = comicsDotCom('tarzan', 'comics') -thatslife = comicsDotCom('thatslife', 'wash') -wizardofid = comicsDotCom('wizardofid', 'creators') -workingdaze = comicsDotCom('workingdaze', 'comics') -workingitout = comicsDotCom('workingitout', 'creators') - - -def creators(name, shortname): - return type('Creators_%s' % name, - (_BasicScraper,), - dict( - name='Creators/' + name, - latestUrl='http://www.creators.com/comics_show.cfm?ComicName=%s' % (shortname,), - stripUrl=None, - imageSearch=compile(tagre("img", "src", r'(\d{4}/[^"]+/[^"]+\.[^"]+)')), - prevSearch=compile(tagre("a", "href", r'(comics_show\.cfm\?next=\d+&ComicName=[^"]+)', after='Previous Comic')), - help='Indexing unsupported') - ) - - -arc = creators('Archie', 'arc') -shg = creators('AskShagg', 'shg') -hev = creators('ForHeavensSake', 'hev') -rug = creators('Rugrats', 'rug') -sou = creators('StateOfTheUnion', 'sou') -din = creators('TheDinetteSet', 'din') -lil = creators('TheMeaningOfLila', 'lil') -wee = creators('WeePals', 'wee') -zhi = creators('ZackHill', 'zhi') - - - class CyanideAndHappiness(_BasicScraper): - latestUrl = 'http://www.explosm.net/comics' - stripUrl = latestUrl + '/%s' - imageSearch = compile(r'Cyanide and Happiness, a daily webcomic< Previous') + latestUrl = 'http://www.explosm.net/comics/' + stripUrl = latestUrl + '%s/' + imageSearch = compile(tagre("img", "src", r'(http:\/\/www\.explosm\.net/db/files/Comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev")) help = 'Index format: n (unpadded)' - class CrimsonDark(_BasicScraper): latestUrl = 'http://www.davidcsimon.com/crimsondark/' stripUrl = latestUrl + 'index.php?view=comic&strip_id=%s' @@ -383,16 +198,6 @@ class CrimsonDark(_BasicScraper): help = 'Index format: n (unpadded)' - -class CrimesOfCybeleCity(_BasicScraper): - latestUrl = 'http://www.pulledpunches.com/crimes/' - stripUrl = 'http://www.beaglespace.com/pulledpunches/crimes/?p=%s' - imageSearch = compile(r'Next>')) - stripUrl = 'http://corydoncafe.com/comic-%s.html' - imageSearch = compile(r'<Previous') - help = 'Index format: nnn' + starter = bounceStarter('http://corydoncafe.com/', compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="next", quote="'"))) + stripUrl = 'http://corydoncafe.com/%s.php' + imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'")) + prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'")) + help = 'Index format: yyyy/stripname' @classmethod def namer(cls, imageUrl, pageUrl): return pageUrl.split('/')[-1].split('.')[0] - class CraftedFables(_BasicScraper): latestUrl = 'http://www.craftedfables.com/' stripUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s' imageSearch = compile(tagre("img", "src", r'(http://www\.caf-fiends\.net/craftedfables/comics/[^"]+)')) prevSearch = compile(r'') help = 'Index format: nnn' - - - -class Currhue(_BasicScraper): - latestUrl = 'http://www.currhue.com/' - stripUrl = latestUrl + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://www\.currhue\.com/comics/[^"]+)')) - prevSearch = compile(r'