# -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012 Bastian Kleineidam from re import compile, IGNORECASE from ..scraper import _BasicScraper from ..helpers import bounceStarter, queryNamer, indirectStarter from ..util import tagre class PartiallyClips(_BasicScraper): latestUrl = 'http://partiallyclips.com/' stripUrl = latestUrl + '%s/' imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', before="prev")) help = 'Index format: yyyy/mm/dd/stripname' class PastelDefender(_BasicScraper): latestUrl = 'http://www.pasteldefender.com/coverbackcover.html' stripUrl = 'http://www.pasteldefender.com/%s.html' imageSearch = compile(r'First Comic)Archives/Strip.+?|(?=.+?">Previous Comic)(?!.+?">First Comic)Strip.+?)"') help = 'Index format: n (unpadded)' class PennyAndAggie(_BasicScraper): baseUrl = 'http://www.pennyandaggie.com/' stripUrl = baseUrl + 'index.php?p=%s' imageSearch = compile(tagre("a", "href", r'(http://www\.pennyandaggie\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") + tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote="")) starter = indirectStarter(baseUrl, prevSearch) help = 'Index format: n (unpadded)' class PennyArcade(_BasicScraper): baseUrl = 'http://penny-arcade.com/comic/' starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntNext")) ) stripUrl = baseUrl + '%s/' imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntPrev")) help = 'Index format: yyyy/mm/dd' @classmethod def namer(cls, imageUrl, pageUrl): yyyy, mm, dd = pageUrl.split('/')[-4:-1] return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd)) class PeppermintSaga(_BasicScraper): latestUrl = 'http://www.pepsaga.com/' stripUrl = latestUrl + '?p=%s' imageSearch = compile(tagre("img", "src", r'(http://www\.pepsaga\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.pepsaga\.com/\?p=\d+)', after="prev")) help = 'Index format: number' class Pixel(_BasicScraper): latestUrl = 'http://pixelcomic.net/' stripUrl = latestUrl + '%s.php' imageSearch = compile(tagre("img", "src", r'(\d+\.png)')) prevSearch = compile(tagre("a", "href", r'(http://pixelcomic\.net/\d+\.php)', before="prev")) help = 'Index format: nnn' class PiledHigherAndDeeper(_BasicScraper): starter = bounceStarter('http://www.phdcomics.com/comics/archive.php', compile(r'')) stripUrl = 'http://www.phdcomics.com/comics/archive.php?comicid=%s' imageSearch = compile(r'') help = 'Index format: n (unpadded)' namer = queryNamer('comicid', usePageUrl=True) class Precocious(_BasicScraper): baseUrl = 'http://www.precociouscomic.com/' starter = indirectStarter(baseUrl, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png")) ) stripUrl = baseUrl + 'archive/comic/%s' imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)')) prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png")) help = 'Index format: yyyy/mm/dd' class PvPonline(_BasicScraper): latestUrl = 'http://pvponline.com/comic' stripUrl = latestUrl + '%s' imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp\d+\.jpg)')) prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous")) help = 'Index format: yyyy/mm/dd/stripname' def pensAndTales(name, baseUrl): return type('PensAndTales_%s' % name, (_BasicScraper,), dict( name='PensAndTales/' + name, latestUrl=baseUrl, stripUrl=baseUrl + '?date=%s', imageSearch=compile(r']+?src="([^"]*?comics/.+?)"', IGNORECASE), prevSearch=compile(r'(:?]+?alt=")?Previous Comic', IGNORECASE), help='Index format: yyyymmdd') ) # XXX: using custom Wordpress layout # th = pensAndTales('TreasureHunters', 'http://th.pensandtales.com/') # XXX: comic broken, no content # strangekith = pensAndTales('Strangekith', 'http://strangekith.pensandtales.com/') # XXX: comic broken # fireflycross = pensAndTales('FireflyCross', 'http://fireflycross.pensandtales.com/') evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/') # XXX: moved / layout changed #ynt = pensAndTales('YamiNoTainai', 'http://ynt.pensandtales.com/') class ProperBarn(_BasicScraper): latestUrl = 'http://www.nitrocosm.com/go/gag/' stripUrl = latestUrl + '%s/' imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+.png)')) prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous")) help = 'Index format: nnn' class PunksAndNerds(_BasicScraper): latestUrl = 'http://www.punksandnerds.com/' stripUrl = latestUrl + '?p=%s' imageSearch = compile(tagre("img", "src", r'(http://www\.punksandnerds\.com/comics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(http://www\.punksandnerds\.com/\?p=\d+)', after="navi-prev")) help = 'Index format: nnn' class PunksAndNerdsOld(_BasicScraper): latestUrl = 'http://original.punksandnerds.com/' stripUrl = latestUrl + 'd/%s.html' imageSearch = compile(r' src="(/comics/.+?)"') prevSearch = compile(r'>]+?>]+?src="/previouscomic.gif">') help = 'Index format: yyyymmdd' class PlanescapeSurvival(_BasicScraper): latestUrl = 'http://planescapecomic.com/' stripUrl = latestUrl + '%s.html' imageSearch = compile(r'src="(comics/.+?)"') prevSearch = compile(r'