# -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2013 Bastian Kleineidam from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper from ..helpers import regexNamer, bounceStarter, indirectStarter class AbleAndBaker(_BasicScraper): description = u"Able and Baker: Hatin' and Dictatin'" url = 'http://www.jimburgessdesign.com/comics/index.php' stripUrl = url + '?comic=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(comics/.+)')) prevSearch = compile(tagre('a', 'href', r'(.+\d+)') + '.+?previous.gif') help = 'Index format: nnn' class AbsurdNotions(_BasicScraper): baseUrl = 'http://www.absurdnotions.org/' url = baseUrl + 'page129.html' stripUrl = baseUrl + 'page%s.html' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) help = 'Index format: n (unpadded)' class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' rurl = escape(url) starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) stripUrl = url + '%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(%sstrips/[^<>"]+)' % rurl)) prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous') help = 'Index format: n (unpadded)' @classmethod def namer(cls, imageUrl, pageUrl): index = int(pageUrl.rstrip('/').split('/')[-1]) name = imageUrl.split('/')[-1].split('.')[0] return 'c%03d-%s' % (index, name) class AcademyVale(_BasicScraper): description = u'Academy Vale' url = 'http://www.imagerie.com/vale/' stripUrl = url + 'avarch.cgi?%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' class Achewood(_BasicScraper): url = 'http://www.achewood.com/' stripUrl = url + 'index.php?date=%s' firstStripUrl = stripUrl % '00000000' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' namer = regexNamer(compile(r'date=(\d+)')) class AfterStrife(_BasicScraper): baseUrl = 'http://afterstrife.com/' rurl = escape(baseUrl) stripUrl = baseUrl + '?p=%s' url = stripUrl % '262' firstStripUrl = stripUrl % '1' imageSearch = compile(r'

') prevSearch = compile(r'

Previous Comic\s*

', MULTILINE) help = 'Index format: n (unpadded)' class ARedTailsDream(_BasicScraper): description = u"Finnish mythology meets teen boy and his dog" baseUrl = 'http://www.minnasundberg.fi/' stripUrl = baseUrl + 'comic/page%s.php' firstStripUrl = stripUrl % '00' url = baseUrl + 'comic/recent.php' imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)')) prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') + tagre("img", "src", r'.*?aprev.*?')) help = 'Index format: nn' class ASofterWorld(_BasicScraper): url = 'http://www.asofterworld.com/' stripUrl = url + 'index.php?id=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("p", "id", "thecomic") + r'\s*' + tagre("img", "src", r'(http://www\.asofterworld\.com/clean/[^"]+)')) prevSearch = compile(tagre("a", "href", "(index\.php\?id=\d+)")+'< back') help = 'Index format: n (unpadded)' class AstronomyPOTD(_BasicScraper): description = u'A different astronomy and space science related image is featured each day, along with a brief explanation.' baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' url = baseUrl + 'astropix.html' starter = bounceStarter(url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + ">")) stripUrl = baseUrl + 'ap%s.html' firstStripUrl = stripUrl % '061012' imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "<") help = 'Index format: yymmdd' def shouldSkipUrl(self, url): """Skip pages without images.""" return url in ( self.stripUrl % '130217', # video self.stripUrl % '130218', # video self.stripUrl % '130226', # video self.stripUrl % '130424', # video ) @classmethod def namer(cls, imageUrl, pageUrl): return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], imageUrl.split('/')[-1].split('.')[0]) class ASkeweredParadise(_BasicScraper): url = 'http://aspcomics.net/' stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") help = 'Index format: nnn' class AxeCop(_BasicScraper): description = u'Axe Cop' url = 'http://axecop.com/' rurl = escape(url) starter = bounceStarter(url, compile(tagre("a", "href", r'(%scomic/episode-\d+/)' % rurl, after="navi-next"))) stripUrl = url + 'comic/episode-%s/' firstStripUrl = stripUrl % '0' imageSearch = compile(tagre("img", "src", r'(http://mainsite\.axecop\.wpengine\.com/wp-content/uploads/sites/\d+/\d+/\d+/[^"]+)')) prevSearch = compile(tagre("a", "href", r'(%scomic/episode-\d+/)' % rurl, after="navi-prev")) help = 'Index format: number' @classmethod def namer(cls, imageUrl, pageUrl): extension = imageUrl.rsplit('.', 1)[1] episode = pageUrl.rsplit('-', 1)[1] episode = episode.strip('/') return 'AxeCop-%s.%s' % (episode, extension)