# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' rurl = escape(url) starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) stripUrl = url + '%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous') nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »') help = 'Index format: n (unpadded)' textSearch = compile(tagre("img", "title", r'([^"]+)')) @classmethod def namer(cls, imageUrl, pageUrl): index = int(pageUrl.rstrip('/').split('/')[-1]) name = imageUrl.split('/')[-1].split('.')[0] return 'c%03d-%s' % (index, name) class AbsurdNotions(_BasicScraper): baseUrl = 'http://www.absurdnotions.org/' url = baseUrl + 'page129.html' stripUrl = baseUrl + 'page%s.html' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) help = 'Index format: n (unpadded)' class AcademyVale(_BasicScraper): url = 'http://www.imagerie.com/vale/' stripUrl = url + 'avarch.cgi?%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' class Achewood(_BasicScraper): url = 'http://www.achewood.com/' stripUrl = url + 'index.php?date=%s' firstStripUrl = stripUrl % '00000000' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) help = 'Index format: mmddyyyy' namer = regexNamer(compile(r'date=(\d+)')) class AfterStrife(_BasicScraper): baseUrl = 'http://afterstrife.com/' rurl = escape(baseUrl) stripUrl = baseUrl + '?p=%s' url = stripUrl % '262' firstStripUrl = stripUrl % '1' imageSearch = compile(r'

') prevSearch = compile(r'

Back') class AmazingSuperPowers(_BasicScraper): url = 'http://www.amazingsuperpowers.com/' rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % '2007/09/heredity' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/name' def shouldSkipUrl(self, url, data): """Skip pages without images.""" return url in ( # video self.stripUrl % '2013/05/orbital-deathray-kickstarter', ) class Angband(_BasicScraper): url = 'http://angband.calamarain.net/' stripUrl = url + 'view.php?date=%s' firstStripUrl = stripUrl % '2005-12-30' imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") help = 'Index format: yyyy-mm-dd' class Angels2200(_BasicScraper): url = 'http://www.janahoffmann.com/angels/' stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) prevSearch = compile(tagre("a", "href", r'([^"]+)')+"« Previous") help = 'Index format: yyyy/mm/dd/part--comic-' class Annyseed(_BasicScraper): baseUrl = 'http://www.colourofivy.com/' rurl = escape(baseUrl) url = baseUrl + 'annyseed_webcomic_latest.htm' stripUrl = baseUrl + 'annyseed_webcomic%s.htm' imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) prevSearch = compile(r'

Previous Comic\s*

', MULTILINE) help = 'Index format: n (unpadded)' class ARedTailsDream(_BasicScraper): baseUrl = 'http://www.minnasundberg.fi/' stripUrl = baseUrl + 'comic/page%s.php' firstStripUrl = stripUrl % '00' url = baseUrl + 'comic/recent.php' imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)')) prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') + tagre("img", "src", r'.*?aprev.*?')) help = 'Index format: nn' class ASkeweredParadise(_BasicScraper): url = 'http://aspcomics.net/' stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") help = 'Index format: nnn' class ASofterWorld(_ParserScraper): url = 'http://www.asofterworld.com/' stripUrl = url + 'index.php?id=%s' firstStripUrl = stripUrl % '1' imageSearch = '//div[@id="comicimg"]//img' prevSearch = '//div[@id="previous"]/a' help = 'Index format: n (unpadded)' class AstronomyPOTD(_BasicScraper): baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' url = baseUrl + 'astropix.html' starter = bounceStarter(url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + ">")) stripUrl = baseUrl + 'ap%s.html' firstStripUrl = stripUrl % '061012' imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) multipleImagesPerStrip = True prevSearch = compile(tagre("a", "href", r'(ap\d{6}\.html)') + "<") help = 'Index format: yymmdd' def shouldSkipUrl(self, url, data): """Skip pages without images.""" return url in ( self.stripUrl % '130217', # video self.stripUrl % '130218', # video self.stripUrl % '130226', # video self.stripUrl % '130424', # video ) @classmethod def namer(cls, imageUrl, pageUrl): return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], imageUrl.split('/')[-1].split('.')[0])