# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape, MULTILINE

from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import regexNamer, bounceStarter, indirectStarter
from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH

# Comic scraper definitions. Each class below describes one comic purely
# through class attributes; the fetching logic lives in the imported base
# classes, which are not part of this file.
#
# NOTE(review): the attribute meanings listed here are inferred from how the
# attributes are used in this file -- confirm against ..scraper.
#
#   url            page the scrape starts from
#   stripUrl       template for one strip page ('%s' is the strip index)
#   firstStripUrl  stripUrl filled in with the oldest known index
#   imageSearch, prevSearch, nextSearch
#                  compiled regexes in _BasicScraper subclasses, XPath
#                  strings in _ParserScraper/_WordPressScraper subclasses
#   help           human-readable description of the index format
#
# Regex fragments are always written as raw strings so that sequences such
# as '\.' and '\d' reach the regex engine without triggering Python's
# invalid-escape-sequence warning.


class AbstruseGoose(_BasicScraper):
    url = 'http://abstrusegoose.com/'
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(
        tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
    prevSearch = compile(
        tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous')
    nextSearch = compile(
        tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »')
    # The starter follows the same "Next" link pattern, so reuse it instead
    # of compiling an identical expression a second time.
    starter = bounceStarter(url, nextSearch)
    help = 'Index format: n (unpadded)'
    textSearch = compile(tagre("img", "title", r'([^"]+)'))

    @classmethod
    def namer(cls, image_url, page_url):
        """Build a file name from the page index and the image base name.

        The index is the last path component of ``page_url`` (parsed as an
        integer and zero-padded to three digits); the name is the last path
        component of ``image_url`` up to its first dot.
        """
        index = int(page_url.rstrip('/').split('/')[-1])
        name = image_url.split('/')[-1].split('.')[0]
        return 'c%03d-%s' % (index, name)


class AbsurdNotions(_BasicScraper):
    baseUrl = 'http://www.absurdnotions.org/'
    url = baseUrl + 'page129.html'
    stripUrl = baseUrl + 'page%s.html'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre('img', 'src', r'(an[^"]+)'))
    multipleImagesPerStrip = True
    prevSearch = compile(
        tagre('a', 'href', r'([^"]+)') +
        tagre('img', 'src', r'nprev\.gif'))
    help = 'Index format: n (unpadded)'


class AcademyVale(_BasicScraper):
    url = 'http://www.imagerie.com/vale/'
    stripUrl = url + 'avarch.cgi?%s'
    firstStripUrl = stripUrl % '001'
    imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)'))
    prevSearch = compile(
        tagre('a', 'href', r'(avarch[^">]+)', quote="") +
        tagre('img', 'src', r'AVNavBack\.gif'))
    help = 'Index format: nnn'


class Achewood(_BasicScraper):
    url = 'http://www.achewood.com/'
    stripUrl = url + 'index.php?date=%s'
    firstStripUrl = stripUrl % '00000000'
    imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
    prevSearch = compile(
        tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
    help = 'Index format: mmddyyyy'
    namer = regexNamer(compile(r'date=(\d+)'))


class AfterStrife(_WordPressScraper):
    baseUrl = 'http://afterstrife.com/'
    stripUrl = baseUrl + '?p=%s'
    url = stripUrl % '262'
    firstStripUrl = stripUrl % '1'
    prevSearch = '//a[%s]' % xpath_class('navi-prev')
    help = 'Index format: nnn'
    endOfLife = True


class AGirlAndHerFed(_BasicScraper):
    url = 'http://www.agirlandherfed.com/'
    # NOTE(review): this pattern (also used for prevSearch below) has no
    # capture group and no surrounding markup; it looks as if the HTML part
    # of the expression was lost. Kept exactly as found -- confirm against
    # the upstream definition.
    starter = bounceStarter(url, compile(r'[^>]+Back'))
    stripUrl = url + '1.%s.html'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
    prevSearch = compile(r'[^>]+Back')
    help = 'Index format: nnn'


class AhoiPolloi(_ParserScraper):
    url = 'https://ahoipolloi.blogger.de/'
    stripUrl = url + '?day=%s'
    firstStripUrl = stripUrl % '20060306'
    multipleImagesPerStrip = True
    lang = 'de'
    imageSearch = '//img[contains(@src, "/static/antville/ahoipolloi/")]'
    prevSearch = '//a[contains(@href, "/?day=")]'
    help = 'Index format: yyyymmdd'


class AhoyEarth(_WordPressScraper):
    url = 'http://www.ahoyearth.com/'
    prevSearch = '//a[%s]' % xpath_class('navi-prev')


class AirForceBlues(_WordPressScraper):
    url = 'http://farvatoons.com/'
    firstStripUrl = url + 'comic/in-texas-there-are-texans/'


class ALessonIsLearned(_BasicScraper):
    url = 'http://www.alessonislearned.com/'
    # Defined before ``starter`` because the starter reuses this pattern.
    prevSearch = compile(
        tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'") +
        r"[^>]+previous")
    starter = indirectStarter(url, prevSearch)
    stripUrl = url + 'index.php?comic=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
    help = 'Index format: nnn'


class Alice(_WordPressScraper):
    url = 'http://www.alicecomics.com/'
    prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
    # Same address as ``url``; referenced by name instead of repeating the
    # literal.
    starter = indirectStarter(url, '//a[text()="Latest Alice!"]')


class AlienLovesPredator(_BasicScraper):
    url = 'http://alienlovespredator.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay'
    imageSearch = compile(
        tagre("img", "src", r'([^"]+)',
              after='border="1" alt="" width="750"'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/name'


class AlienShores(_WordPressScraper):
    url = 'http://alienshores.com/alienshores_band/'
    firstStripUrl = url + 'AScomic/updated-cover/'


class AllTheGrowingThings(_BasicScraper):
    url = 'http://growingthings.typodmary.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things'
    imageSearch = compile(tagre("img", "src", r'(%sfiles/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/dd/strip-name'


class AlphaLuna(_BasicScraper):
    url = 'http://www.alphaluna.net/'
    stripUrl = url + 'issue-%s/'
    firstStripUrl = stripUrl % '1/cover'
    imageSearch = compile(
        tagre("a", "href", r'[^"]*/(?:issue-|support/upcoming)[^"]+') +
        tagre("img", "src", r'([^"]*/PAGINAS/[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'([^"]+)') + tagre("img", "alt", "Prev"))
    help = 'Index format: issue/page (e.g. 4/05)'


class AlphaLunaSpanish(AlphaLuna):
    # Spanish edition: inherits the search patterns from AlphaLuna and only
    # overrides the naming, language and URL attributes.
    name = 'AlphaLuna/Spanish'
    lang = 'es'
    url = 'http://alphaluna.net/spanish/'
    stripUrl = url + 'issue-%s/'
    firstStripUrl = stripUrl % '1/portada'


class Altermeta(_BasicScraper):
    url = 'http://altermeta.net/'
    rurl = escape(url)
    stripUrl = url + 'archive.php?comic=%s'
    firstStripUrl = stripUrl % '0'
    # NOTE(review): an empty image pattern and a bare 'Back' pattern have no
    # capture groups; the original expressions appear to have been lost.
    # Kept exactly as found -- confirm against the upstream definition.
    imageSearch = compile(r'')
    prevSearch = compile(r'Back')


class AmazingSuperPowers(_BasicScraper):
    url = 'http://www.amazingsuperpowers.com/'
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2007/09/heredity'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
    prevSearch = compile(
        tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
    help = 'Index format: yyyy/mm/name'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return url in (
            # video
            self.stripUrl % '2013/05/orbital-deathray-kickstarter',
        )


class Amya(_WordPressScraper):
    url = 'http://www.amyachronicles.com/'


class Angband(_BasicScraper):
    url = 'http://angband.calamarain.net/'
    stripUrl = url + 'view.php?date=%s'
    firstStripUrl = stripUrl % '2005-12-30'
    imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(view\.php\?date\=[^"]+)') + "Previous")
    help = 'Index format: yyyy-mm-dd'


class Angels2200(_BasicScraper):
    url = 'http://www.janahoffmann.com/angels/'
    stripUrl = url + '%s'
    imageSearch = compile(
        tagre("img", "src",
              r"(http://www\.janahoffmann\.com/angels/comics/[^']+)",
              quote="'"))
    prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous")
    help = 'Index format: yyyy/mm/dd/part--comic-'


class Annyseed(_ParserScraper):
    baseUrl = 'http://www.mirrorwoodcomics.com/'
    url = baseUrl + 'AnnyseedLatest.htm'
    stripUrl = baseUrl + 'Annyseed%s.htm'
    imageSearch = '//div/img[contains(@src, "Annyseed")]'
    prevSearch = '//a[img[@name="Previousbtn"]]'
    help = 'Index format: nnn'


class AoiHouse(_ParserScraper):
    url = 'http://www.aoihouse.net/'
    imageSearch = '//div[@id="comic"]/a[2]/img'
    prevSearch = '//a[@id="cndprev"]'


class AppleGeeks(_BasicScraper):
    url = 'http://www.applegeeks.com/'
    stripUrl = url + 'comics/viewcomic.php?issue=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(
        tagre("img", "src", r'((?:/comics/)?issue\d+\.jpg)'))
    # NOTE(review): this pattern contains only text and line breaks, with no
    # capture group; the markup around "Previous Comic" appears to have been
    # lost. The string value is kept exactly as found (the pieces below
    # concatenate to the same characters) -- confirm against the upstream
    # definition.
    prevSearch = compile(
        '\nPrevious Comic\n'
        r'\s*'
        '\n\n',
        MULTILINE)
    help = 'Index format: n (unpadded)'


class ARedTailsDream(_BasicScraper):
    baseUrl = 'http://www.minnasundberg.fi/'
    stripUrl = baseUrl + 'comic/page%s.php'
    firstStripUrl = stripUrl % '00'
    url = baseUrl + 'comic/recent.php'
    imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)'))
    prevSearch = compile(
        tagre('a', 'href', r'(page\d+\.php)') +
        tagre("img", "src", r'.*?aprev.*?'))
    help = 'Index format: nn'


class Ashes(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
    firstStripUrl = url
    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)


class ASkeweredParadise(_BasicScraper):
    url = 'http://aspcomics.net/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '001'
    imageSearch = compile(
        tagre("img", "src",
              r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)'
              r'[^"]+'))
    prevSearch = compile(
        tagre("a", "href", r"(/comic/\d+)") + r"[^>]+Previous")
    help = 'Index format: nnn'


class ASofterWorld(_ParserScraper):
    url = 'http://www.asofterworld.com/'
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//div[@id="comicimg"]//img'
    prevSearch = '//div[@id="previous"]/a'
    help = 'Index format: n (unpadded)'


class AstronomyPOTD(_ParserScraper):
    baseUrl = 'http://apod.nasa.gov/apod/'
    url = baseUrl + 'astropix.html'
    starter = bounceStarter(url, '//a[text()=">"]')
    stripUrl = baseUrl + 'ap%s.html'
    firstStripUrl = stripUrl % '061012'
    imageSearch = '//a/img'
    multipleImagesPerStrip = True
    prevSearch = '//a[text()="<"]'
    help = 'Index format: yymmdd'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        # Returns the XPath result itself, which is truthy when the page
        # contains at least one iframe.
        return data.xpath('//iframe')  # videos

    @classmethod
    def namer(cls, image_url, page_url):
        """Build a file name from the page date and the image base name.

        The first part is the page's file name without extension and
        without its first two characters (the 'ap' prefix in ``stripUrl``);
        the second part is the image's file name up to its first dot.
        """
        page_part = page_url.split('/')[-1].split('.')[0][2:]
        image_part = image_url.split('/')[-1].split('.')[0]
        return '%s-%s' % (page_part, image_part)


class AxeCop(_WordPressScraper):
    url = 'http://axecop.com/comic/season-two/'