# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape, MULTILINE
# Use functools.cached_property when it is importable; otherwise fall back to
# the standalone `cached_property` package.
try:
    from functools import cached_property
except ImportError:
    from cached_property import cached_property

from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class
from ..util import tagre
from .common import _ComicControlScraper, _TumblrScraper, _WordPressScraper, _WPNavi, _WPNaviIn


# The classes below are declarative: each one sets URL templates and search
# expressions as class attributes on a scraper base class imported above.
# How the base classes consume these attributes is not visible in this file.


class TalesOfTheQuestor(_ParserScraper):
    stripUrl = 'http://www.rhjunior.com/%s/'
    firstStripUrl = stripUrl % 'tales-of-the-questor-cover'
    url = stripUrl % 'comics/tales-of-the-questor'
    imageSearch = '//div[contains(@class, "entry-content")]//img'
    # Two candidate XPath expressions for the "previous" link
    # (presumably tried in order -- confirm against the scraper base class).
    prevSearch = ('//a[@rel="prev"]', '//a[@title="Tales of the Questor"]')


class Tamberlane(_ParserScraper):
    baseUrl = 'https://www.tamberlanecomic.com/'
    url = baseUrl + 'latest/'
    stripUrl = baseUrl + 'tamberlane/%s/'
    firstStripUrl = stripUrl % 'page-1'
    imageSearch = '//div[@class="webcomic-image"]//img'
    prevSearch = '//a[contains(@class, "previous-webcomic-link")]'

    def namer(self, imageUrl, pageUrl):
        """Return the last path segment of imageUrl, with the substring
        'ai4zCWaA' replaced by 'Page_152'. pageUrl is not used."""
        # Fix inconsistent filenames
        filename = imageUrl.rsplit('/', 1)[-1]
        return filename.replace('ai4zCWaA', 'Page_152')


class TheBrads(_ParserScraper):
    url = 'http://bradcolbow.com/archive/'
    # xpath_class() supplies the predicate that goes inside the brackets.
    imageSearch = '//div[%s]//img' % xpath_class('entry')
    prevSearch = '//a[%s]' % xpath_class('prev')
    multipleImagesPerStrip = True


class TheClassMenagerie(_ParserScraper):
    stripUrl = 'http://www.theclassm.com/d/%s.html'
    # `url` is pinned to a fixed strip rather than a "latest" page; the class
    # is also flagged endOfLife below.
    url = stripUrl % '20050717'
    firstStripUrl = stripUrl % '19990322'
    imageSearch = '//img[@class="ksc"]'
    prevSearch = '//a[@rel="prev"]'
    multipleImagesPerStrip = True
    endOfLife = True


class TheDevilsPanties(_WPNavi):
    url = 'http://thedevilspanties.com/'
    stripUrl = url + 'archives/%s'
    firstStripUrl = stripUrl % '300'
    help = 'Index format: number'


class TheDreamlandChronicles(_WordPressScraper):
    url = 'http://www.thedreamlandchronicles.com/'


class TheGamerCat(_ParserScraper):
    url = 'https://thegamercat.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '06102011'
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[contains(@class, "comic-nav-previous")]'
    help = 'Index format: stripname'


class TheGentlemansArmchair(_WordPressScraper):
    url = 'http://thegentlemansarmchair.com/'


class TheGentleWolf(_WordPressScraper):
    url = 'https://thegentlewolf.net/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'tgw-001'

    def namer(self, imageUrl, pageUrl):
        """Return the last path segment of imageUrl; when pageUrl is the
        'tgw-271' strip page, '272' in that name is replaced by '271'."""
        # Fix duplicate filename
        filename = imageUrl.rsplit('/', 1)[-1]
        if pageUrl == self.stripUrl % 'tgw-271':
            filename = filename.replace('272', '271')
        return filename


class TheLandscaper(_BasicScraper):
    stripUrl = 'http://landscaper.visual-assault.net/comic/%s'
    url = stripUrl % 'latest'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(/comics/comic/comic_page/[^"]+)'))
    # The anchor pattern must be followed directly by the link text.
    prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)') + '‹ Previous')
    help = 'Index format: name'


class TheMelvinChronicles(_WordPressScraper):
    url = 'http://melvin.jeaniebottle.com/'


class TheNoob(_WordPressScraper):
    url = 'http://thenoobcomic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n (unpadded)'


class TheOldVictorian(_ParserScraper):
    url = 'http://theoldvictorianwebcomic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'the-old-victorian-cover'
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[contains(@class, "comic-nav-previous")]'

    def namer(self, imageUrl, pageUrl):
        """Return a normalised file name derived from the last path segment
        of imageUrl. pageUrl is not used."""
        # Underscores become hyphens before any prefix handling.
        filename = imageUrl.rsplit('/', 1)[-1].replace('_', '-')
        filename = filename.replace('TOV00', 'TOV-00')
        # If what remains after dropping 'oldvic' starts with a digit,
        # rewrite the 'oldvic' prefix to 'TOV-00'.
        if filename.replace('oldvic', '')[0].isdigit():
            filename = filename.replace('oldvic', 'TOV-00')
        # For longer 'TOV-000' names, insert a hyphen after the 8th character.
        if 'TOV-000' in filename and len(filename) > 12:
            filename = filename[:8] + '-' + filename[8:]
        return filename


class TheOrderOfTheStick(_BasicScraper):
    url = 'http://www.giantitp.com/'
    stripUrl = url + 'comics/oots%s.html'
    firstStripUrl = stripUrl % '0001'
    imageSearch = compile(r'')
    prevSearch = compile(r'\n<')
    # NOTE(review): text appears to be missing around this point.
    # `compile(r'')` above is an empty pattern, and the attributes and
    # `_parse_page` below reference `TheWhiteboard` and
    # `self.BROKEN_PAGE_MIDDLE`, neither of which is defined anywhere in the
    # visible code. Presumably a `class TheWhiteboard(...)` header (and
    # possibly further definitions) belongs here -- restore it from version
    # control rather than guessing. The tokens are left exactly as found.
    url = 'http://www.the-whiteboard.com/'
    stripUrl = url + 'auto%s.html'
    firstStripUrl = stripUrl % 'wb001'
    imageSearch = '//img[contains(@src, "auto")]'
    prevSearch = '//a[.//img[contains(@src, "previous")]]'

    def _parse_page(self, data):
        """Replace every BROKEN_PAGE_MIDDLE match in data with '<', then
        delegate to the parent class's _parse_page."""
        # Ugly hack to fix broken HTML
        data = self.BROKEN_PAGE_MIDDLE.sub('<', data)
        return super(TheWhiteboard, self)._parse_page(data)


class TheWotch(_WordPressScraper):
    url = 'http://www.thewotch.com/'
    firstStripUrl = url + '?comic=enter-the-wotch'


class ThisIsIndexed(_BasicScraper):
    url = 'http://thisisindexed.com/'
    # Regex-escaped copy of `url` for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'page/%s'
    imageSearch = compile(tagre("img", "src",
                                r'(%swp-content/uploads/\d+/\d+/card[^"]+)' % rurl))
    multipleImagesPerStrip = True
    prevSearch = compile(tagre("div", "class", "nav-previous") +
                         tagre("a", "href", r'(%spage/\d+/)[^"]*' % rurl))
    help = 'Index format: number'


class ThreePanelSoul(_ComicControlScraper):
    url = 'http://threepanelsoul.com/'
    firstStripUrl = url + 'comic/a-test-comic'


class ToonHole(_WordPressScraper):
    url = 'http://toonhole.com/'
    firstStripUrl = url + 'comic/toon-hole-coming-soon-2010/'

    def shouldSkipUrl(self, url, data):
        """Return True only for the 'if-game-of-thrones-was-animated' page
        URL under self.url. data is not used."""
        return url in (self.url + "comic/if-game-of-thrones-was-animated/",)


class TracesOfThePast(_WPNaviIn):
    baseUrl = 'http://rickgriffinstudios.com/'
    url = baseUrl + 'in-the-new-age/'
    stripUrl = baseUrl + 'comic-post/%s/'
    firstStripUrl = stripUrl % 'totp-page-1'
    latestSearch = '//a[contains(@title, "Permanent Link")]'
    starter = indirectStarter


# Same site and URL templates as TracesOfThePast; differs in `name`, the first
# strip, the latestSearch title filter ("NSFW") and the `adult` flag.
class TracesOfThePastNSFW(_WPNaviIn):
    name = 'TracesOfThePast/NSFW'
    baseUrl = 'http://rickgriffinstudios.com/'
    url = baseUrl + 'in-the-new-age/'
    stripUrl = baseUrl + 'comic-post/%s/'
    firstStripUrl = stripUrl % 'totp-page-1-nsfw'
    latestSearch = '//a[contains(@title, "NSFW")]'
    starter = indirectStarter
    adult = True


class TrippingOverYou(_BasicScraper):
    url = 'http://www.trippingoveryou.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'wiggle-room'
    imageSearch = compile(tagre("img", "src", r'([^"]+/comics/[^"]+)'))
    # NOTE(review): this definition runs past the end of the visible text; the
    # statement below is left exactly as found and continues on what follows.
    prevSearch = compile(r'