# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
from .common import ComicControlScraper, WordPressScraper, WordPressNavi

# NOTE(review): the text this module was restored from had all line breaks
# collapsed and several spans missing. Every place where content was lost is
# marked with a NOTE(review)/TODO below and must be checked against the
# upstream file before this module is relied upon.


class PandyLand(_ParserScraper):
    url = ('https://web.archive.org/web/20200122163307/'
           'http:/pandyland.net/')
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//div[d:class("comic")]/img'
    prevSearch = '//a[contains(text(), "previous")]'
    help = 'Index format: number'
    endOfLife = True


class ParadigmShift(_BasicScraper):
    url = 'http://www.paradigmshiftmanga.com/'
    starter = indirectStarter
    stripUrl = url + 'ps/%s.html'
    imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)',
                               after="previous-comic-link"))
    latestSearch = compile(tagre("a", "href", r'([^"]+)',
                                 after="next-comic-link"))
    help = 'Index format: custom'


class ParallelUniversum(WordPressScraper):
    url = 'https://www.paralleluniversum.net/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '001-der-comic-ist-tot'
    prevSearch = '//a[@rel="prev"]'
    lang = 'de'


class ParaNatural(ComicControlScraper):
    url = 'https://www.paranatural.net/'
    firstStripUrl = url + 'comic/chapter-1'


class PartiallyClips(WordPressScraper):
    url = ('https://web.archive.org/web/20180509161332/'
           'http://partiallyclips.com/')
    firstStripUrl = url + 'comic/screaming-woman/'
    endOfLife = True


class PastelDefender(_BasicScraper):
    baseUrl = 'http://www.pasteldefender.com/'
    url = baseUrl + 'coverbackcover.html'
    stripUrl = baseUrl + '%s.html'
    firstStripUrl = stripUrl % 'cover'
    # NOTE(review): the pattern below is empty in the reviewed text and is
    # almost certainly truncated (an empty regex has no capture group for the
    # image URL). Any attributes that followed it in this class are lost too.
    # TODO: restore from upstream.
    imageSearch = compile(r'')


# NOTE(review): the reviewed text jumps from the truncated PastelDefender
# pattern straight into the attributes below. The class statement and the
# BROKEN_COMMENT_END definition are reconstructed: the name comes from
# ``super(PHDComics, self)`` and ``PiledHigherAndDeeper(PHDComics)``; the base
# class is assumed from the XPath-based searches and ``data.xpath`` usage.
# Any definitions that sat between PastelDefender and this class are missing.
# TODO: confirm all of this against upstream.
class PHDComics(_ParserScraper):
    # NOTE(review): reconstructed. The code only shows that this is a compiled
    # pattern whose matches are replaced by '-->'; the exact pattern is an
    # assumption (HTML's "incorrectly closed comment" form). TODO confirm.
    BROKEN_COMMENT_END = compile(r'--!>')

    baseUrl = 'http://phdcomics.com/'
    url = baseUrl + 'comics.php'
    stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = ('//img[@id="comic2"]',
        r'//img[d:class("img-responsive") and re:test(@name, "comic\d+")]')
    multipleImagesPerStrip = True
    prevSearch = '//a[img[contains(@src, "prev_button")]]'
    nextSearch = '//a[img[contains(@src, "next_button")]]'
    help = 'Index format: n (unpadded)'

    # Ugly hack :(
    def _parse_page(self, data):
        # Rewrite every BROKEN_COMMENT_END match to a regular comment
        # terminator before handing the markup to the parent parser.
        data = self.BROKEN_COMMENT_END.sub('-->', data)
        return super(PHDComics, self)._parse_page(data)

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        # Truthy for the two listed strip URLs, or when the page contains the
        # specific "phd083123s" image (the XPath result list is returned
        # as-is, so callers should rely on truthiness only).
        return url in (
            # video
            self.stripUrl % '1880',
            self.stripUrl % '1669',
        ) or data.xpath('//img[@id="comic" and contains(@src, "phd083123s")]')


class Picklewhistle(ComicControlScraper):
    url = 'http://www.picklewhistle.com/'


class PicPakDog(WordPressScraper):
    url = 'http://www.picpak.net/'
    firstStripUrl = url + 'comic/dogs-cant-spell/'


# Keep, because naming is different to PHDComics...
class PiledHigherAndDeeper(PHDComics):
    starter = bounceStarter
    namer = queryNamer('comicid', use_page_url=True)


class Pixel(_BasicScraper):
    url = 'http://pixelcomic.net/'
    # Escaped copy of the base URL so it can be embedded in the regex below.
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '000.shtml'
    imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
    prevSearch = compile(tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl,
                               before="prev"))
    help = 'Index format: nnn'


class PlanescapeSurvival(_BasicScraper):
    url = 'http://planescapecomic.com/'
    stripUrl = url + '%s.html'
    imageSearch = compile(r'src="(comics/.+?)"')
    # NOTE(review): the prevSearch pattern is cut off mid-string in the
    # reviewed text; the surviving fragment is kept verbatim here instead of
    # guessing the rest. Any attributes that followed it are lost as well.
    # TODO: restore from upstream.
    #     prevSearch = compile(r'Previous


# NOTE(review): everything between the truncated pattern above and the tail of
# a ``namer``-style method is missing from the reviewed text (the enclosing
# class statement(s), attributes and the start of the method). The surviving
# fragment is preserved verbatim below and is NOT active code.
# TODO: restore from upstream.
#     ...= 9 and filename[8].isdigit():
#             filename = filename[:8] + '-' + filename[8:]
#         return filename


class Prototype(_ParserScraper):
    stripUrl = 'https://web.archive.org/web/20201030035444/http://planetprototype.com/%s/'
    firstStripUrl = stripUrl % '2018/03/30/vol-1-ch-1-front-cover'
    # The start URL is the first strip; the latest one is reached through
    # latestSearch via indirectStarter.
    url = firstStripUrl
    imageSearch = '//img[contains(@class, "wp-post-image")]'
    prevSearch = '//a[.//text()="Previous"]'
    latestSearch = '//a[.//text()="Latest"]'
    starter = indirectStarter
    endOfLife = True


class PS238(_ParserScraper):
    url = 'http://ps238.nodwick.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '12072006'
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
    # NOTE(review): the first index above ('12072006') does not look like
    # yyyy-mm-dd; the help text may be out of date - confirm before changing.
    help = 'Index format: yyyy-mm-dd'


class PvPOnline(ParserScraper):
    baseUrl = 'https://www.toonhoundstudios.com/'
    stripUrl = baseUrl + 'comic/%s/?sid=372'
    url = stripUrl % 'pvp-2022-09-16'
    firstStripUrl = stripUrl % '19980504'
    imageSearch = '//div[@id="spliced-comic"]//img/@data-src-img'
    prevSearch = '//a[d:class("prev")]'
    endOfLife = True

    def namer(self, image_url, page_url):
        """Return 'pvp' followed by the last path component of image_url."""
        return 'pvp' + image_url.rsplit('/', 1)[-1]