6c8814fe40
When using the JSON output flag, if the page has more than one image, dictionary indexing cannot be used as list. For the ZenPencils comic, the bouncer is missing, saving the page url as the root url.
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
|
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
|
|
from re import compile, escape
|
|
|
|
from ..scraper import _BasicScraper, _ParserScraper
|
|
from ..util import tagre
|
|
from ..helpers import bounceStarter, xpath_class
|
|
from .common import _WPNavi
|
|
|
|
|
|
class ZapComic(_ParserScraper):
|
|
url = 'http://www.zapcomic.com/'
|
|
css = True
|
|
imageSearch = 'img.comic-item'
|
|
prevSearch = 'a.previous-comic-link'
|
|
|
|
|
|
class Zapiro(_ParserScraper):
|
|
url = 'http://mg.co.za/zapiro/'
|
|
starter = bounceStarter
|
|
imageSearch = '//div[@id="cartoon"]/img'
|
|
prevSearch = '//a[%s]' % xpath_class('left')
|
|
nextSearch = '//a[%s]' % xpath_class('right')
|
|
|
|
def namer(self, image_url, page_url):
|
|
parts = page_url.rsplit('/', 1)
|
|
return parts[1]
|
|
|
|
|
|
class ZenPencils(_WPNavi):
|
|
url = 'https://zenpencils.com/'
|
|
multipleImagesPerStrip = True
|
|
firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
|
|
starter = bounceStarter
|
|
prevSearch = '//a[%s]' % xpath_class('navi-prev')
|
|
nextSearch = '//a[%s]' % xpath_class('navi-next')
|
|
|
|
|
|
class ZombieHunters(_BasicScraper):
|
|
url = 'http://www.thezombiehunters.com/'
|
|
stripUrl = url + '?strip_id=%s'
|
|
firstStripUrl = stripUrl % '1'
|
|
imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))
|
|
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))
|
|
help = 'Index format: n(unpadded)'
|
|
|
|
|
|
class Zwarwald(_BasicScraper):
|
|
url = "http://www.zwarwald.de/"
|
|
rurl = escape(url)
|
|
stripUrl = url + 'index.php/page/%s/'
|
|
# anything before page 495 seems to be flash
|
|
firstStripUrl = stripUrl % '495'
|
|
lang = 'de'
|
|
imageSearch = (
|
|
compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),
|
|
compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),
|
|
)
|
|
prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
|
|
tagre("img", "src",
|
|
r'http://zwarwald\.de/images/prev\.jpg',
|
|
quote="'"))
|
|
help = 'Index format: number'
|
|
|
|
def shouldSkipUrl(self, url, data):
|
|
"""Some pages have flash content."""
|
|
return url in (
|
|
self.stripUrl % "112",
|
|
self.stripUrl % "222",
|
|
self.stripUrl % "223",
|
|
self.stripUrl % "246",
|
|
self.stripUrl % "368",
|
|
self.stripUrl % '495',
|
|
)
|
|
|
|
def namer(self, image_url, page_url):
|
|
prefix, year, month, name = image_url.rsplit('/', 3)
|
|
return "%s_%s_%s" % (year, month, name)
|