dosage/dosagelib/plugins/z.py
Gervásio Júnior 6c8814fe40 Fix multiple imgs for json flag & ZenPencils bouncer (#133)
When using the JSON output flag, if the page has more than one image,
dictionary indexing cannot be used as if it were a list.

For the ZenPencils comic, the bouncer was missing, so the page URL was
saved as the root URL.
2019-06-19 07:09:33 +02:00

84 lines
2.8 KiB
Python

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, xpath_class
from .common import _WPNavi
class ZapComic(_ParserScraper):
    """Scraper settings for the comic hosted at http://www.zapcomic.com/."""

    url = 'http://www.zapcomic.com/'

    # Both search expressions below are written in CSS selector syntax;
    # ``css`` is presumably the switch that tells the base scraper to
    # interpret them that way (confirm against _ParserScraper).
    css = True
    imageSearch = 'img.comic-item'
    prevSearch = 'a.previous-comic-link'
class Zapiro(_ParserScraper):
    """Scraper settings for the Zapiro cartoons at http://mg.co.za/zapiro/."""

    url = 'http://mg.co.za/zapiro/'
    # NOTE(review): bounceStarter comes from ..helpers; it presumably needs
    # both prevSearch and nextSearch to be defined - confirm there.
    starter = bounceStarter

    # XPath expressions: the cartoon image, and the links whose class
    # matches 'left' (previous) and 'right' (next).
    imageSearch = '//div[@id="cartoon"]/img'
    prevSearch = '//a[%s]' % xpath_class('left')
    nextSearch = '//a[%s]' % xpath_class('right')

    def namer(self, image_url, page_url):
        """Return the part of ``page_url`` that follows its last '/'.

        ``image_url`` is accepted but not used. A ``page_url`` without any
        '/' raises IndexError.
        """
        return page_url.rsplit('/', 1)[1]
class ZenPencils(_WPNavi):
    """Scraper settings for Zen Pencils (https://zenpencils.com/)."""

    url = 'https://zenpencils.com/'
    firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'

    # The flag name indicates a strip page can hold more than one image;
    # the exact handling lives in the base scraper.
    multipleImagesPerStrip = True

    # NOTE(review): bounceStarter comes from ..helpers; it presumably relies
    # on the prev/next link expressions below - confirm there.
    starter = bounceStarter
    prevSearch = '//a[%s]' % xpath_class('navi-prev')
    nextSearch = '//a[%s]' % xpath_class('navi-next')
class ZombieHunters(_BasicScraper):
    """Scraper settings for http://www.thezombiehunters.com/."""

    url = 'http://www.thezombiehunters.com/'
    # Strips are addressed through a ``strip_id`` query parameter.
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n(unpadded)'

    # Captures the src of an <img> located under /istrip_files/strips/.
    imageSearch = compile(
        tagre("img", "src", r'(/istrip_files/strips/[^"]+)')
    )
    # Captures the href of an <a> that is directly followed by the <img>
    # whose id is "prevcomic".
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "id", "prevcomic")
    )
class Zwarwald(_BasicScraper):
    """Scraper settings for http://www.zwarwald.de/ (language code 'de')."""

    url = "http://www.zwarwald.de/"
    # Regex-escaped copy of the base URL, for embedding in the patterns below.
    rurl = escape(url)
    lang = 'de'
    help = 'Index format: number'

    stripUrl = url + 'index.php/page/%s/'
    # anything before page 495 seems to be flash
    firstStripUrl = stripUrl % '495'

    # Two alternative image patterns: one under the site's own URL, one
    # under a hosteurope.de webpack host.
    imageSearch = (
        compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),
    )
    # Captures the href of the page link that is followed by the "prev.jpg"
    # image (whose src attribute is single-quoted).
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
        tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")
    )

    def shouldSkipUrl(self, url, data):
        """Report whether ``url`` is one of the listed pages with flash content.

        ``data`` is accepted but not consulted.
        """
        flash_pages = ("112", "222", "223", "246", "368", "495")
        return url in [self.stripUrl % page for page in flash_pages]

    def namer(self, image_url, page_url):
        """Build the image name "<year>_<month>_<name>" from ``image_url``.

        The last three '/'-separated components of ``image_url`` are used;
        a URL with fewer than three '/' raises ValueError. ``page_url`` is
        not used.
        """
        _, year, month, filename = image_url.rsplit('/', 3)
        return "_".join((year, month, filename))