2016-04-03 22:23:47 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2012-06-20 20:41:04 +00:00
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2014-01-05 15:50:57 +00:00
|
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
2016-04-03 22:23:47 +00:00
|
|
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
|
|
|
|
|
|
|
from __future__ import absolute_import, division, print_function
|
2012-11-21 20:57:26 +00:00
|
|
|
|
2013-04-10 16:19:11 +00:00
|
|
|
from re import compile, escape
|
2016-04-03 22:23:47 +00:00
|
|
|
|
|
|
|
from ..scraper import _BasicScraper, _ParserScraper
|
2012-11-26 06:13:32 +00:00
|
|
|
from ..util import tagre
|
|
|
|
from ..helpers import bounceStarter
|
2016-04-10 21:04:34 +00:00
|
|
|
from .common import _WordPressScraper, xpath_class
|
2012-06-20 19:58:13 +00:00
|
|
|
|
|
|
|
|
2016-04-03 22:23:47 +00:00
|
|
|
class ZapComic(_ParserScraper):
    """Scraper definition for the comic hosted at zapcomic.com."""

    url = "http://www.zapcomic.com/"

    # The search expressions below are written as CSS selectors; this flag
    # presumably tells the parser base class to treat them that way
    # (NOTE(review): confirm against _ParserScraper).
    css = True
    imageSearch = "img.comic-item"
    prevSearch = "a.previous-comic-link"
|
2012-12-07 23:45:18 +00:00
|
|
|
|
|
|
|
|
2016-05-16 21:16:29 +00:00
|
|
|
class Zapiro(_ParserScraper):
    """Scraper definition for the Zapiro cartoons on mg.co.za."""

    url = 'http://mg.co.za/zapiro/'
    # NOTE(review): bounceStarter comes from ..helpers; presumably it uses
    # prevSearch/nextSearch to resolve the start page -- confirm there.
    starter = bounceStarter
    imageSearch = '//div[@id="cartoon_full_size"]//img'
    prevSearch = '//li[@class="nav_older"]/a'
    nextSearch = '//li[@class="nav_newer"]/a'

    def namer(self, image_url, page_url):
        """Return the file name for an image: the last path segment of the
        page URL.

        @param image_url: URL of the image (unused here)
        @param page_url: URL of the page the image was found on
        @return: the text after the last '/' of page_url, ignoring any
            trailing slashes
        """
        # Drop trailing slashes first: otherwise a URL ending in '/' would
        # produce an empty file name. Indexing with [-1] also avoids an
        # IndexError for a URL that contains no '/' at all.
        return page_url.rstrip('/').rsplit('/', 1)[-1]
|
2012-12-04 06:02:40 +00:00
|
|
|
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2016-04-10 21:04:34 +00:00
|
|
|
class ZenPencils(_WordPressScraper):
    """Scraper definition for the comic hosted at zenpencils.com."""

    url = "http://zenpencils.com/"
    firstStripUrl = url + "comic/1-ralph-waldo-emerson-make-them-cry/"

    # A single strip page may carry more than one image.
    multipleImagesPerStrip = True

    # Previous-strip link: an <a> element matched via the 'navi-prev' class
    # expression built by xpath_class.
    prevSearch = "//a[%s]" % xpath_class("navi-prev")
|
2013-04-09 17:38:47 +00:00
|
|
|
|
|
|
|
|
2012-06-20 19:58:13 +00:00
|
|
|
class ZombieHunters(_BasicScraper):
    """Scraper definition for the comic hosted at thezombiehunters.com."""

    url = "http://www.thezombiehunters.com/"

    # Strips are addressed by a numeric strip_id query parameter.
    stripUrl = url + "?strip_id=%s"
    firstStripUrl = stripUrl % "1"

    # Strip images live under /istrip_files/strips/ on the site.
    imageSearch = compile(
        tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))

    # The previous link is an <a> tag directly followed by the image
    # with id "prevcomic".
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "id", "prevcomic"))

    help = "Index format: n(unpadded)"
|
2013-03-07 22:51:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Zwarwald(_BasicScraper):
    """Scraper definition for the German-language comic at zwarwald.de."""

    url = 'http://www.zwarwald.de/'
    # Regex-escaped copy of the base URL, for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + "index.php/page/%s/"
    # anything before page 495 seems to be flash
    firstStripUrl = stripUrl % "495"
    lang = "de"

    # Images are served either from the site itself or from a second host.
    imageSearch = (
        compile(tagre("img", "src",
                      r'(%simages/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre("img", "src",
                      r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),
    )

    # The previous link is an <a> tag followed by the "prev.jpg" button image,
    # whose src attribute is single-quoted.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
        tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg',
              quote="'"))

    help = "Index format: number"

    def shouldSkipUrl(self, url, data):
        """Some pages have flash content."""
        flash_pages = ("112", "222", "223", "246", "368", "495")
        return any(url == self.stripUrl % page for page in flash_pages)

    def namer(self, image_url, page_url):
        """Build the file name as <year>_<month>_<name> from the last three
        path segments of the image URL."""
        # The four-way unpacking is deliberate: it raises ValueError when the
        # URL has fewer than three '/' separators.
        _head, year, month, name = image_url.rsplit("/", 3)
        return "_".join((year, month, name))
|