dosage/dosagelib/plugins/z.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, xpath_class
from .common import _WPNavi


class ZapComic(_ParserScraper):
    """Scraper definition for the comic published at zapcomic.com.

    Both search expressions are written as CSS selectors rather than XPath;
    ``css = True`` is presumably the switch that makes the base scraper
    interpret them that way (confirm against ``_ParserScraper``).
    """

    css = True
    url = 'http://www.zapcomic.com/'
    # Selector for the link to the previous comic page.
    prevSearch = 'a.previous-comic-link'
    # Selector for the comic image element.
    imageSearch = 'img.comic-item'


class Zapiro(_ParserScraper):
    """Scraper definition for the Zapiro cartoons hosted on mg.co.za."""

    url = 'http://mg.co.za/zapiro/'
    # NOTE(review): bounceStarter presumably needs both prevSearch and
    # nextSearch to locate the newest strip - confirm in helpers.
    starter = bounceStarter
    # XPath for the cartoon image inside the element with id "cartoon".
    imageSearch = '//div[@id="cartoon"]/img'
    # Navigation anchors are selected through xpath_class('right') and
    # xpath_class('left').
    nextSearch = '//a[%s]' % xpath_class('right')
    prevSearch = '//a[%s]' % xpath_class('left')

    def namer(self, image_url, page_url):
        """Name the image after the text following the last '/' of the page URL.

        ``image_url`` is not consulted. A ``page_url`` without any '/'
        raises IndexError.
        """
        return page_url.rsplit('/', 1)[1]


class ZenPencils(_WPNavi):
    """Scraper definition for zenpencils.com, built on the shared _WPNavi base."""

    url = 'https://zenpencils.com/'
    firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
    # A single page may carry more than one image.
    multipleImagesPerStrip = True
    # NOTE(review): bounceStarter presumably avoids recording the root URL
    # as the page URL of the newest strip - confirm in helpers.
    starter = bounceStarter
    # Navigation anchors are selected through xpath_class('navi-next') and
    # xpath_class('navi-prev').
    nextSearch = '//a[%s]' % xpath_class('navi-next')
    prevSearch = '//a[%s]' % xpath_class('navi-prev')


class ZombieHunters(_BasicScraper):
    """Scraper definition for thezombiehunters.com.

    Strips are addressed through the ``strip_id`` query parameter.
    """

    help = 'Index format: n(unpadded)'
    url = 'http://www.thezombiehunters.com/'
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '1'
    # Captures the site-relative path of the strip image.
    imageSearch = compile(
        tagre("img", "src", r'(/istrip_files/strips/[^"]+)')
    )
    # Captures the relative link that wraps the image with id "prevcomic".
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "id", "prevcomic")
    )


class Zwarwald(_BasicScraper):
    """Scraper definition for the German-language comic at zwarwald.de.

    Strips are addressed by a numeric page index.
    """

    lang = 'de'
    help = 'Index format: number'
    url = "http://www.zwarwald.de/"
    # Regex-escaped copy of ``url`` for embedding in the patterns below.
    rurl = escape(url)
    stripUrl = url + 'index.php/page/%s/'
    # anything before page 495 seems to be flash
    firstStripUrl = stripUrl % '495'
    # Two alternative image locations: below the site URL itself, or on the
    # hosteurope.de host.
    imageSearch = (
        compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),
    )
    # NOTE(review): the prev-button image pattern has no "www." although
    # ``url`` does - confirm this matches the live markup.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
        tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")
    )

    def shouldSkipUrl(self, url, data):
        """Tell whether ``url`` is one of the pages known to hold flash content.

        ``data`` is accepted but not inspected.
        """
        # NOTE(review): 495 is also firstStripUrl - confirm it should be skipped.
        flash_pages = ("112", "222", "223", "246", "368", '495')
        return url in [self.stripUrl % page for page in flash_pages]

    def namer(self, image_url, page_url):
        """Build the file name as ``year_month_name`` from the image URL.

        The three components are the last three '/'-separated segments of
        ``image_url``; ``page_url`` is not consulted.
        """
        _, year, month, name = image_url.rsplit('/', 3)
        pieces = (year, month, name)
        return "%s_%s_%s" % pieces
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`# -*- coding: utf-8 -*-`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Random module fixes. 2017-05-21 22:30:31 +00:00			`# Copyright (C) 2015-2017 Tobias Gruetzmacher`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00
			`from __future__ import absolute_import, division, print_function`
Fix some comics. 2012-11-21 20:57:26 +00:00
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00
			`from ..scraper import _BasicScraper, _ParserScraper`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from ..util import tagre`
Move xpath_class to helpers module. 2017-02-13 21:41:17 +00:00			`from ..helpers import bounceStarter, xpath_class`
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`from .common import _WPNavi`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`class ZapComic(_ParserScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.zapcomic.com/'`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`css = True`
			`imageSearch = 'img.comic-item'`
			`prevSearch = 'a.previous-comic-link'`
Fix more comics. 2012-12-07 23:45:18 +00:00

Fix some more comic modules. 2016-05-16 21:16:29 +00:00			`class Zapiro(_ParserScraper):`
			`url = 'http://mg.co.za/zapiro/'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = bounceStarter`
Random module fixes. 2017-05-21 22:30:31 +00:00			`imageSearch = '//div[@id="cartoon"]/img'`
Fix a bunch of comic modules. 2016-10-31 05:57:47 +00:00			`prevSearch = '//a[%s]' % xpath_class('left')`
			`nextSearch = '//a[%s]' % xpath_class('right')`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
Fix some more comic modules. 2016-05-16 21:16:29 +00:00			`parts = page_url.rsplit('/', 1)`
			`return parts[1]`
Fix comics. 2012-12-04 06:02:40 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`class ZenPencils(_WPNavi):`
Fix multiple imgs for json flag & ZenPencils bouncer (#133) When using the JSON output flag, if the page has more than one image, dictionary indexing cannot be used as list. For the ZenPencils comic, the bouncer is missing, saving the page url as the root url. 2019-06-19 05:09:33 +00:00			`url = 'https://zenpencils.com/'`
ZenPencils: Allow multiple images per page. 2015-09-03 21:24:28 +00:00			`multipleImagesPerStrip = True`
Move more comics to common WordPressScraper. 2016-04-10 21:04:34 +00:00			`firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'`
Fix multiple imgs for json flag & ZenPencils bouncer (#133) When using the JSON output flag, if the page has more than one image, dictionary indexing cannot be used as list. For the ZenPencils comic, the bouncer is missing, saving the page url as the root url. 2019-06-19 05:09:33 +00:00			`starter = bounceStarter`
			`prevSearch = '//a[%s]' % xpath_class('navi-prev')`
			`nextSearch = '//a[%s]' % xpath_class('navi-next')`
Added ZenPencils. 2013-04-09 17:38:47 +00:00

Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class ZombieHunters(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.thezombiehunters.com/'`
			`stripUrl = url + '?strip_id=%s'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Fix some comics. 2012-11-26 06:13:32 +00:00			`imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: n(unpadded)'`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00

			`class Zwarwald(_BasicScraper):`
			`url = "http://www.zwarwald.de/"`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`stripUrl = url + 'index.php/page/%s/'`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`# anything before page 495 seems to be flash`
			`firstStripUrl = stripUrl % '495'`
			`lang = 'de'`
Fix zwarwald 2013-04-10 18:14:43 +00:00			`imageSearch = (`
			`compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),`
			`compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),`
			`)`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`tagre("img", "src",`
			`r'http://zwarwald\.de/images/prev\.jpg',`
			`quote="'"))`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`help = 'Index format: number'`

Provide page data in shouldSkipUrl() function 2014-02-10 20:58:09 +00:00			`def shouldSkipUrl(self, url, data):`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`"""Some pages have flash content."""`
			`return url in (`
			`self.stripUrl % "112",`
			`self.stripUrl % "222",`
			`self.stripUrl % "223",`
			`self.stripUrl % "246",`
			`self.stripUrl % "368",`
			`self.stripUrl % '495',`
			`)`

Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
			`prefix, year, month, name = image_url.rsplit('/', 3)`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`return "%s_%s_%s" % (year, month, name)`