# dosage/dosagelib/plugins/z.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, xpath_class
from .common import _WPNavi


class ZapComic(_ParserScraper):
    """Scraper definition for the comic published at zapcomic.com."""

    url = "http://www.zapcomic.com/"
    # The two search expressions below are written as CSS selectors;
    # presumably this flag is what makes the parser base class read them
    # that way -- confirm against _ParserScraper.
    css = True
    # <img> elements carrying the "comic-item" class.
    imageSearch = "img.comic-item"
    # <a> elements carrying the "previous-comic-link" class.
    prevSearch = "a.previous-comic-link"


class Zapiro(_ParserScraper):
    """Scraper definition for the Zapiro cartoons hosted on mg.co.za."""

    url = "http://mg.co.za/zapiro/"
    # Finding the start page is delegated to the shared bounceStarter helper.
    starter = bounceStarter
    # XPath: <img> children of the element <div id="cartoon">.
    imageSearch = '//div[@id="cartoon"]/img'
    # Navigation anchors are selected through the xpath_class() helper,
    # using the class names "left" (previous) and "right" (next).
    prevSearch = "//a[%s]" % xpath_class("left")
    nextSearch = "//a[%s]" % xpath_class("right")

    def namer(self, image_url, page_url):
        """Return the part of ``page_url`` that follows its last slash.

        ``image_url`` is accepted but not used for the name.
        """
        return page_url.rsplit("/", 1)[1]


class ZenPencils(_WPNavi):
    """Scraper definition for zenpencils.com, built on the _WPNavi base."""

    url = "http://zenpencils.com/"
    firstStripUrl = url + "comic/1-ralph-waldo-emerson-make-them-cry/"
    # A single page of this comic may carry more than one image.
    multipleImagesPerStrip = True


class ZombieHunters(_BasicScraper):
    """Scraper definition for thezombiehunters.com."""

    url = "http://www.thezombiehunters.com/"
    # Individual strips are addressed through the "strip_id" query parameter.
    stripUrl = url + "?strip_id=%s"
    firstStripUrl = stripUrl % "1"
    # Pattern from tagre() for an <img> whose src lies below
    # /istrip_files/strips/; the path is the captured group.
    imageSearch = compile(
        tagre("img", "src", r'(/istrip_files/strips/[^"]+)')
    )
    # Pattern for an <a> whose href is "?strip_id=<digits>" (captured),
    # directly followed by the pattern for the <img> with id "prevcomic".
    prevSearch = compile(
        tagre("a", "href", r"(\?strip_id=\d+)")
        + tagre("img", "id", "prevcomic")
    )
    help = "Index format: n(unpadded)"


class Zwarwald(_BasicScraper):
    """Scraper definition for zwarwald.de (language tag ``de``)."""

    url = "http://www.zwarwald.de/"
    # Regex-escaped copy of ``url`` for embedding into the patterns below.
    rurl = escape(url)
    stripUrl = url + "index.php/page/%s/"
    # anything before page 495 seems to be flash
    firstStripUrl = stripUrl % "495"
    lang = "de"
    # Two alternative image patterns: the first is rooted at the site URL,
    # the second at the hosteurope.de host name.
    imageSearch = (
        compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre(
            "img", "src",
            r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de'
            r'/wordpress/images/\d+/\d+/[^"]+)')),
    )
    # Link to a page URL (captured) directly followed by the "prev.jpg"
    # image. Note the image src is matched without the "www." prefix and
    # with quote="'" (presumably because the page single-quotes it).
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl)
        + tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg',
                quote="'")
    )
    help = "Index format: number"

    def shouldSkipUrl(self, url, data):
        """Tell whether ``url`` is one of the listed flash-content pages.

        ``data`` is accepted but not inspected.
        """
        flash_pages = ("112", "222", "223", "246", "368", "495")
        skipped = tuple(self.stripUrl % page for page in flash_pages)
        return url in skipped

    def namer(self, image_url, page_url):
        """Join the last three path segments of ``image_url`` with ``_``.

        The segments are named year, month and file name; ``page_url`` is
        not used.
        """
        segments = image_url.rsplit("/", 3)
        # Everything before the third-to-last slash is discarded.
        _prefix, year, month, name = segments
        return "%s_%s_%s" % (year, month, name)
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`# -- coding: utf-8 --`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Random module fixes. 2017-05-21 22:30:31 +00:00			`# Copyright (C) 2015-2017 Tobias Gruetzmacher`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00
			`from __future__ import absolute_import, division, print_function`
Fix some comics. 2012-11-21 20:57:26 +00:00
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00
			`from ..scraper import _BasicScraper, _ParserScraper`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from ..util import tagre`
Move xpath_class to helpers module. 2017-02-13 21:41:17 +00:00			`from ..helpers import bounceStarter, xpath_class`
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`from .common import _WPNavi`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`class ZapComic(_ParserScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.zapcomic.com/'`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`css = True`
			`imageSearch = 'img.comic-item'`
			`prevSearch = 'a.previous-comic-link'`
Fix more comics. 2012-12-07 23:45:18 +00:00

Fix some more comic modules. 2016-05-16 21:16:29 +00:00			`class Zapiro(_ParserScraper):`
			`url = 'http://mg.co.za/zapiro/'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = bounceStarter`
Random module fixes. 2017-05-21 22:30:31 +00:00			`imageSearch = '//div[@id="cartoon"]/img'`
Fix a bunch of comic modules. 2016-10-31 05:57:47 +00:00			`prevSearch = '//a[%s]' % xpath_class('left')`
			`nextSearch = '//a[%s]' % xpath_class('right')`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
Fix some more comic modules. 2016-05-16 21:16:29 +00:00			`parts = page_url.rsplit('/', 1)`
			`return parts[1]`
Fix comics. 2012-12-04 06:02:40 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`class ZenPencils(_WPNavi):`
Added ZenPencils. 2013-04-09 17:38:47 +00:00			`url = 'http://zenpencils.com/'`
ZenPencils: Allow multiple images per page. 2015-09-03 21:24:28 +00:00			`multipleImagesPerStrip = True`
Move more comics to common WordPressScraper. 2016-04-10 21:04:34 +00:00			`firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'`
Added ZenPencils. 2013-04-09 17:38:47 +00:00

Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class ZombieHunters(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.thezombiehunters.com/'`
			`stripUrl = url + '?strip_id=%s'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Fix some comics. 2012-11-26 06:13:32 +00:00			`imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: n(unpadded)'`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00

			`class Zwarwald(_BasicScraper):`
			`url = "http://www.zwarwald.de/"`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`stripUrl = url + 'index.php/page/%s/'`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`# anything before page 495 seems to be flash`
			`firstStripUrl = stripUrl % '495'`
			`lang = 'de'`
Fix zwarwald 2013-04-10 18:14:43 +00:00			`imageSearch = (`
			`compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),`
			`compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),`
			`)`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`tagre("img", "src",`
			`r'http://zwarwald\.de/images/prev\.jpg',`
			`quote="'"))`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`help = 'Index format: number'`

Provide page data in shouldSkipUrl() function 2014-02-10 20:58:09 +00:00			`def shouldSkipUrl(self, url, data):`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`"""Some pages have flash content."""`
			`return url in (`
			`self.stripUrl % "112",`
			`self.stripUrl % "222",`
			`self.stripUrl % "223",`
			`self.stripUrl % "246",`
			`self.stripUrl % "368",`
			`self.stripUrl % '495',`
			`)`

Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
			`prefix, year, month, name = image_url.rsplit('/', 3)`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`return "%s_%s_%s" % (year, month, name)`