# dosage/dosagelib/plugins/z.py

# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, joinPathPartsNamer
from .common import _WPNavi


class ZapComic(_ParserScraper):
    """Scraper definition for the comic published at zapcomic.com."""

    # Site address of the comic.
    url = 'http://www.zapcomic.com/'
    # Both search expressions below use CSS selector syntax.
    # NOTE(review): presumably this flag makes the base scraper evaluate
    # them as CSS instead of XPath -- confirm against _ParserScraper.
    css = True
    # Link element leading to the previous comic.
    prevSearch = 'a.previous-comic-link'
    # Image element holding the comic itself.
    imageSearch = 'img.comic-item'


class Zapiro(_ParserScraper):
    """Scraper definition for the Zapiro cartoons hosted on mg.co.za."""

    # Site address of the comic.
    url = 'http://mg.co.za/zapiro/'

    # XPath expressions for the cartoon image and the navigation links.
    # NOTE(review): d:class(...) appears to be a project-provided XPath
    # function that matches on an element's class -- confirm in the parser.
    imageSearch = '//div[@id="cartoon"]/img'
    nextSearch = '//a[d:class("right")]'
    prevSearch = '//a[d:class("left")]'

    # NOTE(review): bounceStarter is defined in ..helpers; it presumably
    # uses prevSearch/nextSearch to resolve the latest strip's own URL --
    # confirm there.
    starter = bounceStarter
    # NOTE(review): the two tuples look like index selections into URL path
    # parts (first tuple: page URL, second: image URL) -- verify against
    # joinPathPartsNamer's signature.
    namer = joinPathPartsNamer((-1,), ())


class ZenPencils(_WPNavi):
    """Scraper definition for Zen Pencils (zenpencils.com)."""

    # Site address of the comic.
    url = 'https://zenpencils.com/'
    # Address of the oldest strip, built from the site address.
    firstStripUrl = ''.join((url, 'comic/1-ralph-waldo-emerson-make-them-cry/'))
    # A single page of this comic can carry more than one image.
    multipleImagesPerStrip = True

    # XPath expressions for the navigation links.
    # NOTE(review): d:class(...) appears to be a project-provided XPath
    # function that matches on an element's class -- confirm in the parser.
    nextSearch = '//a[d:class("navi-next")]'
    prevSearch = '//a[d:class("navi-prev")]'
    # NOTE(review): bounceStarter is defined in ..helpers; it presumably
    # resolves the real URL of the latest strip instead of using the site
    # root -- confirm there.
    starter = bounceStarter


class ZombieHunters(_BasicScraper):
    """Scraper definition for The Zombie Hunters (thezombiehunters.com)."""

    help = 'Index format: n(unpadded)'

    # Site address of the comic.
    url = 'http://www.thezombiehunters.com/'
    # Individual strips are addressed through a "strip_id" query parameter.
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % ('1',)

    # Captures the src of images stored below /istrip_files/strips/.
    imageSearch = compile(
        tagre("img", "src", r'(/istrip_files/strips/[^"]+)')
    )
    # Captures a relative "?strip_id=N" href that is directly followed by
    # the img tag whose id is "prevcomic".
    prevSearch = compile(
        tagre("a", "href", r'(\?strip_id=\d+)') +
        tagre("img", "id", "prevcomic")
    )


class Zwarwald(_BasicScraper):
    """Scraper definition for the German-language comic at zwarwald.de."""

    lang = 'de'
    help = 'Index format: number'

    # Site address of the comic, plus a regex-escaped copy that can be
    # embedded into the search patterns below.
    url = "http://www.zwarwald.de/"
    rurl = escape(url)
    stripUrl = url + 'index.php/page/%s/'
    # anything before page 495 seems to be flash
    firstStripUrl = stripUrl % ('495',)

    # Two alternative image patterns: one for images served from the site
    # address itself, one for images served from the hosteurope.de host name.
    imageSearch = (
        compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),
    )
    # Captures the href of a page link that is directly followed by the
    # "prev.jpg" image; that image's src attribute is single-quoted.
    prevSearch = compile(
        tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +
        tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")
    )
    # NOTE(review): the second tuple looks like a selection of the last
    # three path parts of the image URL -- verify against
    # joinPathPartsNamer's signature.
    namer = joinPathPartsNamer((), (-3, -2, -1))

    def shouldSkipUrl(self, url, data):
        """Tell whether ``url`` is one of the known flash-only pages.

        ``data`` is accepted as part of the method signature but is not
        inspected here.
        """
        flash_pages = ("112", "222", "223", "246", "368", '495')
        skipped_urls = tuple(self.stripUrl % page for page in flash_pages)
        return url in skipped_urls
Update file headers The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers. 2020-04-18 11:45:44 +00:00			`# SPDX-License-Identifier: MIT`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Replace xpath_class with custom xpath function 2020-07-31 20:56:30 +00:00			`# Copyright (C) 2015-2020 Tobias Gruetzmacher`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00
			`from ..scraper import _BasicScraper, _ParserScraper`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from ..util import tagre`
Replace xpath_class with custom xpath function 2020-07-31 20:56:30 +00:00			`from ..helpers import bounceStarter, joinPathPartsNamer`
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`from .common import _WPNavi`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`class ZapComic(_ParserScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.zapcomic.com/'`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`css = True`
			`imageSearch = 'img.comic-item'`
			`prevSearch = 'a.previous-comic-link'`
Fix more comics. 2012-12-07 23:45:18 +00:00

Fix some more comic modules. 2016-05-16 21:16:29 +00:00			`class Zapiro(_ParserScraper):`
			`url = 'http://mg.co.za/zapiro/'`
Refactor: Convert starter to simple method. 2016-04-13 18:01:51 +00:00			`starter = bounceStarter`
Random module fixes. 2017-05-21 22:30:31 +00:00			`imageSearch = '//div[@id="cartoon"]/img'`
Replace xpath_class with custom xpath function 2020-07-31 20:56:30 +00:00			`prevSearch = '//a[d:class("left")]'`
			`nextSearch = '//a[d:class("right")]'`
Add new namer "joinPathPartsNamer" Additionally, switch some comics which benefit from it to the new namer. This fixes #127. 2019-06-30 18:52:15 +00:00			`namer = joinPathPartsNamer((-1,), ())`
Fix comics. 2012-12-04 06:02:40 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Unify more WordPress-based modules. 2017-05-21 23:17:05 +00:00			`class ZenPencils(_WPNavi):`
Fix multiple imgs for json flag & ZenPencils bouncer (#133) When using the JSON output flag, if the page has more than one image, dictionary indexing cannot be used as list. For the ZenPencils comic, the bouncer is missing, saving the page url as the root url. 2019-06-19 05:09:33 +00:00			`url = 'https://zenpencils.com/'`
ZenPencils: Allow multiple images per page. 2015-09-03 21:24:28 +00:00			`multipleImagesPerStrip = True`
Move more comics to common WordPressScraper. 2016-04-10 21:04:34 +00:00			`firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'`
Fix multiple imgs for json flag & ZenPencils bouncer (#133) When using the JSON output flag, if the page has more than one image, dictionary indexing cannot be used as list. For the ZenPencils comic, the bouncer is missing, saving the page url as the root url. 2019-06-19 05:09:33 +00:00			`starter = bounceStarter`
Replace xpath_class with custom xpath function 2020-07-31 20:56:30 +00:00			`prevSearch = '//a[d:class("navi-prev")]'`
			`nextSearch = '//a[d:class("navi-next")]'`
Added ZenPencils. 2013-04-09 17:38:47 +00:00

Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class ZombieHunters(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.thezombiehunters.com/'`
			`stripUrl = url + '?strip_id=%s'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Fix some comics. 2012-11-26 06:13:32 +00:00			`imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: n(unpadded)'`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00

			`class Zwarwald(_BasicScraper):`
			`url = "http://www.zwarwald.de/"`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`stripUrl = url + 'index.php/page/%s/'`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`# anything before page 495 seems to be flash`
			`firstStripUrl = stripUrl % '495'`
			`lang = 'de'`
Fix zwarwald 2013-04-10 18:14:43 +00:00			`imageSearch = (`
			`compile(tagre("img", "src", r'(%simages/\d+/\d+/[^"]+)' % rurl)),`
			`compile(tagre("img", "src", r'(http://wp1163540\.wp190\.webpack\.hosteurope\.de/wordpress/images/\d+/\d+/[^"]+)')),`
			`)`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%sindex\.php/page/\d+/)' % rurl) +`
Fix ZapComics, remove ZebraGirl. - ZebraGirl is now ComicFury/ZebraGirl... 2016-04-03 22:23:47 +00:00			`tagre("img", "src",`
			`r'http://zwarwald\.de/images/prev\.jpg',`
			`quote="'"))`
Add new namer "joinPathPartsNamer" Additionally, switch some comics which benefit from it to the new namer. This fixes #127. 2019-06-30 18:52:15 +00:00			`namer = joinPathPartsNamer((), (-3, -2, -1))`
Added Zwarwald and AhoiPolloi 2013-03-07 22:51:55 +00:00			`help = 'Index format: number'`

Provide page data in shouldSkipUrl() function 2014-02-10 20:58:09 +00:00			`def shouldSkipUrl(self, url, data):`
Fix some comics and add language tag. 2013-03-08 21:33:05 +00:00			`"""Some pages have flash content."""`
			`return url in (`
			`self.stripUrl % "112",`
			`self.stripUrl % "222",`
			`self.stripUrl % "223",`
			`self.stripUrl % "246",`
			`self.stripUrl % "368",`
			`self.stripUrl % '495',`
			`)`