dosage/dosagelib/plugins/v.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function
from re import compile

from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class
from ..util import tagre


class VGCats(_BasicScraper):
    # Regex-driven scraper settings for the comic hosted under /comics/.
    # Subclasses in this module reuse the search patterns and help text.

    # Landing page, and the template used to address a single strip by id.
    url = "http://www.vgcats.com/comics/"
    stripUrl = url + "?strip_id=%s"
    firstStripUrl = stripUrl % ('0',)

    # Image pattern: an <img> src under images/ whose file name starts with
    # exactly six digits (tagre presumably builds the tag/attribute regex;
    # see ..util to confirm).
    imageSearch = compile(
        tagre('img', 'src', r'(images/\d{6}\.[^"]+)')
    )

    # Previous-strip pattern: an <a> with a relative ?strip_id=N href,
    # directly followed by the "back.gif" button image.
    prevSearch = compile(
        tagre('a', 'href', r'(\?strip_id=\d+)') +
        tagre('img', 'src', r'back\.gif')
    )

    help = "Index format: n (unpadded)"


class VGCatsAdventure(VGCats):
    # Variant of VGCats served from the /ffxi/ directory. The search
    # patterns and help text are inherited unchanged from VGCats.
    name = 'VGCats/Adventure'
    url = 'http://www.vgcats.com/ffxi/'

    # URL templates are plain class attributes evaluated once in the parent
    # class body, so anything derived from ``url`` must be rebuilt here;
    # otherwise it keeps pointing at the parent's /comics/ directory.
    stripUrl = url + '?strip_id=%s'
    # Previously inherited from VGCats and therefore referred to the main
    # comic's first strip.
    # NOTE(review): assumes this series also starts at strip_id 0, like the
    # main comic - TODO confirm against the site.
    firstStripUrl = stripUrl % '0'


class VGCatsSuper(VGCats):
    # Variant of VGCats served from the /super/ directory. The search
    # patterns and help text are inherited unchanged from VGCats.
    name = 'VGCats/Super'
    url = 'http://www.vgcats.com/super/'

    # URL templates are plain class attributes evaluated once in the parent
    # class body, so anything derived from ``url`` must be rebuilt here;
    # otherwise it keeps pointing at the parent's /comics/ directory.
    stripUrl = url + '?strip_id=%s'
    # Previously inherited from VGCats and therefore referred to the main
    # comic's first strip.
    # NOTE(review): assumes this series also starts at strip_id 0, like the
    # main comic - TODO confirm against the site.
    firstStripUrl = stripUrl % '0'


class VictimsOfTheSystem(_BasicScraper):
    # Regex-driven scraper settings for votscomic.com.

    # Strips are addressed by an ``id`` query value ending in ".jpg".
    url = "http://www.votscomic.com/"
    stripUrl = url + "?id=%s.jpg"
    firstStripUrl = stripUrl % ('070103-002452',)

    # Image pattern: an <img> whose src lies under comicpro/strips/
    # (tagre presumably builds the tag/attribute regex; see ..util).
    imageSearch = compile(
        tagre('img', 'src', r'(comicpro/strips/[^"]+)')
    )

    # Previous-strip pattern: an <a> with a relative
    # ?id=<digits>-<digits>.jpg href whose tag is immediately followed by
    # the literal text "Previous".
    prevSearch = compile(
        tagre('a', 'href', r'(\?id=\d+-\d+\.jpg)') + 'Previous'
    )

    help = "Index format: nnn-nnn"


class ViiviJaWagner(_ParserScraper):
    # XPath-driven scraper settings for the strip published on hs.fi.
    url = "http://www.hs.fi/viivijawagner/"
    lang = "fi"

    # The start page is resolved indirectly: latestSearch picks a link
    # inside the "cartoon-content" container (exact semantics are defined
    # by indirectStarter in ..helpers).
    starter = indirectStarter
    latestSearch = '//div[{}]//a'.format(xpath_class('cartoon-content'))

    # The image address is read from the page's Open Graph metadata; the
    # previous strip is the link carrying the "prev" class.
    imageSearch = '//meta[@property="og:image"]/@content'
    prevSearch = '//a[{}]'.format(xpath_class('prev'))

    def namer(self, image_url, page_url):
        """Derive the file name from the page URL, not the image URL.

        Returns the text that follows the last '-' in ``page_url``, cut off
        at its first '.'. Raises IndexError when ``page_url`` contains no
        '-' at all.
        """
        after_dash = page_url.rsplit('-', 1)[1]
        return after_dash.partition('.')[0]


class VirmirWorld(_ParserScraper):
    # XPath-driven scraper settings for world.virmir.com. A strip is
    # identified by two numbers (story and page), so the generic
    # single-value index handling is replaced by getIndexStripUrl below.
    url = 'http://world.virmir.com/'
    stripUrl = url + 'comic.php?story=%s&page=%s'
    firstStripUrl = stripUrl % ('1', '1')
    imageSearch = '//div[@class="comic"]//img'
    prevSearch = '//a[contains(@class, "prev")]'
    # Document the two-part index like the other index-capable scrapers in
    # this module do; the format mirrors what getIndexStripUrl parses.
    help = 'Index format: story-page'

    def getIndexStripUrl(self, index):
        """Build the strip URL for an index of the form ``story-page``.

        ``index`` is split on '-'; the first two parts fill the ``story``
        and ``page`` query parameters, and any further parts are ignored.
        Raises IndexError when ``index`` contains no '-'.
        """
        parts = index.split('-')
        return self.stripUrl % (parts[0], parts[1])
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`# -*- coding: utf-8 -*-`
Fixup copyright years. 2016-10-28 22:21:41 +00:00			`# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Fix ViiviJaWagner. 2017-02-12 19:29:57 +00:00			`# Copyright (C) 2015-2017 Tobias Gruetzmacher`
Fix some comics. 2012-11-21 20:57:26 +00:00
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`from __future__ import absolute_import, division, print_function`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from re import compile`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Fix a bunch of comic modules. 2016-10-31 05:57:47 +00:00			`from ..scraper import _BasicScraper, _ParserScraper`
Move xpath_class to helpers module. 2017-02-13 21:41:17 +00:00			`from ..helpers import indirectStarter, xpath_class`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from ..util import tagre`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Fix some comics. 2012-12-02 17:35:06 +00:00			`class VGCats(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.vgcats.com/comics/'`
			`stripUrl = url + '?strip_id=%s'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '0'`
Fix some comics. 2012-12-02 17:35:06 +00:00			`imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') +`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`tagre("img", "src", r"back\.gif"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: n (unpadded)'`


Fix some comics. 2012-12-02 17:35:06 +00:00			`class VGCatsAdventure(VGCats):`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`name = 'VGCats/Adventure'`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.vgcats.com/ffxi/'`
			`stripUrl = url + '?strip_id=%s'`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`class VGCatsSuper(VGCats):`
			`name = 'VGCats/Super'`
			`url = 'http://www.vgcats.com/super/'`
			`stripUrl = url + '?strip_id=%s'`


Added comics. 2012-12-08 20:30:51 +00:00			`class VictimsOfTheSystem(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.votscomic.com/'`
			`stripUrl = url + '?id=%s.jpg'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '070103-002452'`
Added comics. 2012-12-08 20:30:51 +00:00			`imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)'))`
Sort comics alphabetically & PEP8 style fixes. 2016-03-31 21:13:54 +00:00			`prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') +`
			`"Previous")`
Added comics. 2012-12-08 20:30:51 +00:00			`help = 'Index format: nnn-nnn'`


Fix a bunch of comic modules. 2016-10-31 05:57:47 +00:00			`class ViiviJaWagner(_ParserScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.hs.fi/viivijawagner/'`
Fix ViiviJaWagner. 2017-02-12 19:29:57 +00:00			`imageSearch = '//meta[@property="og:image"]/@content'`
			`prevSearch = '//a[%s]' % xpath_class('prev')`
			`latestSearch = '//div[%s]//a' % xpath_class('cartoon-content')`
			`starter = indirectStarter`
Fix ViiviJaWagner. 2015-04-18 20:45:13 +00:00			`lang = 'fi'`
Fix more comics. 2012-12-07 23:45:18 +00:00
Refactor: Make namer a method. When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class. 2016-04-21 06:20:49 +00:00			`def namer(self, image_url, page_url):`
Fix ViiviJaWagner. 2017-02-12 19:29:57 +00:00			`return page_url.rsplit('-', 1)[1].split('.')[0]`
Add VirmirWorld 2019-07-12 07:46:45 +00:00

			`class VirmirWorld(_ParserScraper):`
			`url = 'http://world.virmir.com/'`
			`stripUrl = url + 'comic.php?story=%s&page=%s'`
			`firstStripUrl = stripUrl % ('1', '1')`
			`imageSearch = '//div[@class="comic"]//img'`
			`prevSearch = '//a[contains(@class, "prev")]'`

			`def getIndexStripUrl(self, index):`
			`index = index.split('-')`
			`return self.stripUrl % (index[0], index[1])`