dosage/dosagelib/plugins/x.py

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam

from re import compile

from ..scraper import _BasicScraper
from ..helpers import bounceStarter
from ..util import tagre


class Xkcd(_BasicScraper):
    """Scraper definition for the xkcd comic at http://xkcd.com/.

    Strip pages are addressed as ``<url><n>/`` with an unpadded number
    (see ``help``). The search patterns are built with ``tagre``;
    presumably they match the navigation links and the comic ``<img>``
    tag -- confirm against ``tagre`` in ``..util``.
    """

    name = 'xkcd'
    url = 'http://xkcd.com/'

    # Start page is resolved through bounceStarter using the link that has
    # "next" before its /<digits>/ href (exact semantics live in ..helpers).
    starter = bounceStarter(
        url,
        compile(tagre("a", "href", r'(/\d+/)', before="next")))

    # Page URL template and the first strip in the archive.
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'

    # Image source on the imgs.xkcd.com comics path (scheme-relative URL).
    imageSearch = compile(
        tagre("img", "src", r'(//imgs\.xkcd\.com/comics/[^"]+)'))

    # Link to the previous strip: a /<digits>/ href with "prev" before it.
    prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))

    help = 'Index format: n (unpadded)'

    # Title attribute of the comic image (the text after the comics path).
    textSearch = compile(
        tagre("img", "title", r'([^"]+)',
              before=r'//imgs\.xkcd\.com/comics/'))

    @classmethod
    def namer(cls, image_url, page_url):
        """Return the file name ``<index>-<name>`` for a comic image.

        ``index`` is the last path segment of ``page_url`` converted to an
        integer and zero-padded to at least three digits; ``name`` is the
        last path segment of ``image_url`` up to its first dot.

        Raises ValueError if the last segment of ``page_url`` is not an
        integer.
        """
        # Convert the page number first so a bad page URL fails early.
        page_segment = page_url.rstrip('/').rpartition('/')[2]
        strip_number = int(page_segment)
        image_file = image_url.rpartition('/')[2]
        stem = image_file.partition('.')[0]
        return '%03d-%s' % (strip_number, stem)

    @classmethod
    def imageUrlModifier(cls, url, data):
        """Point the image URL at the ``_large`` variant when available.

        When ``url`` is non-empty and ``data`` contains ``/large/``, every
        ``.png`` in ``url`` is replaced by ``_large.png``; otherwise ``url``
        is returned unchanged.
        """
        if not url or '/large/' not in data:
            return url
        return url.replace(".png", "_large.png")

    def shouldSkipUrl(self, url, data):
        """Return True if ``url`` is one of the strip pages to skip."""
        # Presumably these pages carry no plain comic image -- verify.
        skipped_pages = (
            self.stripUrl % '1663',  # Garden
        )
        return url in skipped_pages
Updated copyright for all source files. 2012-06-20 20:41:04 +00:00			`# -- coding: iso-8859-1 --`
			`# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Fix some comics. 2012-11-21 20:57:26 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`from re import compile`

A lot of refactoring. 2012-10-11 10:03:12 +00:00			`from ..scraper import _BasicScraper`
			`from ..helpers import bounceStarter`
Fix some comics. 2012-11-26 06:13:32 +00:00			`from ..util import tagre`
A lot of refactoring. 2012-10-11 10:03:12 +00:00
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00			`class Xkcd(_BasicScraper):`
			`name = 'xkcd'`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://xkcd.com/'`
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00			`starter = bounceStarter(url, compile(tagre("a", "href", r'(/\d+/)',`
			`before="next")))`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`stripUrl = url + '%s/'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00			`imageSearch = compile(tagre("img", "src",`
			`r'(//imgs\.xkcd\.com/comics/[^"]+)'))`
Fix some comics. 2012-11-26 06:13:32 +00:00			`prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: n (unpadded)'`
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00			`textSearch = compile(tagre("img", "title", r'([^"]+)',`
			`before=r'//imgs\.xkcd\.com/comics/'))`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
			`@classmethod`
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00			`def namer(cls, image_url, page_url):`
			`index = int(page_url.rstrip('/').rsplit('/', 1)[-1])`
			`name = image_url.rsplit('/', 1)[-1].split('.')[0]`
Fix some comics. 2012-11-26 06:13:32 +00:00			`return '%03d-%s' % (index, name)`
Store large xkcd images. 2013-12-04 16:56:54 +00:00
			`@classmethod`
			`def imageUrlModifier(cls, url, data):`
			`if url and '/large/' in data:`
			`return url.replace(".png", "_large.png")`
			`return url`
Skip non-image on xkcd. 2016-04-05 22:47:47 +00:00
			`def shouldSkipUrl(self, url, data):`
			`return url in (`
			`self.stripUrl % '1663', # Garden`
			`)`