dosage/dosagelib/plugins/k.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape, IGNORECASE

from ..scraper import _BasicScraper
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, xpath_class


class KevinAndKell(_BasicScraper):
    # Strips are addressed by date: <year>/kk<month><day>.html, with month
    # and day written as two digits (see firstStripUrl).
    url = 'http://www.kevinandkell.com/'
    stripUrl = url + '%s/kk%s%s.html'
    firstStripUrl = stripUrl % ('1995', '09', '03')
    # The dot in front of the file extension is escaped so that it only
    # matches a literal '.'.
    imageSearch = compile(r'<img.+?src="(/?(\d+/)?strips/kk\d+\.(gif|jpg))"',
                          IGNORECASE)
    prevSearch = compile(
        r'<a.+?href="(/?(\.\./)?\d+/kk\d+\.html)"[^>]*><span>Previous Strip',
        IGNORECASE)
    help = 'Index format: yyyy-mm-dd'

    def getIndexStripUrl(self, index):
        """Return the strip URL for an index of the form 'yyyy-mm-dd'.

        Each dash-separated component is validated as an integer and then
        zero-padded to at least two digits, so '1995-9-3' and '1995-09-03'
        both produce '.../1995/kk0903.html' (the same shape as
        firstStripUrl).

        Raises ValueError if a component is not an integer, and TypeError
        if the index does not consist of exactly three components.
        """
        parts = map(int, index.split('-'))
        # Formatting the ints with plain '%s' would drop the leading zeros
        # of month and day ('kk93' instead of 'kk0903').
        return self.stripUrl % tuple('%02d' % part for part in parts)


class Key(_BasicScraper):
    # The newest strip is served from latestpage.html; individual strips
    # live under pages/<index>.html.
    baseUrl = 'http://key.shadilyn.com/'
    url = '%slatestpage.html' % baseUrl
    stripUrl = '%spages/%%s.html' % baseUrl
    # Captures a double-quoted relative path that starts with either
    # "images/" or "pages/images/".
    imageSearch = compile(
        r'"('
        r'(?:images/.+?)'
        r'|'
        r'(?:pages/images/.+?)'
        r')"')
    # Captures the href (ending in "html") of a link that directly follows
    # a closing </a> and is later followed by the text "prev".
    prevSearch = compile(
        r'</a><a href="(.+?html)"'
        r'.+?prev')
    help = 'Index format: nnn'


class KickInTheHead(_WordPressScraper):
    # Only the site address, the first strip and the "previous" link
    # lookup are set here; nothing else is overridden in this class.
    url = 'http://www.kickinthehead.org/'
    firstStripUrl = '%s2003/03/20/ipod-envy/' % url
    # XPath for an <a> element filtered by the expression that
    # xpath_class() produces for 'navi-prev'.
    prevSearch = '//a[' + xpath_class('navi-prev') + ']'


class KiwiBlitz(_ComicControlScraper):
    # Only the site root is configured (note: no trailing slash); no other
    # attribute is overridden in this class.
    url = "http://www.kiwiblitz.com"


class Krakow(_BasicScraper):
    # Strips are addressed by date through archive.php?date=<yyyymmdd>.
    url = 'http://www.krakow.krakowstudios.com/'
    stripUrl = '%sarchive.php?date=%%s' % url
    firstStripUrl = stripUrl % ('20081111',)
    # Captures the relative path of an image served from comics/.
    imageSearch = compile(
        r'<img src="(comics/.+?)"')
    # Captures the archive link that wraps the image named "previous_day".
    prevSearch = compile(
        r'<a href="(archive\.php\?date=.+?)">'
        r'<img border=0 name=previous_day')
    help = 'Index format: yyyymmdd'


class Kukuburi(_BasicScraper):
    # The newest strip is at current/; individual strips live under
    # v2/<yyyy/mm/dd/stripname>/.
    baseUrl = 'http://www.kukuburi.com/'
    url = '%scurrent/' % baseUrl
    stripUrl = '%sv2/%%s/' % baseUrl
    firstStripUrl = stripUrl % ('2007/08/09/one',)
    # Captures the absolute URL of an <img> whose src points below
    # /v2/comics/. The `after` pattern is handed to tagre() unchanged;
    # presumably it requires a non-empty alt attribute -- confirm against
    # tagre().
    imageSearch = compile(
        tagre(
            "img",
            "src",
            r'(http://www\.kukuburi\.com'
            r'/v2/comics/[^"]+)',
            after='alt="[^"]',
        ))
    # Captures the first double-quoted http URL that follows the text
    # "nav-previous".
    prevSearch = compile(
        r'nav-previous.+?'
        r'"(http.+?)"')
    help = 'Index format: yyyy/mm/dd/stripname'


class KuroShouri(_BasicScraper):
    # Strips are addressed through the ?webcomic_post=<index> query
    # parameter on the site root.
    url = 'http://kuroshouri.com/'
    # Regex-escaped copy of the site address for embedding in patterns.
    rurl = escape(url)
    stripUrl = '%s?webcomic_post=%%s' % url
    # Captures an image URL below wp-content/webcomic/kuroshouri/; the
    # attribute value may be delimited by single or double quotes.
    imageSearch = compile(
        tagre(
            "img",
            "src",
            (r"(%s"
             r"wp-content/webcomic/kuroshouri/"
             r"[^'\"]+)") % rurl,
            quote='[\'"]',
        ))
    # Captures a ?webcomic_post=... link; "previous" is passed to tagre()
    # as the `after` pattern.
    prevSearch = compile(
        tagre(
            "a",
            "href",
            (r'(%s'
             r'\?webcomic_post\=[^"]+)') % rurl,
            after="previous",
        ))
    help = 'Index format: chapter-n-page-m'
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`# -- coding: utf-8 --`
Updated copyright for all source files. 2012-06-20 22:41:04 +02:00			`# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 16:50:57 +01:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`# Copyright (C) 2015-2016 Tobias Gruetzmacher`
Initial commit to Github. 2012-06-20 21:58:13 +02:00
Remove make_scraper for most WordPress comics. - Dropped KatzenfutterGeleespritzer, because robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPress scraper into common.py. - Some smaller PEP8 fixes. 2016-04-02 00:14:31 +02:00			`from __future__ import absolute_import, division, print_function`
Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-04 00:12:53 +02:00
Use re.escape and add some firstStripUrl. 2013-04-10 18:19:11 +02:00			`from re import compile, escape, IGNORECASE`
Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-04 00:12:53 +02:00
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`from ..scraper import _BasicScraper`
Fix comics. 2012-12-04 07:02:40 +01:00			`from ..util import tagre`
Move more comics to common WordPressScraper. 2016-04-10 23:04:34 +02:00			`from .common import _ComicControlScraper, _WordPressScraper, xpath_class`
Fixed some comics. 2013-04-11 18:27:43 +02:00
Initial commit to Github. 2012-06-20 21:58:13 +02:00
			`class KevinAndKell(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 21:00:26 +01:00			`url = 'http://www.kevinandkell.com/'`
			`stripUrl = url + '%s/kk%s%s.html'`
Add firstStripUrl for KevinAndKell. 2013-03-25 19:48:19 +01:00			`firstStripUrl = stripUrl % ('1995', '09', '03')`
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`imageSearch = compile(r'<img.+?src="(/?(\d+/)?strips/kk\d+.(gif\|jpg))"',`
			`IGNORECASE)`
			`prevSearch = compile(`
			`r'<a.+?href="(/?(\.\./)?\d+/kk\d+\.html)"[^>]*><span>Previous Strip',`
			`IGNORECASE)`
Initial commit to Github. 2012-06-20 21:58:13 +02:00			`help = 'Index format: yyyy-mm-dd'`

Added some comic strips and cleanup the scraper code. 2013-03-06 20:00:30 +01:00			`def getIndexStripUrl(self, index):`
			`return self.stripUrl % tuple(map(int, index.split('-')))`
Initial commit to Github. 2012-06-20 21:58:13 +02:00

Sort comics. 2013-03-06 20:21:10 +01:00			`class Key(_BasicScraper):`
s/baseurl/baseUrl/g 2013-04-13 20:58:00 +02:00			`baseUrl = 'http://key.shadilyn.com/'`
			`url = baseUrl + 'latestpage.html'`
			`stripUrl = baseUrl + 'pages/%s.html'`
Sort comics. 2013-03-06 20:21:10 +01:00			`imageSearch = compile(r'"((?:images/.+?)\|(?:pages/images/.+?))"')`
			`prevSearch = compile(r'</a><a href="(.+?html)".+?prev')`
			`help = 'Index format: nnn'`


Move more comics to common WordPressScraper. 2016-04-10 23:04:34 +02:00			`class KickInTheHead(_WordPressScraper):`
Added KickInTheHead 2013-03-12 21:16:17 +01:00			`url = 'http://www.kickinthehead.org/'`
Move more comics to common WordPressScraper. 2016-04-10 23:04:34 +02:00			`firstStripUrl = url + '2003/03/20/ipod-envy/'`
			`prevSearch = '//a[%s]' % xpath_class('navi-prev')`
Added KickInTheHead 2013-03-12 21:16:17 +01:00

Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-04 00:12:53 +02:00			`class KiwiBlitz(_ComicControlScraper):`
			`url = 'http://www.kiwiblitz.com'`


Sort comics. 2013-03-06 20:21:10 +01:00			`class Krakow(_BasicScraper):`
			`url = 'http://www.krakow.krakowstudios.com/'`
			`stripUrl = url + 'archive.php?date=%s'`
Add firstStripUrls. 2013-04-10 23:57:09 +02:00			`firstStripUrl = stripUrl % '20081111'`
Sort comics. 2013-03-06 20:21:10 +01:00			`imageSearch = compile(r'<img src="(comics/.+?)"')`
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`prevSearch = compile(`
			`r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')`
Sort comics. 2013-03-06 20:21:10 +01:00			`help = 'Index format: yyyymmdd'`


			`class Kukuburi(_BasicScraper):`
s/baseurl/baseUrl/g 2013-04-13 20:58:00 +02:00			`baseUrl = 'http://www.kukuburi.com/'`
			`url = baseUrl + 'current/'`
			`stripUrl = baseUrl + 'v2/%s/'`
Add firstStripUrls. 2013-04-10 23:57:09 +02:00			`firstStripUrl = stripUrl % '2007/08/09/one'`
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`imageSearch = compile(`
			`tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)',`
			`after='alt="[^"]'))`
Sort comics. 2013-03-06 20:21:10 +01:00			`prevSearch = compile(r'nav-previous.+?"(http.+?)"')`
			`help = 'Index format: yyyy/mm/dd/stripname'`


Various comics are fixed. 2012-12-13 21:05:27 +01:00			`class KuroShouri(_BasicScraper):`
Use re.escape and add some firstStripUrl. 2013-04-10 18:19:11 +02:00			`url = 'http://kuroshouri.com/'`
			`rurl = escape(url)`
			`stripUrl = url + '?webcomic_post=%s'`
Remove dead modules (& format). 2016-03-20 20:48:42 +01:00			`imageSearch = compile(`
			`tagre("img", "src",`
			`r"(%swp-content/webcomic/kuroshouri/[^'\"]+)" % rurl,`
			`quote="['\"]"))`
			`prevSearch = compile(`
			`tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,`
			`after="previous"))`
Use re.escape and add some firstStripUrl. 2013-04-10 18:19:11 +02:00			`help = 'Index format: chapter-n-page-m'`