dosage/dosagelib/plugins/l.py

# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher

from __future__ import absolute_import, division, print_function

from re import compile, escape

from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH


class Lackadaisy(_BasicScraper):
    # Strips are addressed by a numeric ``comicid`` query parameter.
    baseUrl = 'http://lackadaisy.foxprints.com/'
    url = baseUrl + 'comic.php'
    stripUrl = baseUrl + 'comic.php?comicid=%s'
    firstStripUrl = stripUrl % '1'
    # Images are matched by their absolute URL on www.lackadaisycats.com.
    imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))
    # The link text is matched in its HTML-escaped form ("&lt;").
    prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
                         "&lt; Previous")
    help = 'Index format: n'
    # The dot in "comic.php" is escaped so that it only matches a literal
    # dot, exactly like the prevSearch pattern does.
    starter = bounceStarter(
        url, compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
                     "Next"))

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Use comic id for filename.

        The id is the text after the last '=' in pageUrl and the extension
        is the text after the last '.' in imageUrl.

        Returns a string of the form 'lackadaisy_<id>.<extension>'.
        """
        num = pageUrl.rsplit('=', 1)[-1]
        ext = imageUrl.rsplit('.', 1)[-1]
        return 'lackadaisy_%s.%s' % (num, ext)


class Laiyu(_WordPressScraper):
    # The same page serves as both the entry URL and the first strip.
    url = firstStripUrl = (
        'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
    )
    # The starting page is located from the first strip via WP_LATEST_SEARCH.
    starter = indirectStarter(url, WP_LATEST_SEARCH)


class LasLindas(_BasicScraper):
    url = 'http://laslindas.katbox.net/'
    # Regex-safe form of the base URL for embedding into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s/'
    help = 'Index format: stripname'
    multipleImagesPerStrip = True
    # Images live below wp-content/uploads/ on the comic's own host.
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/[^"]+)' % rurl,
        after="attachment-full"))
    # Previous-strip links point below comic/ on the comic's own host.
    prevSearch = compile(tagre(
        "a", "href",
        r'(%scomic/[^"]+)' % rurl,
        after="previous"))


class LastNerdsOnEarth(_ParserScraper):
    url = 'http://www.lastnerdsonearth.com/latest/'
    # Third-from-last <a> child of the element with id "comicnav".
    prevSearch = '//div[@id="comicnav"]/a[last()-2]'
    # <img> elements wrapped in a link directly below the "content" div.
    imageSearch = '//div[@id="content"]/a/img'


class LeastICouldDo(_BasicScraper):
    url = 'http://www.leasticoulddo.com/'
    # Regex-safe form of the base URL for embedding into the patterns below.
    rurl = escape(url)
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '20130109'
    help = 'Index format: yyyymmdd'
    # Image files are named with 8 or 9 digits plus a 1-4 character extension.
    imageSearch = compile(tagre(
        "img", "src",
        r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))
    # Strip links are purely numeric paths below comic/.
    prevSearch = compile(
        tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="Previous"))
    # Same link shape as prevSearch, but with a different "after" argument.
    starter = indirectStarter(
        url,
        compile(
            tagre("a", "href", r'(%scomic/\d+/)' % rurl,
                  after="feature-comic")))


class LetsSpeakEnglish(_ComicControlScraper):
    # Only the site URL is configured here; nothing else is overridden.
    url = "http://www.marycagle.com"


class Lint(_BasicScraper):
    url = 'http://www.purnicellin.com/lint/'
    # Regex-safe form of the base URL for embedding into imageSearch.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2004/01/10/01102004'
    help = 'Index format: yyyy/mm/dd/num-name'
    # Written as plain regexes rather than through tagre(), so they depend on
    # the exact attribute order and spacing in the page markup.
    imageSearch = compile(
        r'<img src="(%scomics/.+?)"' % rurl)
    prevSearch = compile(
        r'\| <a href="([^"]+)" rel="prev">')


class LinuxComFridayFunnies(_BasicScraper):
    url = 'https://www.linux.com/news/friday-funnies/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'the-road-to-japan'
    help = 'Index format: stripname'
    # Both patterns match site-relative paths (no scheme or host).
    imageSearch = compile(tagre(
        "img", "src",
        r'(/news/friday-funnies/episode/[^"]+\?format=image[^"]+)'))
    prevSearch = compile(
        tagre("a", "href", r'(/news/friday-funnies/[^"]+)') + "Previous")


class LittleGamers(_BasicScraper):
    url = 'http://www.little-gamers.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2000/12/01/99'
    help = 'Index format: yyyy/mm/dd/name'
    # Image URLs are matched without the "www." prefix, whereas the
    # navigation links are matched with it.
    imageSearch = compile(tagre(
        "img", "src",
        r'(http://little-gamers\.com/comics/[^"]+)'))
    prevSearch = compile(tagre(
        "a", "href",
        r'(http://www\.little-gamers\.com/[^"]+)',
        before="comic-nav-prev-link"))


class LoadingArtist(_ParserScraper):
    url = 'http://www.loadingartist.com/comic/new-update/'
    # Any <a> whose whitespace-separated class list contains the token "prev".
    prevSearch = "//a[contains(concat(' ', @class, ' '), ' prev ')]"
    # Every <img> nested anywhere inside a div whose class is exactly "comic".
    imageSearch = '//div[@class="comic"]//img'


class LookingForGroup(_ParserScraper):
    url = 'http://www.lfgcomic.com/'
    # Regex-safe form of the base URL; not referenced elsewhere in this class.
    rurl = escape(url)
    stripUrl = url + 'page/%s/'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: nnn'
    # NOTE(review): css = True presumably makes the search strings below CSS
    # selectors rather than XPath - confirm against _ParserScraper.
    css = True
    imageSearch = '#comic img'
    prevSearch = '#comic-left > a'
    starter = indirectStarter(
        url, '#header-dropdown-comic-lfg > a:nth-of-type(2)')
    # Captures the digits-and-dashes page id from a ".../page/<id>/" URL.
    nameSearch = compile(r'/page/([-0-9]+)/')
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`# -- coding: utf-8 --`
Updated copyright for all source files. 2012-06-20 20:41:04 +00:00			`# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs`
Updated copyright. 2014-01-05 15:50:57 +00:00			`# Copyright (C) 2012-2014 Bastian Kleineidam`
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`# Copyright (C) 2015-2016 Tobias Gruetzmacher`

			`from __future__ import absolute_import, division, print_function`
Initial commit to Github. 2012-06-20 19:58:13 +00:00
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`from re import compile, escape`
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00
Fixed problem with LookingForGroup comic 2015-05-07 11:57:10 +00:00			`from ..scraper import _BasicScraper, _ParserScraper`
Added Lackadaisy. 2013-04-25 19:06:12 +00:00			`from ..helpers import bounceStarter, indirectStarter`
Fix some comics. 2012-11-21 20:57:26 +00:00			`from ..util import tagre`
Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-03 22:12:53 +00:00			`from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

Added Lackadaisy. 2013-04-25 19:06:12 +00:00			`class Lackadaisy(_BasicScraper):`
			`baseUrl = 'http://lackadaisy.foxprints.com/'`
			`url = baseUrl + 'comic.php'`
			`stripUrl = baseUrl + 'comic.php?comicid=%s'`
			`firstStripUrl = stripUrl % '1'`
			`imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))`
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +`
			`"< Previous")`
Added Lackadaisy. 2013-04-25 19:06:12 +00:00			`help = 'Index format: n'`
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`starter = bounceStarter(`
			`url, compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +`
			`"Next"))`
Added Lackadaisy. 2013-04-25 19:06:12 +00:00
			`@classmethod`
			`def namer(cls, imageUrl, pageUrl):`
			`"""Use comic id for filename."""`
			`num = pageUrl.rsplit('=', 1)[-1]`
			`ext = imageUrl.rsplit('.', 1)[-1]`
			`return 'lackadaisy_%s.%s' % (num, ext)`


Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`class Laiyu(_WordPressScraper):`
			`url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'`
			`firstStripUrl = url`
			`starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)`


Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class LasLindas(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://laslindas.katbox.net/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Fix some comics. 2013-02-19 19:58:04 +00:00			`stripUrl = url + 'comic/%s/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/[^"]+)' % rurl, after="attachment-full"))`
Fix LasLindas 2013-04-04 16:30:02 +00:00			`multipleImagesPerStrip = True`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl, after="previous"))`
Fix some comics. 2012-11-21 20:57:26 +00:00			`help = 'Index format: stripname'`
Initial commit to Github. 2012-06-20 19:58:13 +00:00

adding LastNerdsOnEarth 2016-01-03 01:16:58 +00:00			`class LastNerdsOnEarth(_ParserScraper):`
			`url = 'http://www.lastnerdsonearth.com/latest/'`
			`imageSearch = '//div[@id="content"]/a/img'`
			`prevSearch = '//div[@id="comicnav"]/a[last()-2]'`


Sort comics. 2013-03-06 19:21:10 +00:00			`class LeastICouldDo(_BasicScraper):`
			`url = 'http://www.leasticoulddo.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Sort comics. 2013-03-06 19:21:10 +00:00			`stripUrl = url + 'comic/%s'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '20130109'`
Fix broken comics 2013-11-12 17:33:14 +00:00			`imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))`
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00			`prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,`
			`after="Previous"))`
			`starter = indirectStarter(`
			`url, compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,`
			`after="feature-comic")))`
Sort comics. 2013-03-06 19:21:10 +00:00			`help = 'Index format: yyyymmdd'`


Move ComicControl into common module. - Move all comics using ComicControl into alphabetical files. - Add BalderDash & Picklewhistle 2016-04-03 22:12:53 +00:00			`class LetsSpeakEnglish(_ComicControlScraper):`
			`url = 'http://www.marycagle.com'`


Initial commit to Github. 2012-06-20 19:58:13 +00:00			`class Lint(_BasicScraper):`
Always have an url attribute in comic scrapers. 2013-02-04 20:00:26 +00:00			`url = 'http://www.purnicellin.com/lint/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`stripUrl = url + '%s/'`
			`firstStripUrl = stripUrl % '2004/01/10/01102004'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`prevSearch = compile(r'\\| <a href="([^"]+)" rel="prev">')`
			`help = 'Index format: yyyy/mm/dd/num-name'`


Updated linuxcom 2013-06-24 18:27:43 +00:00			`class LinuxComFridayFunnies(_BasicScraper):`
			`url = 'https://www.linux.com/news/friday-funnies/'`
			`stripUrl = url + '%s'`
			`firstStripUrl = stripUrl % 'the-road-to-japan'`
			`imageSearch = compile(tagre("img", "src", r'(/news/friday-funnies/episode/[^"]+\?format=image[^"]+)'))`
			`prevSearch = compile(tagre("a", "href", r'(/news/friday-funnies/[^"]+)') + "Previous")`
			`help = 'Index format: stripname'`


Sort comics. 2013-03-06 19:21:10 +00:00			`class LittleGamers(_BasicScraper):`
			`url = 'http://www.little-gamers.com/'`
			`stripUrl = url + '%s/'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '2000/12/01/99'`
Sort comics. 2013-03-06 19:21:10 +00:00			`imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))`
Code cleanup. 2013-03-26 16:36:06 +00:00			`prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))`
Sort comics. 2013-03-06 19:21:10 +00:00			`help = 'Index format: yyyy/mm/dd/name'`


fixed LoadingArtist 2015-05-31 23:33:50 +00:00			`class LoadingArtist(_ParserScraper):`
			`url = 'http://www.loadingartist.com/comic/new-update/'`
			`imageSearch = '//div[@class="comic"]//img'`
			`prevSearch = "//a[contains(concat(' ', @class, ' '), ' prev ')]"`
Added some comic strips and cleanup the scraper code. 2013-03-06 19:00:30 +00:00
Move Flowerlark Studios into alphabetical files. 2016-04-03 20:58:01 +00:00
Fixed problem with LookingForGroup comic 2015-05-07 11:57:10 +00:00			`class LookingForGroup(_ParserScraper):`
Fix LookingForGroup. 2013-02-12 16:55:13 +00:00			`url = 'http://www.lfgcomic.com/'`
Use re.escape and add some firstStripUrl. 2013-04-10 16:19:11 +00:00			`rurl = escape(url)`
Fix LookingForGroup. 2013-02-12 16:55:13 +00:00			`stripUrl = url + 'page/%s/'`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`firstStripUrl = stripUrl % '1'`
Fixed problem with LookingForGroup comic 2015-05-07 11:57:10 +00:00			`css = True`
			`imageSearch = '#comic img'`
			`prevSearch = '#comic-left > a'`
			`starter = indirectStarter(url, '#header-dropdown-comic-lfg > a:nth-of-type(2)')`
Add firstStripUrls. 2013-04-10 21:57:09 +00:00			`nameSearch = compile(r'/page/([-0-9]+)/')`
Initial commit to Github. 2012-06-20 19:58:13 +00:00			`help = 'Index format: nnn'`