diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index f74886204..09822595e 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -8,7 +8,7 @@ from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter -from .common import _WordPressScraper, _ComicPressScraper +from .common import _WordPressScraper, _ComicPressScraper, WP_LATEST_SEARCH class AbstruseGoose(_BasicScraper): @@ -300,6 +300,12 @@ class ARedTailsDream(_BasicScraper): help = 'Index format: nn' +class Ashes(_WordPressScraper): + url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/' + firstStripUrl = url + starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH) + + class ASkeweredParadise(_BasicScraper): url = 'http://aspcomics.net/' stripUrl = url + 'comic/%s' diff --git a/dosagelib/plugins/common.py b/dosagelib/plugins/common.py index bcbda63c3..f00cc3ab7 100644 --- a/dosagelib/plugins/common.py +++ b/dosagelib/plugins/common.py @@ -13,6 +13,9 @@ from ..scraper import _ParserScraper # expression is for which comics. +WP_LATEST_SEARCH = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]' + + class _WordPressScraper(_ParserScraper): imageSearch = '//div[@id="comic"]//img' prevSearch = "//a[contains(concat(' ', @class, ' '), ' comic-nav-previous ')]" diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index 20d852df9..edcce70b4 100755 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, IGNORECASE from ..helpers import indirectStarter from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre +from .common import _WordPressScraper, WP_LATEST_SEARCH class EarthsongSaga(_ParserScraper): @@ -38,6 +40,12 @@ class EarthsongSaga(_ParserScraper): int(imgmatch.group(3)), suffix) +class EasilyAmused(_WordPressScraper): + url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/' + firstStripUrl = url + starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH) + + class EatLiver(_BasicScraper): url = 'http://www.eatliver.com/' rurl = escape(url) @@ -57,9 +65,11 @@ class EatThatToast(_BasicScraper): rurl = escape(url) stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % 'thewizard/' - imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='comic-nav-base comic-nav-previous')) - textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "alt", r'([^"]+)')) + imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, + after='comic-nav-base comic-nav-previous')) + textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + + tagre("img", "alt", r'([^"]+)')) help = 'Index Format: name' @@ -68,7 +78,8 @@ class EdibleDirt(_BasicScraper): stripUrl = url + 'index.php?id=%s' firstStripUrl = stripUrl % '0' imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) - prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)")+"Previous") + prevSearch = compile(tagre("a", "href", r"(index\.php\?id=\d+)") + + "Previous") help = 'Index format: number' @@ -172,6 +183,13 @@ class Erstwhile(_ParserScraper): help = 'Index format: title-nn' +class Eryl(_WordPressScraper): + url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/' + firstStripUrl = url + starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH) + help = 'This was known as DarkWings in previous Dosage versions' + + class EverybodyLovesEricRaymond(_BasicScraper): url = 'http://geekz.co.uk/lovesraymond/' stripUrl = url + 'archive/%s' diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py index 4db96fa93..8766cf28b 100644 --- a/dosagelib/plugins/l.py +++ b/dosagelib/plugins/l.py @@ -1,11 +1,16 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from re import compile, escape + from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, indirectStarter from ..util import tagre +from .common import _WordPressScraper, WP_LATEST_SEARCH class Lackadaisy(_BasicScraper): @@ -14,10 +19,12 @@ class Lackadaisy(_BasicScraper): stripUrl = baseUrl + 'comic.php?comicid=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)')) - prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") + "< Previous") + prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") + + "< Previous") help = 'Index format: n' - starter = bounceStarter(url, - compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") + "Next")) + starter = bounceStarter( + url, compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") + + "Next")) @classmethod def namer(cls, imageUrl, pageUrl): @@ -27,6 +34,12 @@ class Lackadaisy(_BasicScraper): return 'lackadaisy_%s.%s' % (num, ext) +class Laiyu(_WordPressScraper): + url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/' + firstStripUrl = url + starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH) + + class LasLindas(_BasicScraper): url = 'http://laslindas.katbox.net/' rurl = escape(url) @@ -49,9 +62,11 @@ class LeastICouldDo(_BasicScraper): stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % '20130109' imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="Previous")) - starter = indirectStarter(url, - compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="feature-comic"))) + prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, + after="Previous")) + starter = indirectStarter( + url, compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, + after="feature-comic"))) help = 'Index format: yyyymmdd' @@ -88,6 +103,7 @@ class LoadingArtist(_ParserScraper): imageSearch = '//div[@class="comic"]//img' prevSearch = "//a[contains(concat(' ', @class, ' '), ' prev ')]" + class LookingForGroup(_ParserScraper): url = 'http://www.lfgcomic.com/' rurl = escape(url) diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 124fce1b2..9eeea81b2 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape + from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre -from .common import _WordPressScraper +from .common import _WordPressScraper, WP_LATEST_SEARCH class Namesake(_BasicScraper): @@ -140,6 +142,12 @@ class NobodyScores(_BasicScraper): help = 'Index format: nnn' +class NoMoreSavePoints(_WordPressScraper): + url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/' + firstStripUrl = url + starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH) + + class NoNeedForBushido(_BasicScraper): url = 'http://nn4b.com/' rurl = escape(url) diff --git a/dosagelib/plugins/webcomicfactory.py b/dosagelib/plugins/webcomicfactory.py index 560880d9f..5fb2c6fa5 100644 --- a/dosagelib/plugins/webcomicfactory.py +++ b/dosagelib/plugins/webcomicfactory.py @@ -5,18 +5,16 @@ from __future__ import absolute_import, division, print_function -from .common import _WordPressScraper +from .common import _WordPressScraper, WP_LATEST_SEARCH class _WebcomicFactory(_WordPressScraper): - latestSearch = '//a[contains(concat(" ", @class, " "), " comic-nav-last ")]' - @classmethod def starter(cls): """this is basically helpers.indirectStarter, but dynamically selecting the right parameters.""" data = cls.getPage(cls.firstStripUrl) - return cls.fetchUrl(cls.firstStripUrl, data, cls.latestSearch) + return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH) # do not edit anything below since these entries are generated from diff --git a/dosagelib/plugins/wordpress.py b/dosagelib/plugins/wordpress.py index a5c277ecc..fd3665e9f 100644 --- a/dosagelib/plugins/wordpress.py +++ b/dosagelib/plugins/wordpress.py @@ -23,17 +23,3 @@ for (name, starterXPath) in [ ('HijinksEnsuePhoto', '//h4[text()="Latest Fancy Photo Comic"]/..//a') ]: add(name, 'http://hijinksensue.com/', starter=indirectStarter('http://hijinksensue.com/', starterXPath)) - -# all comics on flowerlarkstudios -for (name, linkNumber) in [ - ('Ashes', 1), - ('Eryl', 3), - # this is a duplicate as it was under this name in previous versions of dosage - ('DarkWings', 3), - ('Laiyu', 5), - ('NoMoreSavePoints', 7), - ('EasilyAmused', 9) -]: - add(name, 'http://www.flowerlarkstudios.com/', - starter=indirectStarter('http://www.flowerlarkstudios.com/', - '(//div[@id="sidebar-left"]//a)[' + str(linkNumber) + ']'))