diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index bf35b0ec6..a08e1c6c4 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, MULTILINE + from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter -from .common import _WordPressScraper, _ComicPressScraper, WP_LATEST_SEARCH +from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH class AbstruseGoose(_BasicScraper): @@ -67,15 +69,14 @@ class Achewood(_BasicScraper): namer = regexNamer(compile(r'date=(\d+)')) -class AfterStrife(_BasicScraper): +class AfterStrife(_WordPressScraper): baseUrl = 'http://afterstrife.com/' - rurl = escape(baseUrl) stripUrl = baseUrl + '?p=%s' url = stripUrl % '262' firstStripUrl = stripUrl % '1' - imageSearch = compile(r'\s*" + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, after=' alt')) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous") + imageSearch = compile(tagre("div", "class", "entry-content") + + "\s*

\s*" + + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, + after=' alt')) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous") help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy' @@ -250,9 +252,11 @@ class DresdenCodak(_BasicScraper): firstStripUrl = url + '2007/02/08/pom/' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + - tagre("img", "src", r"%sm_prev2?\.png" % rurl, quote="")) - starter = indirectStarter(url, compile(tagre("div", "id", "preview") + - tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl))) + tagre("img", "src", r"%sm_prev2?\.png" % rurl, + quote="")) + starter = indirectStarter( + url, compile(tagre("div", "id", "preview") + + tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl))) class DrFun(_BasicScraper): @@ -309,6 +313,7 @@ class DumbingOfAge(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) help = 'Index format: yyyy/comic/book-num/seriesname/stripname' + class DungeonsAndDenizens(_BasicScraper): url = 'http://dungeond.com/' stripUrl = url + r'\d+/\d+/\d+/%s/' diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index edcce70b4..d0c1ecd58 100755 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE from ..helpers import indirectStarter from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre -from .common import _WordPressScraper, WP_LATEST_SEARCH +from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class class EarthsongSaga(_ParserScraper): @@ -83,12 +83,10 @@ class EdibleDirt(_BasicScraper): help = 'Index format: number' -class EdmundFinney(_ParserScraper): +class EdmundFinney(_WordPressScraper): url = 'http://eqcomics.com/' firstStripUrl = url + '2009/03/08/sunday-aliens/' - imageSearch = '//div[@id="comic"]//img' - prevSearch = '//a[@class="navi navi-prev"]' - help = 'Index format: yyyy/mm/dd/stripname' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class EerieCuties(_BasicScraper): @@ -174,13 +172,10 @@ class ErrantStory(_BasicScraper): help = 'Index format: yyyy-mm-dd/num' -class Erstwhile(_ParserScraper): +class Erstwhile(_WordPressScraper): url = 'http://www.erstwhiletales.com/' - stripUrl = url + '%s/' - css = True - imageSearch = 'div.comicpane a img' - prevSearch = 'a.navi-prev' - help = 'Index format: title-nn' + prevSearch = '//a[%s]' % xpath_class('navi-prev') + endOfLife = True class Eryl(_WordPressScraper): @@ -199,14 +194,10 @@ class EverybodyLovesEricRaymond(_BasicScraper): help = 'Index format: name-of-old-comic' -class EverydayBlues(_BasicScraper): +class EverydayBlues(_WordPressScraper): url = 'http://everydayblues.everydayblues.net/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2010/02/11/sometimes' - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+/)' % rurl, after="navi-prev")) - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+-[^"]+)' % rurl)) - help = 'Index format: yyyy/mm/dd/stripname' + firstStripUrl = url + '2010/02/11/sometimes/' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class EvilDiva(_BasicScraper): @@ -242,15 +233,11 @@ class Exiern(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' -class ExploitationNow(_BasicScraper): +class ExploitationNow(_WordPressScraper): url = 'http://www.exploitationnow.com/' - rurl = escape(url) - stripUrl = url + '%s' - firstStripUrl = stripUrl % '2000-07-07/9' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, - after="navi-prev")) - help = 'Index format: yyyy-mm-dd/num' + firstStripUrl = url + '2000-07-07/9' + prevSearch = '//a[%s]' % xpath_class('navi-prev') + endOfLife = True class ExtraLife(_BasicScraper): diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 4b058a787..675320db2 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -4,12 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre -from .common import _ComicControlScraper +from .common import _ComicControlScraper, _WordPressScraper, xpath_class class Galaxion(_BasicScraper): @@ -150,15 +151,10 @@ class GoneWithTheBlastwave(_BasicScraper): return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1)) -class GrrlPower(_BasicScraper): +class GrrlPower(_WordPressScraper): url = 'http://grrlpowercomic.com/' - rurl = escape(url) - stripUrl = url + 'archives/%s' - firstStripUrl = stripUrl % '48' - imageSearch = compile(tagre("img", "src", r'(.*/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(.*/archives/\d+)', - after="navi-prev")) - help = 'Index format: number' + firstStripUrl = url + 'archives/48' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class GUComics(_BasicScraper): diff --git a/dosagelib/plugins/i.py b/dosagelib/plugins/i.py index 99e7171ec..5a78cec4e 100644 --- a/dosagelib/plugins/i.py +++ b/dosagelib/plugins/i.py @@ -4,10 +4,12 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape + from ..scraper import _BasicScraper from ..util import tagre -from .common import _WordPressScraper +from .common import _WordPressScraper, xpath_class class IAmArg(_BasicScraper): @@ -34,13 +36,11 @@ class IDreamOfAJeanieBottle(_WordPressScraper): url = 'http://jeaniebottle.com/' -class InternetWebcomic(_BasicScraper): +class InternetWebcomic(_WordPressScraper): url = 'http://www.internet-webcomic.com/' - rurl = escape(url) stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '30' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"/]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi navi-prev")) + prevSearch = '//a[%s]' % xpath_class('navi-prev') help = 'Index format: n' diff --git a/dosagelib/plugins/k.py b/dosagelib/plugins/k.py index 13380101d..6f8ce9d94 100644 --- a/dosagelib/plugins/k.py +++ b/dosagelib/plugins/k.py @@ -10,7 +10,7 @@ from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper from ..util import tagre from ..helpers import indirectStarter -from .common import _ComicControlScraper +from .common import _ComicControlScraper, _WordPressScraper, xpath_class class KevinAndKell(_BasicScraper): @@ -37,17 +37,10 @@ class Key(_BasicScraper): help = 'Index format: nnn' -class KickInTheHead(_BasicScraper): +class KickInTheHead(_WordPressScraper): url = 'http://www.kickinthehead.org/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2003/03/20/ipod-envy' - imageSearch = compile( - tagre("img", "src", - r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, - after="navi-prev")) - help = 'Index format: yyyy/mm/dd/stripname' + firstStripUrl = url + '2003/03/20/ipod-envy/' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class KiwiBlitz(_ComicControlScraper): diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 325ee9f26..e1b4bef97 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -10,7 +10,8 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre -from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH +from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH, + xpath_class) class Namesake(_ComicControlScraper): @@ -108,7 +109,7 @@ class NichtLustig(_BasicScraper): class Nicky510(_WordPressScraper): url = 'http://www.nickyitis.com/' - prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]" + prevSearch = '//a[%s]' % xpath_class('navi-prev') class Nimona(_BasicScraper): diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py index 4f76ad230..2dc5ca973 100644 --- a/dosagelib/plugins/o.py +++ b/dosagelib/plugins/o.py @@ -10,7 +10,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre -from .common import _WordPressScraper +from .common import _WordPressScraper, xpath_class class OctopusPie(_ParserScraper): @@ -23,17 +23,6 @@ class OctopusPie(_ParserScraper): help = 'Index format: yyyy-mm-dd/nnn-strip-name' -class OddFish(_BasicScraper): - url = 'http://www.odd-fish.net/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % 'tv-tentacles' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, - after="navi-prev")) - help = 'Index format: stripname' - - class Oglaf(_BasicScraper): url = 'http://oglaf.com/' stripUrl = url + '%s/' @@ -48,18 +37,11 @@ class Oglaf(_BasicScraper): adult = True -class OhJoySexToy(_BasicScraper): +class OhJoySexToy(_WordPressScraper): url = 'http://www.ohjoysextoy.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % 'introduction' - imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + - tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, - after='navi navi-prev')) - textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + - tagre("img", "alt", r'([^"]+)')) - help = 'Index Format: name' + firstStripUrl = url + 'introduction/' + prevSearch = '//a[%s]' % xpath_class('navi-prev') + textSearch = '//div[@id="comic"]//img/@alt' adult = True @@ -119,16 +101,11 @@ class OnTheFastrack(_BasicScraper): return "%s.gif" % name.title() -class Optipess(_BasicScraper): +class Optipess(_WordPressScraper): url = 'http://www.optipess.com/' - stripUrl = url + '%s' firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/' - imageSearch = compile(tagre("img", "src", - r'(%scomics/[x|\d]+[^"]+\.[^"]+)' % url)) - prevSearch = compile(tagre("a", "href", r'([^"]+)', - after="navi navi-prev")) - textSearch = compile(tagre("img", "alt", r'([^"]+)', before=url)) - help = 'Index format: yyyy/mm/dd/stripname' + prevSearch = '//a[%s]' % xpath_class('navi-prev') + textSearch = '//div[@id="comic"]//img/@alt' class OrnerBoy(_BasicScraper): diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index 0b3eb38fa..7d3ac392e 100755 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape + from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, queryNamer, indirectStarter from ..util import tagre -from .common import _ComicControlScraper, _WordPressScraper +from .common import _ComicControlScraper, _WordPressScraper, xpath_class class PandyLand(_WordPressScraper): @@ -40,14 +42,9 @@ class ParallelUniversum(_BasicScraper): lang = 'de' -class PartiallyClips(_BasicScraper): +class PartiallyClips(_WordPressScraper): url = 'http://partiallyclips.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2001/10/28/screaming-woman' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/stripname' + firstStripUrl = url + 'comic/screaming-woman/' class PastelDefender(_BasicScraper): @@ -252,25 +249,14 @@ class PS238(_ParserScraper): help = 'Index format: yyyy-mm-dd' -class PunksAndNerds(_BasicScraper): +class PunksAndNerds(_WordPressScraper): url = 'http://www.punksandnerds.com/' - rurl = escape(url) stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '15' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, - after="navi-prev")) + prevSearch = '//a[%s]' % xpath_class('navi-prev') help = 'Index format: nnn' -class PunksAndNerdsOld(_BasicScraper): - url = 'http://original.punksandnerds.com/' - stripUrl = url + 'd/%s.html' - imageSearch = compile(r' src="(/comics/.+?)"') - prevSearch = compile(r'>]+?>]+?src="/previouscomic.gif">') - help = 'Index format: yyyymmdd' - - class PvPonline(_BasicScraper): url = 'http://pvponline.com/comic' stripUrl = url + '%s' diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 960ce179c..7e7c422a2 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -12,7 +12,8 @@ import datetime from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter, bounceStarter from ..util import tagre -from .common import _ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH +from .common import (_ComicControlScraper, _WordPressScraper, WP_LATEST_SEARCH, + xpath_class) class SabrinaOnline(_BasicScraper): @@ -36,15 +37,10 @@ class SabrinaOnline(_BasicScraper): return archivepages[-1] -class SafelyEndangered(_BasicScraper): +class SafelyEndangered(_WordPressScraper): url = 'http://www.safelyendangered.com/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'ignored' - imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', - after="navi navi-prev")) - textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads')) - help = 'Index format: yyyy/mm/stripname' + firstStripUrl = url + 'comic/ignored/' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class SailorsunOrg(_WordPressScraper): @@ -209,15 +205,9 @@ class ShermansLagoon(_BasicScraper): return "%s-%s-%s" % (year, month, day) -class Shivae(_BasicScraper): - url = 'http://shivae.net/' - rurl = escape(url) - stripUrl = url + 'blog/%s/' - firstStripUrl = stripUrl % '2007/09/21/09212007' - imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, - after="navi-prev")) - help = 'Index format: yyyy/mm/dd/stripname' +class Shivae(_WordPressScraper): + url = 'http://shivae.com/' + firstStripUrl = url + 'gnip/ck-chapter-01/caidenkoel-title-01/' class Shortpacked(_ParserScraper): @@ -229,14 +219,9 @@ class Shortpacked(_ParserScraper): help = 'Index format: nnn' -class ShotgunShuffle(_BasicScraper): +class ShotgunShuffle(_WordPressScraper): url = 'http://shotgunshuffle.com/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'pilot/' - imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', - after="navi navi-prev")) - help = 'Index format: stripname' + firstStripUrl = url + 'comic/pilot/' class SinFest(_BasicScraper): @@ -362,7 +347,7 @@ class SpaceTrawler(_WordPressScraper): base_url = 'http://spacetrawler.com/' url = base_url + '2013/12/24/spacetrawler-379/' firstStripUrl = base_url + '2010/01/01/spacetrawler-4/' - prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]" + prevSearch = '//a[%s]' % xpath_class('navi-prev') endOfLife = True diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index d0da70b5b..3feb65f0e 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -4,22 +4,19 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape from ..scraper import _BasicScraper from ..helpers import indirectStarter from ..util import tagre +from .common import _WordPressScraper, xpath_class -class Underling(_BasicScraper): +class Underling(_WordPressScraper): url = 'http://underlingcomic.com/' - stripUrl = url - rurl = escape(url) - firstStripUrl = stripUrl + 'page-one/' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl)) - prevSearch = compile(tagre("a", "href", r'([^"]+)', - after=r'class="[^"]*navi-prev')) - help = 'Index format: nnn' + firstStripUrl = url + 'page-one/' + prevSearch = '//a[%s]' % xpath_class('navi-prev') class Undertow(_BasicScraper): diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index f164bc7d5..a28f2bb8f 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper from ..util import tagre from ..helpers import indirectStarter +from .common import _ComicControlScraper, _WordPressScraper class WapsiSquare(_BasicScraper): @@ -108,15 +110,10 @@ class WhiteNoise(_BasicScraper): help = 'Index format: n' -class Whomp(_BasicScraper): +class Whomp(_ComicControlScraper): url = 'http://www.whompcomic.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2010/06/14/06142010' - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, - after="navi-prev")) - help = 'Index format: yyyy/mm/dd/stripname' + firstStripUrl = url + 'comic/06152010' + textSearch = '//img[@id="cc-comic"]/@title' class WhyTheLongFace(_BasicScraper): @@ -193,16 +190,13 @@ class WorldOfMrToast(_BasicScraper): def getPrevUrl(self, url, data, baseUrl): idx = self.prevurls.index(url) try: - return self.prevurls[idx+1] + return self.prevurls[idx + 1] except IndexError: return None -class WorldOfWarcraftEh(_BasicScraper): +class WorldOfWarcraftEh(_WordPressScraper): url = 'http://woweh.com/' - stripUrl = None - imageSearch = compile(r'http://woweh.com/(comics/.+?)"') - prevSearch = compile(r'woweh.com/(\?p=.+:?)".+:?="prev') class WormWorldSaga(_BasicScraper): @@ -242,12 +236,3 @@ class WormWorldSagaGerman(WormWorldSaga): class WormWorldSagaSpanish(WormWorldSaga): lang = 'es' - - -class WotNow(_BasicScraper): - url = 'http://shadowburn.binmode.com/wotnow/' - stripUrl = url + 'comic.php?comic_id=%s' - firstStripUrl = stripUrl % '1' - imageSearch = compile(r'