diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py
index 7b7a62940..2777891c2 100644
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@@ -1,8 +1,13 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
+
from .util import getQueryParams
+
def queryNamer(paramName, usePageUrl=False):
"""Get name from URL query part."""
@classmethod
@@ -25,23 +30,32 @@ def regexNamer(regex, usePageUrl=False):
return _namer
-def bounceStarter(url, nextSearch):
- """Get start URL by "bouncing" back and forth one time."""
+def bounceStarter():
+ """Get start URL by "bouncing" back and forth one time.
+
+ This needs the url and nextSearch properties to be defined on the class.
+ """
@classmethod
def _starter(cls):
"""Get bounced start URL."""
- data = cls.getPage(url)
- url1 = cls.fetchUrl(url, data, cls.prevSearch)
+ data = cls.getPage(cls.url)
+ url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url1)
- return cls.fetchUrl(url1, data, nextSearch)
+ return cls.fetchUrl(url1, data, cls.nextSearch)
return _starter
-def indirectStarter(url, latestSearch):
- """Get start URL by indirection."""
+def indirectStarter():
+ """Get start URL by indirection.
+
+ This is useful for comics where the latest comic can't be reached at a
+ stable URL. If the class has an attribute 'startUrl', this page is fetched
+ first, otherwise the page at 'url' is fetched. After that, the attribute
+ 'latestSearch' is used on the page content to find the latest strip."""
@classmethod
def _starter(cls):
"""Get indirect start URL."""
+ url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
data = cls.getPage(url)
- return cls.fetchUrl(url, data, latestSearch)
+ return cls.fetchUrl(url, data, cls.latestSearch)
return _starter
diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py
index a08e1c6c4..141cd982b 100644
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -16,8 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
class AbstruseGoose(_BasicScraper):
url = 'http://abstrusegoose.com/'
rurl = escape(url)
- starter = bounceStarter(
- url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next »"))
+ starter = bounceStarter()
stripUrl = url + '%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src',
@@ -81,7 +80,6 @@ class AfterStrife(_WordPressScraper):
class AGirlAndHerFed(_BasicScraper):
url = 'http://www.agirlandherfed.com/'
- starter = bounceStarter(url, compile(r'[^>]+Back'))
stripUrl = url + '1.%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
@@ -114,7 +112,6 @@ class ALessonIsLearned(_BasicScraper):
url = 'http://www.alessonislearned.com/'
prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
quote="'") + r"[^>]+previous")
- starter = indirectStarter(url, prevSearch)
stripUrl = url + 'index.php?comic=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
@@ -124,8 +121,8 @@ class ALessonIsLearned(_BasicScraper):
class Alice(_WordPressScraper):
url = 'http://www.alicecomics.com/'
prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
- starter = indirectStarter('http://www.alicecomics.com/',
- '//a[text()="Latest Alice!"]')
+ latestSearch = '//a[text()="Latest Alice!"]'
+ starter = indirectStarter()
class AlienLovesPredator(_BasicScraper):
@@ -264,7 +261,8 @@ class ARedTailsDream(_BasicScraper):
class Ashes(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
firstStripUrl = url
- starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+ latestSearch = WP_LATEST_SEARCH
+ starter = indirectStarter()
class ASkeweredParadise(_BasicScraper):
@@ -289,12 +287,13 @@ class ASofterWorld(_ParserScraper):
class AstronomyPOTD(_ParserScraper):
baseUrl = 'http://apod.nasa.gov/apod/'
url = baseUrl + 'astropix.html'
- starter = bounceStarter(url, '//a[text()=">"]')
+ starter = bounceStarter()
stripUrl = baseUrl + 'ap%s.html'
firstStripUrl = stripUrl % '061012'
imageSearch = '//a/img'
multipleImagesPerStrip = True
prevSearch = '//a[text()="<"]'
+ nextSearch = '//a[text()=">"]'
help = 'Index format: yymmdd'
def shouldSkipUrl(self, url, data):
diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py
index 349ef152c..ceb88f9ed 100644
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -47,8 +47,8 @@ class BalderDash(_ComicControlScraper):
class Bardsworth(_WordPressScraper):
url = 'http://www.bardsworth.com/'
- starter = indirectStarter('http://www.bardsworth.com/',
- '//a[@rel="bookmark"]')
+ latestSearch = '//a[@rel="bookmark"]'
+ starter = indirectStarter()
class Baroquen(_BasicScraper):
@@ -72,12 +72,15 @@ class Beetlebum(_BasicScraper):
rurl = escape(url)
stripUrl = url + '%s'
firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
- starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
+ starter = indirectStarter()
multipleImagesPerStrip = True
imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre('a', 'href',
r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
after='prev'))
+ latestSearch = compile(tagre('a', 'href',
+ r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl,
+ after='bookmark'))
help = 'Index format: yyyy/mm/dd/striptitle'
lang = 'de'
@@ -223,7 +226,8 @@ class BoredAndEvil(_BasicScraper):
firstStripUrl = stripUrl % '2004-06-07'
imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
prevSearch = compile(r'First Comic.+\s*
\s*
")
- starter = indirectStarter(
- url,
- compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
+ latestSearch = compile(tagre('a', 'href', "(" + url +
+ r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
+ starter = indirectStarter()
class Freefall(_BasicScraper):
diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py
index 675320db2..7f42d37b9 100644
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@@ -27,15 +27,15 @@ class Garanos(_BasicScraper):
baseUrl = 'http://garanos.alexheberling.com/'
rurl = escape(baseUrl)
url = baseUrl + 'pages/page-1/'
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
- after="nav-last")))
+ starter = indirectStarter()
stripUrl = baseUrl + 'pages/page-%s'
imageSearch = compile(
tagre("img", "src",
r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
after="prev"))
+ latestSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
+ after="nav-last"))
help = 'Index format: n (unpadded)'
@@ -136,14 +136,14 @@ class GoGetARoomie(_ComicControlScraper):
class GoneWithTheBlastwave(_BasicScraper):
url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
- starter = indirectStarter(
- url, compile(r'href="(index.php\?p=comic&nro=\d+)">' +
- r'' +
r'' +
+ r']+\.html)', quote="")))
- imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)', before="photo-frame"))
- prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)', quote="") + "Next")
+ starter = indirectStarter()
+ imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
+ before="photo-frame"))
+ prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
+ quote="") + "Next")
+ latestSearch = compile(tagre("a", "href", r'(files/JimBentonComic-[^>]+\.html)', quote=""))
help = 'Index format: stripname'
@@ -58,6 +65,7 @@ class JustAnotherEscape(_BasicScraper):
rurl = escape(url)
stripUrl = url + 'index.cgi?date=%s'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
- prevSearch = compile(tagre("a", "href", r'(%s/index\.cgi\?date=\d+)' % rurl)
- + tagre("img", "alt", "Previous Comic"))
+ prevSearch = compile(tagre("a", "href",
+ r'(%s/index\.cgi\?date=\d+)' % rurl) +
+ tagre("img", "alt", "Previous Comic"))
help = 'Index format: yyyymmdd'
diff --git a/dosagelib/plugins/k.py b/dosagelib/plugins/k.py
index 6f8ce9d94..da7384833 100644
--- a/dosagelib/plugins/k.py
+++ b/dosagelib/plugins/k.py
@@ -9,7 +9,6 @@ from re import compile, escape, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
-from ..helpers import indirectStarter
from .common import _ComicControlScraper, _WordPressScraper, xpath_class
@@ -81,4 +80,3 @@ class KuroShouri(_BasicScraper):
tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,
after="previous"))
help = 'Index format: chapter-n-page-m'
- starter = indirectStarter(url, prevSearch)
diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py
index 8e7d5dc0f..2d0f731ae 100644
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@@ -21,10 +21,10 @@ class Lackadaisy(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))
prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
"< Previous")
+ nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
+ "Next")
help = 'Index format: n'
- starter = bounceStarter(
- url, compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
- "Next"))
+ starter = bounceStarter()
@classmethod
def namer(cls, imageUrl, pageUrl):
@@ -37,7 +37,8 @@ class Lackadaisy(_BasicScraper):
class Laiyu(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
firstStripUrl = url
- starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+ latestSearch = WP_LATEST_SEARCH
+ starter = indirectStarter()
class LasLindas(_BasicScraper):
@@ -64,9 +65,9 @@ class LeastICouldDo(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))
prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
after="Previous"))
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
- after="feature-comic")))
+ latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
+ after="feature-comic"))
+ starter = indirectStarter()
help = 'Index format: yyyymmdd'
@@ -110,12 +111,11 @@ class LoadingArtist(_ParserScraper):
class LookingForGroup(_ParserScraper):
url = 'http://www.lfgcomic.com/'
- rurl = escape(url)
stripUrl = url + 'page/%s/'
firstStripUrl = stripUrl % '1'
css = True
imageSearch = '#comic img'
prevSearch = '#comic-left > a'
- starter = indirectStarter(url, '#header-dropdown-comic-lfg > a:nth-of-type(2)')
- nameSearch = compile(r'/page/([-0-9]+)/')
+ latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
+ starter = indirectStarter()
help = 'Index format: nnn'
diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py
index e1b4bef97..d8b157ca4 100644
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@@ -102,9 +102,9 @@ class NichtLustig(_BasicScraper):
lang = 'de'
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
+ latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
help = 'Index format: yymmdd'
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
+ starter = indirectStarter()
class Nicky510(_WordPressScraper):
@@ -136,7 +136,8 @@ class NobodyScores(_BasicScraper):
class NoMoreSavePoints(_WordPressScraper):
url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
firstStripUrl = url
- starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+ latestSearch = WP_LATEST_SEARCH
+ starter = indirectStarter()
class NoNeedForBushido(_BasicScraper):
@@ -149,10 +150,10 @@ class NoNeedForBushido(_BasicScraper):
after="attachment-full"))
prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
after="previous-webcomic"))
+ latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
+ after="last-webcomic"))
help = 'Index format: nnn'
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
- after="last-webcomic")))
+ starter = indirectStarter()
class NotInventedHere(_BasicScraper):
diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py
index 2dc5ca973..c81fbe563 100644
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@@ -8,7 +8,6 @@ from __future__ import absolute_import, division, print_function
from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter
from ..util import tagre
from .common import _WordPressScraper, xpath_class
@@ -53,7 +52,6 @@ class OkCancel(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
prevSearch = compile(tagre("div", "class", "previous") +
tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
- starter = indirectStarter(url, prevSearch)
help = 'Index format: yyyymmdd'
diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py
index 7d3ac392e..bc407aafe 100755
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -20,12 +20,13 @@ class PandyLand(_WordPressScraper):
class ParadigmShift(_BasicScraper):
url = 'http://www.paradigmshiftmanga.com/'
- starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)',
- after="next-comic-link")))
+ starter = indirectStarter()
stripUrl = url + 'ps/%s.html'
imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)',
after="previous-comic-link"))
+ latestSearch = compile(tagre("a", "href", r'([^"]+)',
+ after="next-comic-link"))
help = 'Index format: custom'
@@ -72,7 +73,6 @@ class PennyAndAggie(_BasicScraper):
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
- starter = indirectStarter(url, prevSearch)
help = 'Index format: n (unpadded)'
@@ -162,11 +162,12 @@ class PicPakDog(_BasicScraper):
class PiledHigherAndDeeper(_BasicScraper):
url = 'http://www.phdcomics.com/comics.php'
- starter = bounceStarter(url, compile(r'.*]*next_button\.gif'))
+ starter = bounceStarter()
stripUrl = url + '?comicid=%s'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
prevSearch = compile(r'.*]*prev_button\.gif')
+ nextSearch = compile(r'.*]*next_button\.gif')
help = 'Index format: n (unpadded)'
namer = queryNamer('comicid', usePageUrl=True)
@@ -204,9 +205,9 @@ class PokeyThePenguin(_ParserScraper):
stripUrl = url + 'index%s.html'
firstStripUrl = stripUrl % '1'
imageSearch = '//p/img'
- prevSearch = True
+ latestSearch = '(//a)[last()]'
multipleImagesPerStrip = True
- starter = indirectStarter(url, "(//a)[last()]")
+ starter = indirectStarter()
help = 'Index format: number'
def getPrevUrl(self, url, data):
@@ -230,22 +231,22 @@ class PoorlyDrawnLines(_BasicScraper):
class Precocious(_BasicScraper):
url = 'http://www.precociouscomic.com/'
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
- tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
- )
+ starter = indirectStarter()
stripUrl = url + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
+ latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
+ tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
help = 'Index format: yyyy/mm/dd'
class PS238(_ParserScraper):
url = 'http://ps238.nodwick.com/'
stripUrl = url + '/comic/%s/'
- starter = bounceStarter(url, '//a[@class="comic-nav-base comic-nav-next"]')
+ starter = bounceStarter()
imageSearch = '//div[@id="comic"]//img'
prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
+ nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
help = 'Index format: yyyy-mm-dd'
diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py
index 5980f673f..83b13446b 100644
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@@ -1,10 +1,13 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
from re import compile, escape
-from ..scraper import _BasicScraper
-from ..scraper import _ParserScraper
+
+from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import bounceStarter
from ..util import tagre
@@ -16,6 +19,7 @@ class RadioactivePanda(_BasicScraper):
prevSearch = compile(r'<<', IGNORECASE)
+ latestSearch = compile(r'SEXY LOSERS Latest SL Comic \(#\d+\)', IGNORECASE)
help = 'Index format: nnn'
- starter = indirectStarter(url,
- compile(r'SEXY LOSERS Latest SL Comic \(#\d+\)', IGNORECASE))
+ starter = indirectStarter()
@classmethod
def namer(cls, imageUrl, pageUrl):
@@ -333,7 +334,8 @@ class SnowFlame(_WordPressScraper):
url = 'http://www.snowflamecomic.com/'
stripUrl = url + '?comic=snowflame-%s-%s'
firstStripUrl = stripUrl % ('01', '01')
- starter = bounceStarter(url, WP_LATEST_SEARCH)
+ starter = bounceStarter()
+ nextSearch = WP_LATEST_SEARCH
help = 'Index format: chapter-page'
def getIndexStripUrl(self, index):
@@ -392,8 +394,9 @@ class Spamusement(_BasicScraper):
imageSearch = compile(r'' % rurl,
IGNORECASE)
+ latestSearch = prevSearch
help = 'Index format: n (unpadded)'
- starter = indirectStarter(url, prevSearch)
+ starter = indirectStarter()
class SpareParts(_BasicScraper):
@@ -504,8 +507,7 @@ class StuffNoOneToldMe(_BasicScraper):
stripUrl = url + '%s.html'
firstStripUrl = stripUrl % '2010/05/01'
olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
- starter = indirectStarter(
- url, compile(tagre("a", "href", olderHref, quote="'")))
+ starter = indirectStarter()
imageSearch = (
compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
r"(?:|
)"),
@@ -515,6 +517,7 @@ class StuffNoOneToldMe(_BasicScraper):
)
prevSearch = compile(tagre("a", "href", olderHref, quote="'",
before="older-link"))
+ latestSearch = compile(tagre("a", "href", olderHref, quote="'"))
multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/stripname'
diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py
index c0c064ce0..c4cdf58cb 100755
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -76,14 +76,14 @@ class TheNoob(_BasicScraper):
class TheOrderOfTheStick(_BasicScraper):
- baseUrl = 'http://www.giantitp.com/'
- url = baseUrl + 'comics/oots0863.html'
- stripUrl = baseUrl + 'comics/oots%s.html'
+ url = 'http://www.giantitp.com/'
+ stripUrl = url + 'comics/oots%s.html'
firstStripUrl = stripUrl % '0001'
imageSearch = compile(r'')
prevSearch = compile(r'')
- starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
+ latestSearch = compile(tagre("a", "href", r'([^"]+)',
+ after='class="timestamp"'))
+ starter = indirectStarter()
adult = True
indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py
index 3feb65f0e..c1c4e87d5 100644
--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@@ -21,12 +21,10 @@ class Underling(_WordPressScraper):
class Undertow(_BasicScraper):
url = 'http://undertow.dreamshards.org/'
- stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
prevSearch = compile(r'href="(.+?)".+?teynpoint')
- help = 'Index format: good luck !'
- starter = indirectStarter(url,
- compile(r'href="(.+?)".+?Most recent page'))
+ latestSearch = compile(r'href="(.+?)".+?Most recent page')
+ starter = indirectStarter()
class UnicornJelly(_BasicScraper):
@@ -46,9 +44,10 @@ class Unsounded(_BasicScraper):
rurl = escape(url)
imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
- starter = indirectStarter(
- url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
- tagre("img", "src", r"%simages/newpages\.png" % rurl)))
+ latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
+ tagre("img", "src",
+ r"%simages/newpages\.png" % rurl))
+ starter = indirectStarter()
help = 'Index format: chapter-number'
def getIndexStripUrl(self, index):
diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py
index a28f2bb8f..157e853f8 100644
--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
class WebDesignerCOTW(_BasicScraper):
url = 'http://www.webdesignerdepot.com/'
rurl = escape(url)
- starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
+ starter = indirectStarter()
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
imageSearch = (
@@ -57,6 +57,7 @@ class WebDesignerCOTW(_BasicScraper):
multipleImagesPerStrip = True
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
before='prev', quote="'"))
+ latestSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl))
help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url, data):
diff --git a/dosagelib/plugins/wordpress.py b/dosagelib/plugins/wordpress.py
index fd3665e9f..7683cb73b 100644
--- a/dosagelib/plugins/wordpress.py
+++ b/dosagelib/plugins/wordpress.py
@@ -4,13 +4,13 @@ from ..scraper import make_scraper
from .common import _WordPressScraper
-def add(name, url, starter=None):
+def add(name, start):
attrs = dict(
name=name,
- url=url
+ url='http://hijinksensue.com/',
+ latestSearch=start,
+ starter=indirectStarter()
)
- if starter:
- attrs['starter'] = starter
globals()[name] = make_scraper(name, _WordPressScraper, **attrs)
@@ -22,4 +22,4 @@ for (name, starterXPath) in [
('HijinksEnsueConvention', '//h4[text()="Latest Fancy Convention Sketches"]/..//a'),
('HijinksEnsuePhoto', '//h4[text()="Latest Fancy Photo Comic"]/..//a')
]:
- add(name, 'http://hijinksensue.com/', starter=indirectStarter('http://hijinksensue.com/', starterXPath))
+ add(name, starterXPath)
diff --git a/dosagelib/plugins/x.py b/dosagelib/plugins/x.py
index 8ec8a7d21..7fa17322c 100644
--- a/dosagelib/plugins/x.py
+++ b/dosagelib/plugins/x.py
@@ -1,6 +1,9 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
from re import compile
@@ -12,13 +15,13 @@ from ..util import tagre
class Xkcd(_BasicScraper):
name = 'xkcd'
url = 'http://xkcd.com/'
- starter = bounceStarter(url, compile(tagre("a", "href", r'(/\d+/)',
- before="next")))
+ starter = bounceStarter()
stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src",
r'(//imgs\.xkcd\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
+ nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
help = 'Index format: n (unpadded)'
textSearch = compile(tagre("img", "title", r'([^"]+)',
before=r'//imgs\.xkcd\.com/comics/'))
diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py
index 02326eda1..07aa54dc6 100644
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@@ -22,15 +22,16 @@ class ZapComic(_ParserScraper):
class Zapiro(_BasicScraper):
url = 'http://www.mg.co.za/zapiro/'
- starter = bounceStarter(
- url, compile(tagre("li", "class", r'nav_older') +
- tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')))
+ starter = bounceStarter()
stripUrl = 'http://mg.co.za/cartoon/%s'
firstStripUrl = stripUrl % 'zapiro_681'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
prevSearch = compile(tagre("li", "class", r'nav_older') +
tagre("a", "href",
r'(http://mg\.co\.za/cartoon/[^"]+)'))
+ nextSearch = compile(tagre("li", "class", r'nav_older') +
+ tagre("a", "href",
+ r'(http://mg\.co\.za/cartoon/[^"]+)'))
help = 'Index format: yyyy-mm-dd-stripname'
@classmethod