Read starter parameters from class.

This allows starters to be specified in a more declarative and dynamic way.
2016-04-12 23:11:39 +02:00 · 2016-04-12 23:11:39 +02:00 · 42e43fa4e6
commit 42e43fa4e6
parent b865a171f9
23 changed files with 186 additions and 140 deletions
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@ -1,8 +1,13 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
+
 from .util import getQueryParams

+
 def queryNamer(paramName, usePageUrl=False):
    """Get name from URL query part."""
    @classmethod
@ -25,23 +30,32 @@ def regexNamer(regex, usePageUrl=False):
    return _namer


-def bounceStarter(url, nextSearch):
-    """Get start URL by "bouncing" back and forth one time."""
+def bounceStarter():
+    """Get start URL by "bouncing" back and forth one time.
+
+    This requires the url and nextSearch properties to be defined on the class.
+    """
    @classmethod
    def _starter(cls):
        """Get bounced start URL."""
-        data = cls.getPage(url)
-        url1 = cls.fetchUrl(url, data, cls.prevSearch)
+        data = cls.getPage(cls.url)
+        url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
        data = cls.getPage(url1)
-        return cls.fetchUrl(url1, data, nextSearch)
+        return cls.fetchUrl(url1, data, cls.nextSearch)
    return _starter


-def indirectStarter(url, latestSearch):
-    """Get start URL by indirection."""
+def indirectStarter():
+    """Get start URL by indirection.
+
+    This is useful for comics where the latest comic can't be reached at a
+    stable URL. If the class has an attribute 'startUrl', this page is fetched
+    first, otherwise the page at 'url' is fetched. After that, the attribute
+    'latestSearch' is used on the page content to find the latest strip."""
    @classmethod
    def _starter(cls):
        """Get indirect start URL."""
+        url = cls.startUrl if hasattr(cls, "startUrl") else cls.url
        data = cls.getPage(url)
-        return cls.fetchUrl(url, data, latestSearch)
+        return cls.fetchUrl(url, data, cls.latestSearch)
    return _starter
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@ -16,8 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH
 class AbstruseGoose(_BasicScraper):
    url = 'http://abstrusegoose.com/'
    rurl = escape(url)
-    starter = bounceStarter(
-        url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next &raquo;"))
+    starter = bounceStarter()
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre('img', 'src',
@ -81,7 +80,6 @@ class AfterStrife(_WordPressScraper):

 class AGirlAndHerFed(_BasicScraper):
    url = 'http://www.agirlandherfed.com/'
-    starter = bounceStarter(url, compile(r'<a href="([^"]+)">[^>]+Back'))
    stripUrl = url + '1.%s.html'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)'))
@ -114,7 +112,6 @@ class ALessonIsLearned(_BasicScraper):
    url = 'http://www.alessonislearned.com/'
    prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)",
                               quote="'") + r"[^>]+previous")
-    starter = indirectStarter(url, prevSearch)
    stripUrl = url + 'index.php?comic=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r"(cmx/lesson\d+\.[a-z]+)"))
@ -124,8 +121,8 @@ class ALessonIsLearned(_BasicScraper):
 class Alice(_WordPressScraper):
    url = 'http://www.alicecomics.com/'
    prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
-    starter = indirectStarter('http://www.alicecomics.com/',
-                              '//a[text()="Latest Alice!"]')
+    latestSearch = '//a[text()="Latest Alice!"]'
+    starter = indirectStarter()


 class AlienLovesPredator(_BasicScraper):
@ -264,7 +261,8 @@ class ARedTailsDream(_BasicScraper):
 class Ashes(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/'
    firstStripUrl = url
-    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+    latestSearch = WP_LATEST_SEARCH
+    starter = indirectStarter()


 class ASkeweredParadise(_BasicScraper):
@ -289,12 +287,13 @@ class ASofterWorld(_ParserScraper):
 class AstronomyPOTD(_ParserScraper):
    baseUrl = 'http://apod.nasa.gov/apod/'
    url = baseUrl + 'astropix.html'
-    starter = bounceStarter(url, '//a[text()=">"]')
+    starter = bounceStarter()
    stripUrl = baseUrl + 'ap%s.html'
    firstStripUrl = stripUrl % '061012'
    imageSearch = '//a/img'
    multipleImagesPerStrip = True
    prevSearch = '//a[text()="<"]'
+    nextSearch = '//a[text()=">"]'
    help = 'Index format: yymmdd'

    def shouldSkipUrl(self, url, data):
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@ -47,8 +47,8 @@ class BalderDash(_ComicControlScraper):

 class Bardsworth(_WordPressScraper):
    url = 'http://www.bardsworth.com/'
-    starter = indirectStarter('http://www.bardsworth.com/',
-                              '//a[@rel="bookmark"]')
+    latestSearch = '//a[@rel="bookmark"]'
+    starter = indirectStarter()


 class Baroquen(_BasicScraper):
@ -72,12 +72,15 @@ class Beetlebum(_BasicScraper):
    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile'
-    starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark')))
+    starter = indirectStarter()
    multipleImagesPerStrip = True
    imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)'))
    prevSearch = compile(tagre('a', 'href',
                               r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl,
                               after='prev'))
+    latestSearch = compile(tagre('a', 'href',
+                                 r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl,
+                                 after='bookmark'))
    help = 'Index format: yyyy/mm/dd/striptitle'
    lang = 'de'

@ -223,7 +226,8 @@ class BoredAndEvil(_BasicScraper):
    firstStripUrl = stripUrl % '2004-06-07'
    imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)'))
    prevSearch = compile(r'First Comic.+<a href="(.+?)".+previous-on.gif')
-    starter = indirectStarter(url, prevSearch)
+    latestSearch = prevSearch
+    starter = indirectStarter()
    help = 'Index format: yyyy-mm-dd'


--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@ -55,7 +55,7 @@ class Carciphona(_BasicScraper):
                               after="prevarea"))
    latestSearch = compile(tagre("a", "href",
                                 r'(view\.php\?page=[0-9]+[^"]*)'))
-    starter = indirectStarter(url, latestSearch)
+    starter = indirectStarter()

    @classmethod
    def namer(cls, imageUrl, pageUrl):
@ -275,10 +275,11 @@ class CoolCatStudio(_BasicScraper):

 class CorydonCafe(_ParserScraper):
    url = 'http://corydoncafe.com/'
-    starter = indirectStarter(url, '//ul//a')
+    starter = indirectStarter()
    stripUrl = url + '%s.php'
    imageSearch = "//center[2]//img"
    prevSearch = '//a[@title="prev"]'
+    latestSearch = '//ul//a'
    help = 'Index format: yyyy/stripname'

    @classmethod
@ -345,14 +346,15 @@ class CucumberQuest(_BasicScraper):
    rurl = escape(url)
    stripUrl = url + 'cq/%s/'
    firstStripUrl = stripUrl % 'page-1'
-    starter = indirectStarter(url + 'recent.html',
-                              compile(r'window\.location="(/cq/[^"]+/)"'))
+    startUrl = url + 'recent.html'
+    starter = indirectStarter()
    imageSearch = (
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
    )
    prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
+    latestSearch = compile(r'window\.location="(/cq/[^"]+/)"')
    help = 'Index format: stripname'


@ -377,11 +379,12 @@ class Curvy(_ParserScraper):

 class CyanideAndHappiness(_BasicScraper):
    url = 'http://www.explosm.net/comics/'
-    starter = bounceStarter(url, compile(tagre("a", "href", r"(/comics/\d+/)", after="next-comic")))
+    starter = bounceStarter()
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '15'
    imageSearch = compile(tagre("img", "src", r'(//files.explosm.net/comics/[^"]+)', before="main-comic"))
    prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', after="previous-comic"))
+    nextSearch = compile(tagre("a", "href", r"(/comics/\d+/)", after="next-comic"))
    help = 'Index format: n (unpadded)'

    def shouldSkipUrl(self, url, data):
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@ -13,17 +13,12 @@ from ..util import tagre
 from .common import _WordPressScraper, xpath_class


-class DailyDose(_BasicScraper):
+class DailyDose(_ParserScraper):
    url = 'http://dailydoseofcomics.com/'
-    starter = indirectStarter(
-        url, compile(tagre("a", "href",
-                           r'(http://dailydoseofcomics\.com/[^"]+)',
-                           after="preview")))
-    stripUrl = url + '%s/'
-    imageSearch = compile(tagre("img", "src", r'([^"]+)',
-                                before="align(?:none|center)"))
-    prevSearch = compile(tagre("a", "href", r'(http://dailydoseofcomics\.com/[^"]+)', after="prev"))
-    help = 'Index format: stripname'
+    starter = indirectStarter()
+    imageSearch = '//p/a/img'
+    prevSearch = '//a[@rel="prev"]'
+    latestSearch = '//a[@rel="bookmark"]'


 class DamnLol(_BasicScraper):
@ -31,13 +26,13 @@ class DamnLol(_BasicScraper):
    rurl = escape(url)
    stripUrl = url + '%s.html'
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev"))
+    nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next"))
    imageSearch = (
        compile(tagre("img", "src", r'(%si/[^"]+)' % rurl)),
        compile(tagre("img", "src", r'(%spics/[^"]+)' % rurl)),
    )
    help = 'Index format: stripname-number'
-    starter = bounceStarter(
-        url, compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="next")))
+    starter = bounceStarter()

    @classmethod
    def namer(cls, imageUrl, pageUrl):
@ -160,9 +155,12 @@ class Dilbert(_BasicScraper):
    url = 'http://dilbert.com/'
    stripUrl = url + '/strip/%s/'
    firstStripUrl = stripUrl % '1989-04-16'
-    starter = indirectStarter(url, compile(tagre("a", "href", r'(http://dilbert.com/strip/[0-9-]*)', after="Click to see")))
+    starter = indirectStarter()
    prevSearch = compile(tagre("a", "href", r'(/strip/\d+-\d+-\d+)', after="Older Strip"))
    imageSearch = compile(tagre("img", "src", r'(http://assets.amuniversal.com/\w+)'))
+    latestSearch = compile(tagre("a", "href",
+                                 r'(http://dilbert.com/strip/[0-9-]*)',
+                                 after="Click to see"))
    help = 'Index format: yyyy-mm-dd'

    @classmethod
@ -254,9 +252,10 @@ class DresdenCodak(_BasicScraper):
    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
                         tagre("img", "src", r"%sm_prev2?\.png" % rurl,
                               quote=""))
-    starter = indirectStarter(
-        url, compile(tagre("div", "id", "preview") +
-                     tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl)))
+    latestSearch = compile(tagre("div", "id", "preview") +
+                           tagre("a", "href",
+                                 r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
+    starter = indirectStarter()


 class DrFun(_BasicScraper):
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@ -15,9 +15,10 @@ from .common import _WordPressScraper, WP_LATEST_SEARCH, xpath_class

 class EarthsongSaga(_ParserScraper):
    url = 'http://earthsongsaga.com/index.php'
-    starter = indirectStarter(url, '//div[@id="leftmenu"]/span[1]/a[1]')
+    starter = indirectStarter()
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[@title="Previous"]'
+    latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]'

    @classmethod
    def fetchUrls(cls, url, data, urlSearch):
@ -43,21 +44,23 @@ class EarthsongSaga(_ParserScraper):
 class EasilyAmused(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/college-daze/ea01/'
    firstStripUrl = url
-    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+    latestSearch = WP_LATEST_SEARCH
+    starter = indirectStarter()


 class EatLiver(_BasicScraper):
    url = 'http://www.eatliver.com/'
    rurl = escape(url)
-    starter = indirectStarter(url, compile(
-        tagre("a", "href", r'(i\.php\?n=\d+)') +
-        tagre("img", "src", r'img/small/[^"]+') + r"</a>\s*<br"))
+    starter = indirectStarter()
    stripUrl = url + "i.php?n=%s"
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
                                before="image_src"))
    prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
                         "&#060;&#060; Previous")
+    latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
+                           tagre("img", "src", r'img/small/[^"]+') +
+                           r"</a>\s*<br")


 class EatThatToast(_BasicScraper):
@ -181,7 +184,8 @@ class Erstwhile(_WordPressScraper):
 class Eryl(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/prologue-migration/page-i/'
    firstStripUrl = url
-    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+    latestSearch = WP_LATEST_SEARCH
+    starter = indirectStarter()
    help = 'This was known as DarkWings in previous Dosage versions'


--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -26,10 +26,9 @@ class FantasyRealms(_BasicScraper):
    stripUrl = url + 'manga/%s.php'
    imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
    prevSearch = compile(r'<a href="(.+?)"><img src="../images/nav-back.gif"', IGNORECASE)
+    latestSearch = compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE)
    help = 'Index format: nnn'
-    starter = indirectStarter(
-        url,
-        compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
+    starter = indirectStarter()


 class FauxPas(_BasicScraper):
@ -47,8 +46,9 @@ class FeyWinds(_BasicScraper):
    stripUrl = baseUrl + 'comic/page.php?id=%s'
    imageSearch = compile(r"(../comic/pages//.+?)'")
    prevSearch = compile(r"(page.php\?id=.+?)'.+?navprevious.png")
+    latestSearch = compile(r'(comic/page.php\?id.+?)"')
    help = 'Index format: n (unpadded)'
-    starter = indirectStarter(url, compile(r'(comic/page.php\?id.+?)"'))
+    starter = indirectStarter()


 class FilibusterCartoons(_BasicScraper):
@ -159,9 +159,9 @@ class FredoAndPidjin(_BasicScraper):
    )
    multipleImagesPerStrip = True
    prevSearch = compile(tagre('a', 'href', '([^"]+)') + "Prev</a>")
-    starter = indirectStarter(
-       url,
-       compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
+    latestSearch = compile(tagre('a', 'href', "(" + url +
+                                 r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))
+    starter = indirectStarter()


 class Freefall(_BasicScraper):
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@ -27,15 +27,15 @@ class Garanos(_BasicScraper):
    baseUrl = 'http://garanos.alexheberling.com/'
    rurl = escape(baseUrl)
    url = baseUrl + 'pages/page-1/'
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
-                     after="nav-last")))
+    starter = indirectStarter()
    stripUrl = baseUrl + 'pages/page-%s'
    imageSearch = compile(
        tagre("img", "src",
              r'(%swp-content/uploads/sites/\d+/\d+/\d+/[^"]+)' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
                               after="prev"))
+    latestSearch = compile(tagre("a", "href", r'(%spages/[^"]+)' % rurl,
+                                 after="nav-last"))
    help = 'Index format: n (unpadded)'


@ -136,14 +136,14 @@ class GoGetARoomie(_ComicControlScraper):

 class GoneWithTheBlastwave(_BasicScraper):
    url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1'
-    starter = indirectStarter(
-        url, compile(r'href="(index.php\?p=comic&amp;nro=\d+)">' +
-                     r'<img src="images/page/default/latest'))
+    starter = indirectStarter()
    stripUrl = url[:-1] + '%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(r'<img.+src=".+(/comics/.+?)"')
    prevSearch = compile(r'href="(index.php\?p=comic&amp;nro=\d+)">' +
                         r'<img src="images/page/default/previous')
+    latestSearch = compile(r'href="(index.php\?p=comic&amp;nro=\d+)">' +
+                           r'<img src="images/page/default/latest')
    help = 'Index format: n'

    @classmethod
--- a/dosagelib/plugins/h.py
+++ b/dosagelib/plugins/h.py
@ -41,15 +41,15 @@ class _HappyJar(_WordPressScraper):
 class HarkAVagrant(_BasicScraper):
    url = 'http://www.harkavagrant.com/'
    rurl = escape(url)
-    starter = bounceStarter(
-        url, compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
-                     tagre("img", "src", "buttonnext.png")))
+    starter = bounceStarter()
    stripUrl = url + 'index.php?id=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl,
                                after='BORDER'))
    prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
                         tagre("img", "src", "buttonprevious.png"))
+    nextSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) +
+                         tagre("img", "src", "buttonnext.png"))
    help = 'Index format: number'

    @classmethod
--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@ -1,8 +1,12 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function

 from re import compile, escape
+
 from ..scraper import _BasicScraper
 from ..util import tagre
 from ..helpers import indirectStarter
@ -30,9 +34,12 @@ class JerkCity(_BasicScraper):
 class JimBenton(_BasicScraper):
    url = 'http://www.jimbenton.com/page14/page14.html'
    stripUrl = 'http://www.jimbenton.com/page14/files/JimBentonComic-%s.html'
-    starter = indirectStarter(url, compile(tagre("a", "href", r'(files/JimBentonComic-[^>]+\.html)', quote="")))
-    imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)', before="photo-frame"))
-    prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)', quote="") + "Next")
+    starter = indirectStarter()
+    imageSearch = compile(tagre("img", "src", r'(JimBentonComic-[^"]+)',
+                                before="photo-frame"))
+    prevSearch = compile(tagre("a", "href", r'(JimBentonComic-[^>]+\.html)',
+                               quote="") + "Next")
+    latestSearch = compile(tagre("a", "href", r'(files/JimBentonComic-[^>]+\.html)', quote=""))
    help = 'Index format: stripname'


@ -58,6 +65,7 @@ class JustAnotherEscape(_BasicScraper):
    rurl = escape(url)
    stripUrl = url + 'index.cgi?date=%s'
    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s/index\.cgi\?date=\d+)' % rurl)
-     + tagre("img", "alt", "Previous Comic"))
+    prevSearch = compile(tagre("a", "href",
+                               r'(%s/index\.cgi\?date=\d+)' % rurl) +
+                         tagre("img", "alt", "Previous Comic"))
    help = 'Index format: yyyymmdd'
--- a/dosagelib/plugins/k.py
+++ b/dosagelib/plugins/k.py
@ -9,7 +9,6 @@ from re import compile, escape, IGNORECASE

 from ..scraper import _BasicScraper
 from ..util import tagre
-from ..helpers import indirectStarter
 from .common import _ComicControlScraper, _WordPressScraper, xpath_class


@ -81,4 +80,3 @@ class KuroShouri(_BasicScraper):
        tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,
              after="previous"))
    help = 'Index format: chapter-n-page-m'
-    starter = indirectStarter(url, prevSearch)
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@ -21,10 +21,10 @@ class Lackadaisy(_BasicScraper):
    imageSearch = compile(tagre("img", "src", r'(http://www\.lackadaisycats\.com/comic/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r"(/comic\.php\?comicid=[0-9]+)") +
                         "&lt; Previous")
+    nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
+                         "Next")
    help = 'Index format: n'
-    starter = bounceStarter(
-        url, compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") +
-                     "Next"))
+    starter = bounceStarter()

    @classmethod
    def namer(cls, imageUrl, pageUrl):
@ -37,7 +37,8 @@ class Lackadaisy(_BasicScraper):
 class Laiyu(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/'
    firstStripUrl = url
-    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+    latestSearch = WP_LATEST_SEARCH
+    starter = indirectStarter()


 class LasLindas(_BasicScraper):
@ -64,9 +65,9 @@ class LeastICouldDo(_BasicScraper):
    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d{8,9}\.\w{1,4})' % rurl))
    prevSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
                               after="Previous"))
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
-                           after="feature-comic")))
+    latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl,
+                                 after="feature-comic"))
+    starter = indirectStarter()
    help = 'Index format: yyyymmdd'


@ -110,12 +111,11 @@ class LoadingArtist(_ParserScraper):

 class LookingForGroup(_ParserScraper):
    url = 'http://www.lfgcomic.com/'
-    rurl = escape(url)
    stripUrl = url + 'page/%s/'
    firstStripUrl = stripUrl % '1'
    css = True
    imageSearch = '#comic img'
    prevSearch = '#comic-left > a'
-    starter = indirectStarter(url, '#header-dropdown-comic-lfg > a:nth-of-type(2)')
-    nameSearch = compile(r'/page/([-0-9]+)/')
+    latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)'
+    starter = indirectStarter()
    help = 'Index format: nnn'
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@ -102,9 +102,9 @@ class NichtLustig(_BasicScraper):
    lang = 'de'
    imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
    prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
+    latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))
    help = 'Index format: yymmdd'
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
+    starter = indirectStarter()


 class Nicky510(_WordPressScraper):
@ -136,7 +136,8 @@ class NobodyScores(_BasicScraper):
 class NoMoreSavePoints(_WordPressScraper):
    url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/'
    firstStripUrl = url
-    starter = indirectStarter(firstStripUrl, WP_LATEST_SEARCH)
+    latestSearch = WP_LATEST_SEARCH
+    starter = indirectStarter()


 class NoNeedForBushido(_BasicScraper):
@ -149,10 +150,10 @@ class NoNeedForBushido(_BasicScraper):
            after="attachment-full"))
    prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
                               after="previous-webcomic"))
+    latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
+                                 after="last-webcomic"))
    help = 'Index format: nnn'
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl,
-                           after="last-webcomic")))
+    starter = indirectStarter()


 class NotInventedHere(_BasicScraper):
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@ -8,7 +8,6 @@ from __future__ import absolute_import, division, print_function
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter
 from ..util import tagre
 from .common import _WordPressScraper, xpath_class

@ -53,7 +52,6 @@ class OkCancel(_BasicScraper):
    imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl))
    prevSearch = compile(tagre("div", "class", "previous") +
                         tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl))
-    starter = indirectStarter(url, prevSearch)
    help = 'Index format: yyyymmdd'


--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@ -20,12 +20,13 @@ class PandyLand(_WordPressScraper):

 class ParadigmShift(_BasicScraper):
    url = 'http://www.paradigmshiftmanga.com/'
-    starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)',
-                                                 after="next-comic-link")))
+    starter = indirectStarter()
    stripUrl = url + 'ps/%s.html'
    imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)',
                               after="previous-comic-link"))
+    latestSearch = compile(tagre("a", "href", r'([^"]+)',
+                                 after="next-comic-link"))
    help = 'Index format: custom'


@ -72,7 +73,6 @@ class PennyAndAggie(_BasicScraper):
    imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r"(index\.php\?p\=\d+)", quote="'") +
                         tagre("img", "src", r'%simages/previous_day\.gif' % rurl, quote=""))
-    starter = indirectStarter(url, prevSearch)
    help = 'Index format: n (unpadded)'


@ -162,11 +162,12 @@ class PicPakDog(_BasicScraper):

 class PiledHigherAndDeeper(_BasicScraper):
    url = 'http://www.phdcomics.com/comics.php'
-    starter = bounceStarter(url, compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif'))
+    starter = bounceStarter()
    stripUrl = url + '?comicid=%s'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote=""))
    prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
+    nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
    help = 'Index format: n (unpadded)'
    namer = queryNamer('comicid', usePageUrl=True)

@ -204,9 +205,9 @@ class PokeyThePenguin(_ParserScraper):
    stripUrl = url + 'index%s.html'
    firstStripUrl = stripUrl % '1'
    imageSearch = '//p/img'
-    prevSearch = True
+    latestSearch = '(//a)[last()]'
    multipleImagesPerStrip = True
-    starter = indirectStarter(url, "(//a)[last()]")
+    starter = indirectStarter()
    help = 'Index format: number'

    def getPrevUrl(self, url, data):
@ -230,22 +231,22 @@ class PoorlyDrawnLines(_BasicScraper):

 class Precocious(_BasicScraper):
    url = 'http://www.precociouscomic.com/'
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
-                     tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
-    )
+    starter = indirectStarter()
    stripUrl = url + 'archive/comic/%s'
    imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
    prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
+    latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
+                           tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
    help = 'Index format: yyyy/mm/dd'


 class PS238(_ParserScraper):
    url = 'http://ps238.nodwick.com/'
    stripUrl = url + '/comic/%s/'
-    starter = bounceStarter(url, '//a[@class="comic-nav-base comic-nav-next"]')
+    starter = bounceStarter()
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]'
+    nextSearch = '//a[@class="comic-nav-base comic-nav-next"]'
    help = 'Index format: yyyy-mm-dd'


--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@ -1,10 +1,13 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function

 from re import compile, escape
-from ..scraper import _BasicScraper
-from ..scraper import _ParserScraper
+
+from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import bounceStarter
 from ..util import tagre

@ -16,6 +19,7 @@ class RadioactivePanda(_BasicScraper):
    prevSearch = compile(r'<a href="(/comic/.*?)".+?previous_btn')
    help = 'Index format: n (no padding)'

+
 class RalfTheDestroyer(_ParserScraper):
    url = 'http://ralfthedestroyer.com/'
    stripUrl = url + '%s/'
@ -47,12 +51,12 @@ class RealmOfAtland(_BasicScraper):
 class RedMeat(_BasicScraper):
    baseUrl = 'http://www.redmeat.com/redmeat/'
    url = baseUrl + 'current/index.html'
-    starter = bounceStarter(url,
-        compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="next")))
+    starter = bounceStarter()
    stripUrl = baseUrl + '%s/index.html'
    firstStripUrl = stripUrl % '1996-06-10'
    imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="prev"))
+    nextSearch = compile(tagre("a", "href", r'(http://www\.redmeat\.com/[^"]*)', after="next"))
    help = 'Index format: yyyy-mm-dd'


@ -81,7 +85,8 @@ class RomanticallyApocalyptic(_BasicScraper):
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src", r'(%sart/\d+[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s\d+[^"]+)' % rurl)+"\s*"+tagre('span', 'class', 'spritePrevious'))
+    prevSearch = compile(tagre("a", "href", r'(%s\d+[^"]+)' % rurl) + "\s*" +
+                         tagre('span', 'class', 'spritePrevious'))
    help = 'Index format: n'
    adult = True

@ -101,5 +106,6 @@ class Ruthe(_BasicScraper):
    firstStripUrl = stripUrl % '1'
    lang = 'de'
    imageSearch = compile(tagre("img", "src", r'(/?cartoons/strip_\d+[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)')+'vorheriger')
+    prevSearch = compile(tagre("a", "href", r'(/cartoon/\d+/datum/asc/)') +
+                         'vorheriger')
    help = 'Index format: number'
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -69,9 +69,10 @@ class ScandinaviaAndTheWorld(_ParserScraper):
    url = 'http://satwcomic.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
-    starter = indirectStarter(url, '//a[text()="View latest comic"]')
+    starter = indirectStarter()
    imageSearch = '//img[@itemprop="image"]'
    prevSearch = '//a[@accesskey="p"]'
+    latestSearch = '//a[text()="View latest comic"]'
    textSearch = '//span[@itemprop="articleBody"]'
    help = 'Index format: stripname'

@@ -194,9 +195,9 @@ class SexyLosers(_BasicScraper):
    stripUrl = url + '%s.html'
    imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
    prevSearch = compile(r'<a href="(/\d{3}\.\w+?)"><font color = FFAAAA><<', IGNORECASE)
+    latestSearch = compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE)
    help = 'Index format: nnn'
-    starter = indirectStarter(url,
-                              compile(r'SEXY LOSERS <A HREF="(.+?)">Latest SL Comic \(#\d+\)</A>', IGNORECASE))
+    starter = indirectStarter()

    @classmethod
    def namer(cls, imageUrl, pageUrl):
@@ -333,7 +334,8 @@ class SnowFlame(_WordPressScraper):
    url = 'http://www.snowflamecomic.com/'
    stripUrl = url + '?comic=snowflame-%s-%s'
    firstStripUrl = stripUrl % ('01', '01')
-    starter = bounceStarter(url, WP_LATEST_SEARCH)
+    starter = bounceStarter()
+    nextSearch = WP_LATEST_SEARCH
    help = 'Index format: chapter-page'

    def getIndexStripUrl(self, index):
@@ -392,8 +394,9 @@ class Spamusement(_BasicScraper):
    imageSearch = compile(r'<img src="(%sgfx/\d+\..+?)"' % rurl, IGNORECASE)
    prevSearch = compile(r'<a href="(%sindex.php/comics/view/.+?)">' % rurl,
                         IGNORECASE)
+    latestSearch = prevSearch
    help = 'Index format: n (unpadded)'
-    starter = indirectStarter(url, prevSearch)
+    starter = indirectStarter()


 class SpareParts(_BasicScraper):
@@ -504,8 +507,7 @@ class StuffNoOneToldMe(_BasicScraper):
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % '2010/05/01'
    olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)"
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", olderHref, quote="'")))
+    starter = indirectStarter()
    imageSearch = (
        compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') +
                r"(?:</a>|<br />)"),
@@ -515,6 +517,7 @@ class StuffNoOneToldMe(_BasicScraper):
    )
    prevSearch = compile(tagre("a", "href", olderHref, quote="'",
                               before="older-link"))
+    latestSearch = compile(tagre("a", "href", olderHref, quote="'"))
    multipleImagesPerStrip = True
    help = 'Index format: yyyy/mm/stripname'

--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -76,14 +76,14 @@ class TheNoob(_BasicScraper):


 class TheOrderOfTheStick(_BasicScraper):
-    baseUrl = 'http://www.giantitp.com/'
-    url = baseUrl + 'comics/oots0863.html'
-    stripUrl = baseUrl + 'comics/oots%s.html'
+    url = 'http://www.giantitp.com/'
+    stripUrl = url + 'comics/oots%s.html'
    firstStripUrl = stripUrl % '0001'
    imageSearch = compile(r'<IMG src="(/comics/images/[^"]+)">')
    prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
+    latestSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"')
    help = 'Index format: n (unpadded)'
-    starter = indirectStarter(baseUrl, compile(r'<A href="(/comics/oots\d{4}\.html)"'))
+    starter = indirectStarter()

    @classmethod
    def namer(cls, imageUrl, pageUrl):
@@ -116,9 +116,13 @@ class TheThinHLine(_BasicScraper):
    rurl = escape(url)
    stripUrl = url + 'post/%s'
    firstStripUrl = stripUrl % '3517345105'
-    imageSearch = compile(tagre('img', 'data-src', r'([^"]+media.tumblr.com/[^"]+)', before='content-image'))
+    imageSearch = compile(tagre('img', 'data-src',
+                                r'([^"]+media.tumblr.com/[^"]+)',
+                                before='content-image'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)') + '&gt;</a>')
-    starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')))
+    latestSearch = compile(tagre("a", "href", r'([^"]+)',
+                                 after='class="timestamp"'))
+    starter = indirectStarter()
    adult = True

    indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))
--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@@ -21,12 +21,10 @@ class Underling(_WordPressScraper):

 class Undertow(_BasicScraper):
    url = 'http://undertow.dreamshards.org/'
-    stripUrl = url + '%s'
    imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
    prevSearch = compile(r'href="(.+?)".+?teynpoint')
-    help = 'Index format: good luck !'
-    starter = indirectStarter(url,
-                              compile(r'href="(.+?)".+?Most recent page'))
+    latestSearch = compile(r'href="(.+?)".+?Most recent page')
+    starter = indirectStarter()


 class UnicornJelly(_BasicScraper):
@@ -46,9 +44,10 @@ class Unsounded(_BasicScraper):
    rurl = escape(url)
    imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)'))
    prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back'))
-    starter = indirectStarter(
-        url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
-                     tagre("img", "src", r"%simages/newpages\.png" % rurl)))
+    latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) +
+                           tagre("img", "src",
+                                 r"%simages/newpages\.png" % rurl))
+    starter = indirectStarter()
    help = 'Index format: chapter-number'

    def getIndexStripUrl(self, index):
--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper):
 class WebDesignerCOTW(_BasicScraper):
    url = 'http://www.webdesignerdepot.com/'
    rurl = escape(url)
-    starter = indirectStarter(url, compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl)))
+    starter = indirectStarter()
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1'
    imageSearch = (
@@ -57,6 +57,7 @@ class WebDesignerCOTW(_BasicScraper):
    multipleImagesPerStrip = True
    prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl,
                               before='prev', quote="'"))
+    latestSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl))
    help = 'Index format: yyyy/mm/stripname'

    def shouldSkipUrl(self, url, data):
--- a/dosagelib/plugins/wordpress.py
+++ b/dosagelib/plugins/wordpress.py
@@ -4,13 +4,13 @@ from ..scraper import make_scraper
 from .common import _WordPressScraper


-def add(name, url, starter=None):
+def add(name, start):
    attrs = dict(
        name=name,
-        url=url
+        url='http://hijinksensue.com/',
+        latestSearch=start,
+        starter=indirectStarter()
    )
-    if starter:
-        attrs['starter'] = starter
    globals()[name] = make_scraper(name, _WordPressScraper, **attrs)


@@ -22,4 +22,4 @@ for (name, starterXPath) in [
    ('HijinksEnsueConvention', '//h4[text()="Latest Fancy Convention Sketches"]/..//a'),
    ('HijinksEnsuePhoto', '//h4[text()="Latest Fancy Photo Comic"]/..//a')
 ]:
-    add(name, 'http://hijinksensue.com/', starter=indirectStarter('http://hijinksensue.com/', starterXPath))
+    add(name, starterXPath)
--- a/dosagelib/plugins/x.py
+++ b/dosagelib/plugins/x.py
@@ -1,6 +1,9 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function

 from re import compile

@@ -12,13 +15,13 @@ from ..util import tagre
 class Xkcd(_BasicScraper):
    name = 'xkcd'
    url = 'http://xkcd.com/'
-    starter = bounceStarter(url, compile(tagre("a", "href", r'(/\d+/)',
-                                               before="next")))
+    starter = bounceStarter()
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
    imageSearch = compile(tagre("img", "src",
                                r'(//imgs\.xkcd\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
+    nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
    help = 'Index format: n (unpadded)'
    textSearch = compile(tagre("img", "title", r'([^"]+)',
                               before=r'//imgs\.xkcd\.com/comics/'))
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@@ -22,15 +22,16 @@ class ZapComic(_ParserScraper):

 class Zapiro(_BasicScraper):
    url = 'http://www.mg.co.za/zapiro/'
-    starter = bounceStarter(
-        url, compile(tagre("li", "class", r'nav_older') +
-                     tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')))
+    starter = bounceStarter()
    stripUrl = 'http://mg.co.za/cartoon/%s'
    firstStripUrl = stripUrl % 'zapiro_681'
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
    prevSearch = compile(tagre("li", "class", r'nav_older') +
                         tagre("a", "href",
                               r'(http://mg\.co\.za/cartoon/[^"]+)'))
+    nextSearch = compile(tagre("li", "class", r'nav_older') +
+                         tagre("a", "href",
+                               r'(http://mg\.co\.za/cartoon/[^"]+)'))
    help = 'Index format: yyyy-mm-dd-stripname'

    @classmethod