Fix comics.

2012-12-04 07:02:40 +01:00 · 2012-12-04 07:02:40 +01:00 · 387dff79a9
commit 387dff79a9
parent 45df462a47
33 changed files with 372 additions and 241 deletions
--- a/22
+++ b/22
@ -98,15 +98,19 @@ def getComics(options, comics):
        else:
            strips = scraperobj.getCurrentStrips()
        first = True
-        for strip in strips:
-            _errors, skipped = saveComicStrip(strip, options.basepath)
-            errors += _errors
-            if not first and scraperobj.indexes:
-                # stop when indexed retrieval skipped all images for one
-                # comie strip (except the first one)
-                out.write("Stop retrieval because image file already exists")
-                break
-            first = False
+        try:
+            for strip in strips:
+                _errors, skipped = saveComicStrip(strip, options.basepath)
+                errors += _errors
+                if not first and scraperobj.indexes:
+                    # stop when indexed retrieval skipped all images for one
+                    # comic strip (except the first one)
+                    out.write("Stop retrieval because image file already exists")
+                    break
+                first = False
+        except IOError as msg:
+            out.write('Error getting strip: %s' % msg)
+            errors += 1
    events.getHandler().end()
    return errors

--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@ -1,10 +1,7 @@
 # -*- coding: iso-8859-1 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam
-import re
-
 from .util import fetchUrl, getQueryParams
-from .scraper import _BasicScraper

 def queryNamer(paramName, usePageUrl=False):
    """Get name from URL query part."""
@ -81,19 +78,3 @@ class IndirectLatestMixin(object):

    latestUrl = property(getLatestUrl)

-
-class _PHPScraper(_BasicScraper):
-    """
-    Scraper for comics using phpComic/CUSP.
-
-    This provides an easy way to define scrapers for webcomics using phpComic.
-    """
-    imageUrl = property(lambda self: self.basePath + 'daily.php?date=%s')
-    imageSearch = property(lambda self: re.compile(r'<img alt=[^>]+ src="(%scomics/\d{6}\..+?)">' % (self.basePath,)))
-
-    help = 'Index format: yymmdd'
-
-    @classmethod
-    def starter(cls):
-        """Get starter URL."""
-        return cls.basePath + cls.latestUrl
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@ -5,8 +5,8 @@
 from re import compile

 from ..scraper import _BasicScraper
-from ..helpers import bounceStarter, indirectStarter
-from ..util import tagre, getQueryParams
+from ..helpers import bounceStarter
+from ..util import tagre


 class CaptainSNES(_BasicScraper):
@ -144,37 +144,6 @@ class Curvy(_BasicScraper):
    help = 'Index format: yyyymmdd'


-def cloneManga(name, shortName, lastStrip=None):
-    url = 'http://manga.clone-army.org'
-    baseUrl = '%s/%s.php' % (url, shortName)
-
-    def namer(self, imageUrl, pageUrl):
-        return '%03d' % int(getQueryParams(pageUrl)['page'][0])
-
-    attrs = dict(
-        name='CloneManga/' + name,
-        stripUrl = baseUrl + '?page=%s',
-        imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
-        prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
-        help='Index format: n',
-        namer=namer,
-    )
-    if lastStrip is None:
-        attrs['starter'] = indirectStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"last\.gif")))
-    else:
-        attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
-    return type('CloneManga_%s' % name, (_BasicScraper,), attrs)
-
-
-anm = cloneManga('AprilAndMay', 'anm')
-kanami = cloneManga('Kanami', 'kanami')
-momoka = cloneManga('MomokaCorner', 'momoka')
-nana = cloneManga('NanasEverydayLife', 'nana', '78')
-pxi = cloneManga('PaperEleven', 'pxi', '311')
-t42r = cloneManga('Tomoyo42sRoom', 't42r')
-penny = cloneManga('PennyTribute', 'penny')
-
-
 class CatAndGirl(_BasicScraper):
    latestUrl = 'http://catandgirl.com/'
    stripUrl = latestUrl + '?p=%s'
--- a/dosagelib/plugins/clonemanga.py
+++ b/dosagelib/plugins/clonemanga.py
@ -0,0 +1,61 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
+# Copyright (C) 2012 Bastian Kleineidam
+from re import compile
+from ..scraper import make_scraper
+from ..util import tagre, getQueryParams, fetchUrl
+
+
+_linkTag = tagre("a", "href", r'([^"]+)')
+_prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
+_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
+_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
+
+def add(name, shortName, imageFolder=None, lastStrip=None):
+    classname = 'CloneManga_%s' % name
+    _url = 'http://manga.clone-army.org'
+    baseUrl = '%s/%s.php' % (_url, shortName)
+    if imageFolder is None:
+        imageFolder = shortName
+
+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        return '%03d' % int(getQueryParams(pageUrl)['page'][0])
+
+    @classmethod
+    def _starter(cls):
+        # first, try hopping to previous and next comic
+        url = fetchUrl(baseUrl, _prevSearch)
+        if not url:
+            # no previous link found, try hopping to last comic
+            url = fetchUrl(baseUrl, _lastSearch)
+            if not url:
+                raise ValueError("could not find lastSearch pattern %r in %s" % (_lastSearch.pattern, baseUrl))
+            return url
+        url = fetchUrl(url, _nextSearch)
+        if not url:
+            raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, url))
+        return url
+
+    attrs = dict(
+        name='CloneManga/' + name,
+        stripUrl = baseUrl + '?page=%s',
+        imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
+        prevSearch=_prevSearch,
+        help='Index format: n',
+        namer=namer,
+    )
+    if lastStrip is None:
+        attrs['starter'] = _starter
+    else:
+        attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
+    globals()[classname] = make_scraper(classname, **attrs)
+
+
+add('AprilAndMay', 'anm', imageFolder='AAM')
+add('Kanami', 'kanami')
+add('MomokaCorner', 'momoka')
+add('NanasEverydayLife', 'nana', lastStrip='78')
+add('PaperEleven', 'pxi', imageFolder='papereleven', lastStrip='311')
+add('Tomoyo42sRoom', 't42r')
+add('PennyTribute', 'penny')
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@ -6,25 +6,26 @@ from re import compile

 from ..scraper import _BasicScraper
 from ..helpers import indirectStarter
-from ..util import tagre, getQueryParams
-
+from ..util import tagre


 class DMFA(_BasicScraper):
    latestUrl = 'http://www.missmab.com/'
    stripUrl = latestUrl + 'Comics/Vol_%s.php'
    imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'([^"])+')+
-      tagre("img", "src", r'(?:../)?Images/comicprev.gif'))
+    multipleImagesPerStrip = True
+    prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
+      tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
    help = 'Index format: nnn (normally, some specials)'


 class DandyAndCompany(_BasicScraper):
    latestUrl = 'http://www.dandyandcompany.com/'
-    stripUrl = latestUrl + '%s'
-    imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[^"]+)'))
-    prevSearch = compile(r'<a href="(.*)" class="prev"')
-    help = 'Index format: yyyy/mm/dd'
+    stripUrl = None
+    multipleImagesPerStrip = True
+    imageSearch = compile(tagre("a", "href", r'(http://\d+\.bp\.blogspot\.com/[^"]+)', after="imageanchor"))
+    prevSearch = compile(tagre("a", "href", r"([^']+)", quote="'", after="Older Posts"))
+    help = 'Index format: none'


 class DarkWings(_BasicScraper):
@ -63,11 +64,11 @@ class DrFun(_BasicScraper):
    latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
    stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
    imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d{6}/df.+?)">')
+    multipleImagesPerStrip = True
    prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
    help = 'Index format: nnnnn'


-
 class Dracula(_BasicScraper):
    latestUrl = 'http://draculacomic.net/'
    stripUrl = latestUrl + 'comic.php?comicID=%s'
@ -76,7 +77,6 @@ class Dracula(_BasicScraper):
    help = 'Index format: nnn'


-
 class DragonTails(_BasicScraper):
    latestUrl = 'http://www.dragon-tails.com/'
    stripUrl = latestUrl + 'archive.php?date=%s'
@ -96,7 +96,7 @@ class DreamKeepersPrelude(_BasicScraper):
 class Drowtales(_BasicScraper):
    latestUrl = 'http://www.drowtales.com/mainarchive.php'
    stripUrl = latestUrl + '?sid=%s'
-    imageSearch = compile(tagre("img", "src", r'("http://www.drowtales.com/mainarchive/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(http://www\.drowtales\.com/mainarchive/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
    help = 'Index format: number'

@ -105,7 +105,8 @@ class DieselSweeties(_BasicScraper):
    latestUrl = 'http://www.dieselsweeties.com/'
    stripUrl = latestUrl + 'archive/%s'
    imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') + tagre("img", "src", r'http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png'))
+    prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') +
+      tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
    help = 'Index format: n (unpadded)'

    @classmethod
@ -118,14 +119,10 @@ class DieselSweeties(_BasicScraper):
 class DominicDeegan(_BasicScraper):
    latestUrl = 'http://www.dominic-deegan.com/'
    stripUrl = latestUrl + 'view.php?date=%s'
-    imageSearch = compile(r'<img src="(.+?save-as=.+?)" alt')
+    imageSearch = compile(tagre("img", "src", r'(comics/\d+\.gif)'))
    prevSearch = compile(r'"(view.php\?date=.+?)".+?prev21')
    help = 'Index format: yyyy-mm-dd'

-    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        return getQueryParams(imageUrl)['save-as'][0].rsplit('.', 1)[0]
-

 class DorkTower(_BasicScraper):
    latestUrl = 'http://www.dorktower.com/'
--- a/dosagelib/plugins/drunkduck.py
+++ b/dosagelib/plugins/drunkduck.py
@ -35,7 +35,7 @@ def add(name):
            return url
        url = fetchUrl(url, _nextSearch)
        if not url:
-            raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, _url))
+            raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, url))
        return url

    globals()[classname] = make_scraper(classname,
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@ -20,16 +20,10 @@ class EerieCuties(_BasicScraper):
 class Eriadan(_BasicScraper):
    latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
    stripUrl = latestUrl + '%s'
-    # XXX fix image search
-    imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')
+    imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='alt=""'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/nnn (unpadded)'

-    @classmethod
-    def namer(cls, imageUrl, pageUrl):
-        return '%d' % (int(compile(r'p=(\d+)').search(pageUrl).group(1)))
-
-

 class ElGoonishShive(_BasicScraper):
    name = 'KeenSpot/ElGoonishShive'
@ -40,7 +34,6 @@ class ElGoonishShive(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'


-
 class ElGoonishShiveNP(_BasicScraper):
    name = 'KeenSpot/ElGoonishShiveNP'
    latestUrl = 'http://www.egscomics.com/egsnp/'
@ -52,12 +45,10 @@ class ElGoonishShiveNP(_BasicScraper):

 class EmergencyExit(_BasicScraper):
    latestUrl = 'http://www.eecomics.net/'
-    stripUrl = None
+    stripUrl = latestUrl + "?strip_id=%s"
    imageSearch = compile(r'"(comics/.+?)"')
-    prevSearch = compile(r'START.+?"(.+?)"')
-    # XXX ?
-    help = 'God help us now!'
-
+    prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "alt", r"Prior"))
+    help = 'Index format: n'


 class ErrantStory(_BasicScraper):
@ -102,7 +93,7 @@ class EvilInc(_BasicScraper):
 class Exiern(_BasicScraper):
    latestUrl = 'http://www.exiern.com/'
    stripUrl = latestUrl + '?p=%s'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"])'))
+    imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev"))
    help = 'Index format: n'

@ -154,7 +145,6 @@ class ExploitationNow(_BasicScraper):
 class Ellerbisms(_BasicScraper):
    latestUrl = 'http://www.ellerbisms.com/'
    stripUrl = latestUrl + '?p=%s'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/wp-content/uploads/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
    help = 'Index format: nnn'
-
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -49,12 +49,12 @@ class FlakyPastry(_BasicScraper):
    prevSearch = compile(r'<a href="(.+?)".+?btn_back')
    help = 'Index format: nnnn'

-# XXX move to keenspot
+
 class Flipside(_BasicScraper):
    latestUrl = 'http://flipside.keenspot.com/comic.php'
    stripUrl = latestUrl + '?i=%s'
-    imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
-    prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)">&lt')
+    imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
+    prevSearch = compile(tagre("a", "href", r'(http://flipside\.keenspot\.com/comic\.php\?i=\d+)', after="prev"))
    help = 'Index format: nnnn'


@ -114,7 +114,8 @@ class FredoAndPidjin(_BasicScraper):
    homepage = 'http://www.pidjin.net/'
    stripUrl = None
    help = 'Index format: yyyy/mm/dd/name'
-    imageSearch = compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d\d\d\d/\d\d/\d+[^"]+\.png)'))
+    imageSearch = compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d+/\d+/[^"]+\.png)'))
+    multipleImagesPerStrip = True
    prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
    starter = indirectStarter(homepage,
       compile(tagre('a', 'href', "("+homepage+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
--- a/dosagelib/plugins/fallenangel.py
+++ b/dosagelib/plugins/fallenangel.py
@ -12,7 +12,7 @@ _prevSearch = compile(r' <a href="(http://www\.thefallenangel\.co\.uk/.+?)"><img

 def add(name, shortname):
    latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
-    classname = asciify(name)
+    classname = "FallenAngel_" + asciify(name)
    globals()[classname] = make_scraper(classname,
        latestUrl = latestUrl,
        stripUrl = latestUrl + '?date=%s',
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@ -34,17 +34,9 @@ class GUComics(_BasicScraper):
    help = 'Index format: yyyymmdd'


-class GenrezvousPoint(_BasicScraper):
-    latestUrl = 'http://www.genrezvouspoint.com/'
-    stripUrl = latestUrl + 'index.php?comicID=%s'
-    imageSearch = compile(r'<img src=\'(comics/.+?)\'')
-    prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
-    help = 'Index format: nnn'
-
-
 class GirlGenius(_BasicScraper):
    latestUrl = 'http://girlgeniusonline.com/comic.php'
-    stripUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
+    stripUrl = latestUrl + '?date=%s'
    imageSearch = compile(r"(/ggmain/strips/.+?)'")
    prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
    help = 'Index format: yyyymmdd'
@ -99,7 +91,8 @@ class Gunshow(_BasicScraper):
    latestUrl = 'http://gunshowcomic.com/'
    stripUrl = latestUrl + '%s'
    imageSearch = compile(tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+menu/small/previous\.gif'))
+    multipleImagesPerStrip = True
+    prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
    help = 'Index format: n'


@ -131,7 +124,7 @@ class GlassHalfEmpty(_BasicScraper):
    latestUrl = 'http://www.defectivity.com/ghe/index.php'
    stripUrl = latestUrl + '?strip_id=%s'
    imageSearch = compile(r'src="(comics/.+?)"')
-    prevSearch = compile(r'</a><a href="(.+?)"><img src="\.\./images/onback\.jpg"')
+    prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
    help = 'Index format: nnn'


--- a/dosagelib/plugins/h.py
+++ b/dosagelib/plugins/h.py
@ -11,12 +11,3 @@ class HorribleVille(_BasicScraper):
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
    help = 'Index format: yyyymmdd'
-
-
-class HelpDesk(_BasicScraper):
-    latestUrl = 'https://www.eviscerati.org/comics?page=78'
-    stripUrl = 'https://www.eviscerati.org/comics?page=%s'
-    imageSearch = compile(tagre("img", "src", r'(https://www\.eviscerati\.org/files/comics/[^"]+)'))
-    prevSearch = compile(tagre("li", "class", r'pager-previous[^"]+') + tagre("a", "href", r'(/comics\?page=%d+)'))
-    help = 'Index format: n'
-
--- a/dosagelib/plugins/i.py
+++ b/dosagelib/plugins/i.py
@ -9,9 +9,9 @@ from ..util import tagre

 class IDreamOfAJeanieBottle(_BasicScraper):
    latestUrl = 'http://jeaniebottle.com/'
-    stripUrl = latestUrl + 'review.php?comicID='
+    stripUrl = latestUrl + '?p=%s'
    imageSearch = compile(r'(/comics/.+?)"')
-    prevSearch = compile(r'First".+?(review.php.+?)".+?prev_a.gif')
+    prevSearch = compile(tagre("a", "href", r'(http://jeaniebottle\.com/\?p=\d+)', after="prev"))
    help = 'Index format: n (unpadded)'


--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@ -2,7 +2,7 @@
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam

-from re import compile, MULTILINE
+from re import compile
 from ..scraper import _BasicScraper
 from ..util import tagre

@ -21,12 +21,3 @@ class JoeAndMonkey(_BasicScraper):
    imageSearch = compile(r'"(/comic/[^"]+)"')
    prevSearch = compile(r"<a href='(/\d+)'>Previous")
    help = 'Index format: nnn'
-
-
-class JoyOfTech(_BasicScraper):
-    latestUrl = 'http://www.geekculture.com/joyoftech/'
-    stripUrl = latestUrl + 'joyarchives/%s.html'
-    imageSearch = compile(tagre("img", "src", r'(joyimages/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(joyarchives/[^"]+)') + r'.+?Previous', MULTILINE)
-    help = 'Index format: nnn'
-
--- a/dosagelib/plugins/k.py
+++ b/dosagelib/plugins/k.py
@ -4,7 +4,7 @@

 from re import compile, IGNORECASE
 from ..scraper import _BasicScraper
-
+from ..util import tagre

 class Key(_BasicScraper):
    latestUrl = 'http://key.shadilyn.com/latestpage.html'
@ -25,7 +25,7 @@ class Krakow(_BasicScraper):
 class Kukuburi(_BasicScraper):
    latestUrl = 'http://www.kukuburi.com/current/'
    stripUrl = 'http://www.kukuburi.com/v2/%s/'
-    imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
+    imageSearch = compile(tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)', after='alt="[^"]'))
    prevSearch = compile(r'nav-previous.+?"(http.+?)"')
    help = 'Index format: yyyy/mm/dd/stripname'

--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@ -16,16 +16,6 @@ class LasLindas(_BasicScraper):
    help = 'Index format: stripname'


-
-class LesbianPiratesFromOuterSpace(_BasicScraper):
-    latestUrl = 'http://rosalarian.com/lesbianpirates/'
-    stripUrl = latestUrl + 'index.php?p=%s'
-    imageSearch = compile(tagre("img", "src", r'("comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
-    help = 'Index format: n'
-
-
-
 class Lint(_BasicScraper):
    latestUrl = 'http://www.purnicellin.com/lint/'
    stripUrl = latestUrl + '%s'
@ -34,7 +24,6 @@ class Lint(_BasicScraper):
    help = 'Index format: yyyy/mm/dd/num-name'


-
 class LookingForGroup(_BasicScraper):
    latestUrl = 'http://www.lfgcomic.com/page/latest'
    stripUrl = 'http://www.lfgcomic.com/page/%s'
@ -51,8 +40,8 @@ class LookingForGroup(_BasicScraper):
 class LittleGamers(_BasicScraper):
    latestUrl = 'http://www.little-gamers.com/'
    stripUrl = latestUrl + '%s/'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.little-gamers\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://www.little-gamers.com/[^"]+)', before="comic-nav-prev-link"))
+    imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
+    prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers.com/[^"]+)', before="comic-nav-prev-link"))
    help = 'Index format: yyyy/mm/dd/name'


--- a/dosagelib/plugins/m.py
+++ b/dosagelib/plugins/m.py
@ -67,8 +67,8 @@ class Melonpool(_BasicScraper):
 class Misfile(_BasicScraper):
    latestUrl = 'http://www.misfile.com/'
    stripUrl = latestUrl + '?date=%s'
-    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
-    prevSearch = compile(tagre("link", "href", r'([^"]+)', before="Previous"))
+    imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
+    prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", before="Previous"))
    help = 'Index format: yyyy-mm-dd'


@ -76,15 +76,6 @@ class MysteriesOfTheArcana(_BasicScraper):
    latestUrl = 'http://mysteriesofthearcana.com/'
    stripUrl = latestUrl + 'index.php?action=comics&cid=%s'
    imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'()', after="navprevius"))
+    prevSearch = compile(tagre("a", "href", r'(index\.php[^"]+)', after="navprevious"))
    help = 'Index format: n (unpadded)'

-
-
-# XXX move to keenspot?
-class MysticRevolution(_BasicScraper):
-    latestUrl = 'http://mysticrevolution.keenspot.com/'
-    stripUrl = latestUrl + '?cid=%s'
-    imageSearch = compile(tagre("img", "src", r'(http://cdn\.mysticrevolution\.keenspot\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("link", "rel", r'(\?cid=\d+)', before="prev"))
-    help = 'Index format: n (unpadded)'
--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@ -4,14 +4,14 @@

 from re import compile
 from ..scraper import _BasicScraper
-from ..helpers import indirectStarter, _PHPScraper
+from ..helpers import indirectStarter, bounceStarter
 from ..util import tagre


 class NamirDeiter(_BasicScraper):
    latestUrl = 'http://www.namirdeiter.com/'
    stripUrl = latestUrl + 'comics/index.php?date=%s'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.namirdeiter\.com/comics/\d\.jpg)', quote=""))
+    imageSearch = compile(tagre("img", "src", r"'?(http://www\.namirdeiter\.com/comics/\d+\.jpg)'?", quote=""))
    prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous")
    help = 'Index format: yyyymmdd'

@ -63,17 +63,19 @@ class Nukees(_BasicScraper):
    help = 'Index format: yyyymmdd.html'


-
-class NekoTheKitty(_PHPScraper):
-    basePath = 'http://www.nekothekitty.net/cusp/'
-    latestUrl = basePath
-    prevSearch = compile(tagre("a", "href", r'(http://www.nekothekitty.net/comics/[^"]+)') +
+class NekoTheKitty(_BasicScraper):
+    basePath = 'http://www.nekothekitty.net/'
+    stripUrl = basePath + 'comics/%s'
+    starter = bounceStarter(basePath, compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
+      tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallnext.png')))
+    imageSearch = compile(tagre("img", "src", r'(http://(?:img\d+|www)\.smackjeeves\.com/images/uploaded/comics/[^"]+)'))
+    prevSearch = compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
      tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev.png'))
-
+    help = 'Index format: n/n-name'


 class NichtLustig(_BasicScraper):
-    stripUrl = 'http://www.nichtlustig.de/toondb/%s.html'
+    stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
    imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
    prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
    help = 'Index format: yymmdd'
@ -101,6 +103,7 @@ class NekkoAndJoruba(_BasicScraper):
 class NobodyScores(_BasicScraper):
    latestUrl = 'http://nobodyscores.loosenutstudio.com/'
    stripUrl = latestUrl + 'index.php?id=%s'
-    imageSearch = compile(r'><img src="(http://nobodyscores\.loosenutstudio\.com/comix/.+?)"')
+    imageSearch = compile(tagre("img", "src", r'(http://nobodyscores\.loosenutstudio\.com/comix/[^"]+)'))
+    multipleImagesPerStrip = True
    prevSearch = compile(r'<a href="(http://nobodyscores\.loosenutstudio\.com/index.php.+?)">the one before </a>')
    help = 'Index format: nnn'
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@ -28,7 +28,7 @@ class OddFish(_BasicScraper):

 class OnTheEdge(_BasicScraper):
    latestUrl = 'http://ontheedgecomics.com/'
-    stripUrl = 'http://ontheedgecomics.com/comic/ote%s'
+    stripUrl = 'http://ontheedgecomics.com/comic/%s'
    imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
    prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
    help = 'Index format: nnn (unpadded)'
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@ -12,11 +12,10 @@ class PartiallyClips(_BasicScraper):
    latestUrl = 'http://partiallyclips.com/'
    stripUrl = latestUrl + '%s/'
    imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', before="prev"))
+    prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'


-
 class PastelDefender(_BasicScraper):
    latestUrl = 'http://www.pasteldefender.com/coverbackcover.html'
    stripUrl = 'http://www.pasteldefender.com/%s.html'
@ -25,7 +24,6 @@ class PastelDefender(_BasicScraper):
    help = 'Index format: nnn'


-
 class PebbleVersion(_BasicScraper):
    latestUrl = 'http://www.pebbleversion.com/'
    stripUrl = latestUrl + 'Archives/Strip%s.html'
@ -37,7 +35,7 @@ class PebbleVersion(_BasicScraper):
 class PennyAndAggie(_BasicScraper):
    baseUrl = 'http://www.pennyandaggie.com/'
    stripUrl = baseUrl + 'index.php?p=%s'
-    imageSearch = compile(tagre("a", "href", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
                         tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote=""))
    starter = indirectStarter(baseUrl, prevSearch)
@ -47,20 +45,19 @@ class PennyAndAggie(_BasicScraper):
 class PennyArcade(_BasicScraper):
    baseUrl = 'http://penny-arcade.com/comic/'
    starter = bounceStarter(baseUrl,
-       compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntNext"))
+       compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnNext"))
    )
-    stripUrl = baseUrl + '%s/'
+    stripUrl = baseUrl + '%s'
    imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntPrev"))
+    prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnPrev"))
    help = 'Index format: yyyy/mm/dd'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
-        yyyy, mm, dd = pageUrl.split('/')[-4:-1]
+        dummy, yyyy, mm, dd = pageUrl.rsplit('/', 3)
        return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd))


-
 class PeppermintSaga(_BasicScraper):
    latestUrl = 'http://www.pepsaga.com/'
    stripUrl = latestUrl + '?p=%s'
@ -101,7 +98,7 @@ class Precocious(_BasicScraper):
 class PvPonline(_BasicScraper):
    latestUrl = 'http://pvponline.com/comic'
    stripUrl = latestUrl + '%s'
-    imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp\d+\.jpg)'))
+    imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp[^"]+\.jpg)'))
    prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous"))
    help = 'Index format: yyyy/mm/dd/stripname'

@ -135,7 +132,7 @@ evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
 class ProperBarn(_BasicScraper):
    latestUrl = 'http://www.nitrocosm.com/go/gag/'
    stripUrl = latestUrl + '%s/'
-    imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+.png)'))
+    imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+\.[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous"))
    help = 'Index format: nnn'

--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@ -19,7 +19,7 @@ class RadioactivePanda(_BasicScraper):
 # XXX add other comics at http://petitesymphony.com/comics/
 class Rascals(_BasicScraper):
    latestUrl = 'http://rascals.petitesymphony.com/'
-    stripUrl = latestUrl + '/comic/rascals-pg-%s/'
+    stripUrl = latestUrl + 'comic/rascals-pg-%s/'
    imageSearch = compile(tagre("img", "src", r'(http://rascals\.petitesymphony\.com/files/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://rascals\.petitesymphony\.com/comic/[^"]+)', after="Previous"))
    help = 'Index format: num'
@ -36,7 +36,7 @@ class RealLife(_BasicScraper):
 class RedString(_BasicScraper):
    latestUrl = 'http://www.redstring.strawberrycomics.com/'
    stripUrl = latestUrl + 'index.php?id=%s'
-    imageSearch = compile(tagre("img", "src", r'("comics/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
    help = 'Index format: nnn'

--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -10,7 +10,7 @@ from ..util import tagre


 class SailorsunOrg(_BasicScraper):
-    latestUrl = 'http://www.sailorsun.org/'
+    latestUrl = 'http://sailorsun.org/'
    stripUrl = latestUrl + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(http://sailorsun\.org/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://sailorsun\.org/\?p=\d+)', after="prev"))
@ -28,7 +28,7 @@ class SamAndFuzzy(_BasicScraper):
 class SarahZero(_BasicScraper):
    latestUrl = 'http://www.sarahzero.com/'
    stripUrl = latestUrl + 'sz_%s.html'
-    imageSearch = compile(tagre("img", "src", r'(z_spreads/sz_[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(z_(?:spreads|decoy)/sz_[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(sz_\d+\.html)') + tagre("img", "src", r'z_site/sz_05_nav\.gif'))
    help = 'Index format: nnnn'

@ -45,7 +45,8 @@ class SchlockMercenary(_BasicScraper):
    latestUrl = 'http://www.schlockmercenary.com/'
    stripUrl = latestUrl + '%s'
    imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/d+)', after="nav-previous"))
+    multipleImagesPerStrip = True
+    prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous"))
    help = 'Index format: yyyy-mm-dd'


@ -102,7 +103,7 @@ class SluggyFreelance(_BasicScraper):
 class SodiumEyes(_BasicScraper):
    latestUrl = 'http://sodiumeyes.com/'
    stripUrl = latestUrl + '%s/'
-    imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^ ]+)', quote=""))
    prevSearch = compile(tagre("a", "href", r'(http://sodiumeyes\.com/[^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'

@ -110,9 +111,9 @@ class SodiumEyes(_BasicScraper):
 class SpareParts(_BasicScraper):
    baseUrl = 'http://www.sparepartscomics.com/'
    latestUrl = baseUrl + 'comics/?date=20080328'
-    stripUrl = baseUrl + 'comics/?date=s%'
-    imageSearch = compile(tagre("img", "src", r'http://www\.sparepartscomics\.com/comics/[^"]+'))
-    prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)') + "Previous Comic")
+    stripUrl = baseUrl + 'comics/index.php?date=%s'
+    imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
+    prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic")
    help = 'Index format: yyyymmdd'


@ -127,7 +128,7 @@ class Stubble(_BasicScraper):
 class StrawberryDeathCake(_BasicScraper):
    latestUrl = 'http://strawberrydeathcake.com/'
    stripUrl = latestUrl + 'archive/%s/'
-    imageSearch = compile(tagre("img", "src", r'http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+'))
+    imageSearch = compile(tagre("img", "src", r'(http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous"))
    help = 'Index format: stripname'

@ -144,7 +145,8 @@ class SomethingPositive(_BasicScraper):
    latestUrl = 'http://www.somethingpositive.net/'
    stripUrl = latestUrl + 'sp%s.shtml'
    imageSearch = compile(tagre("img", "src", r'(sp\d+\.png)'))
-    prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + "Previous")
+    prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + 
+      "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
    help = 'Index format: mmddyyyy'

    @classmethod
@ -152,7 +154,6 @@ class SomethingPositive(_BasicScraper):
        return pageUrl.split('/')[-1].split('.')[0]


-
 class SexyLosers(_BasicScraper):
    stripUrl = 'http://www.sexylosers.com/%s.html'
    imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
@ -172,7 +173,7 @@ class SexyLosers(_BasicScraper):
 class StarCrossdDestiny(_BasicScraper):
    latestUrl = 'http://www.starcrossd.net/comic.html'
    stripUrl = 'http://www.starcrossd.net/archives/%s.html'
-    imageSearch = compile(r'<img src="(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)">')
+    imageSearch = compile(tagre("img", "src", r'(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
    prevSearch = compile(r'<a href="(http://www\.starcrossd\.net/(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev', IGNORECASE)
    help = 'Index format: nnnnnnnn'

@ -212,11 +213,3 @@ class SMBC(_BasicScraper):
    prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
    help = 'Index format: nnnn'

-
-
-class SomethingLikeLife(_BasicScraper):
-    latestUrl = 'http://www.pulledpunches.com/'
-    stripUrl = latestUrl + '?p=%s'
-    imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
-    prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
-    help = 'Index format: nn'
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@ -10,7 +10,7 @@ from ..util import tagre

 class TheNoob(_BasicScraper):
    latestUrl = 'http://www.thenoobcomic.com/index.php'
-    stripUrl = latestUrl + '?pos=%'
+    stripUrl = latestUrl + '?pos=%s'
    imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
    help = 'Index format: nnnn'
@ -19,7 +19,7 @@ class TheNoob(_BasicScraper):

 class TheOrderOfTheStick(_BasicScraper):
    latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
-    stripUrl = latestUrl + 'comics/oots%s.html'
+    stripUrl = 'http://www.giantitp.com/comics/oots%s.html'
    imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
    prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
    help = 'Index format: n (unpadded)'
@ -31,7 +31,8 @@ class TheParkingLotIsFull(_BasicScraper):
    latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
    stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm'
    imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
-    prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>')
+    multipleImagesPerStrip = True
+    prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
    help = 'Index format: nnn'


@ -40,7 +41,7 @@ class TheWotch(_BasicScraper):
    latestUrl = 'http://www.thewotch.com/'
    stripUrl = latestUrl + '?date=%s'
    imageSearch = compile(r"<img.+?src='(comics/.+?)'")
-    prevSearch = compile(r"<link rel='Previous' href='(\?date=\d+-\d+-\d+)'")
+    prevSearch = compile(r"<link rel='Previous' href='(/\?date=\d+-\d+-\d+)'")
    help = 'Index format: yyyy-mm-dd'


--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@ -6,12 +6,12 @@ from re import compile

 from ..scraper import _BasicScraper
 from ..helpers import bounceStarter, indirectStarter
-from ..util import getQueryParams
+from ..util import getQueryParams, tagre


 class Undertow(_BasicScraper):
    stripUrl = 'http://undertow.dreamshards.org/%s'
-    imageSearch = compile(r'<img src="(.+?)"')
+    imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
    prevSearch = compile(r'href="(.+?)".+?teynpoint')
    help = 'Index format: good luck !'
    starter = indirectStarter('http://undertow.dreamshards.org/',
--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@ -36,6 +36,7 @@ class WhyTheLongFace(_BasicScraper):
    latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
    stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
    imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
+    multipleImagesPerStrip = True
    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
    help = 'Index format: yyyymm'

@ -66,7 +67,7 @@ class WorldOfWarcraftEh(_BasicScraper):
 class Wulffmorgenthaler(_BasicScraper):
    latestUrl = 'http://wumocomicstrip.com/'
    stripUrl = latestUrl + '%s/'
-    imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
+    imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)'))
    prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
    help = 'Index format: yyyy/mm/dd'

--- a/dosagelib/plugins/wlpcomics.py
+++ b/dosagelib/plugins/wlpcomics.py
@ -2,14 +2,15 @@
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012 Bastian Kleineidam

-from re import compile, IGNORECASE
+from re import compile
+from ..util import tagre
 from ..scraper import make_scraper
 from ..helpers import bounceStarter


-_imageSearch = compile(r'SRC="(http://www\.wlpcomics\.com/adult/.+?|http://www\.wlpcomics\.com/general/.+?)"', IGNORECASE)
-_prevSearch = compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
-_nextSearch = compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)
+_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+)'))
+_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous')
+_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next')


 def add(name, path):
@ -35,4 +36,3 @@ add('ChichiChan', 'adult/chichi/')
 add('ChocolateMilkMaid', 'adult/cm/')
 add('MaidAttack', 'general/maidattack/')
 add('ShadowChasers', 'general/shadowchasers/')
-add('Stellar', 'adult/stellar/')
--- a/dosagelib/plugins/y.py
+++ b/dosagelib/plugins/y.py
@ -19,8 +19,8 @@ class YAFGC(_BasicScraper):
 class YouSayItFirst(_BasicScraper):
    latestUrl = 'http://www.yousayitfirst.com/'
    stripUrl = latestUrl + 'comics/index.php?date=%s'
-    imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
+    imageSearch = compile(tagre("img", "src", r"(http://www\.yousayitfirst\.com/comics/[^>']+)", quote="'?"))
+    prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)', quote="'") + "Previous")
    help = 'Index format: yyyymmdd'


--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@ -17,6 +17,11 @@ class Zapiro(_BasicScraper):
    prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
    help = 'Index format: yyyy-mm-dd-stripname'

+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        name = imageUrl.split('/')[-3]
+        return name
+

 class ZombieHunters(_BasicScraper):
    latestUrl = 'http://www.thezombiehunters.com/'
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -78,6 +78,7 @@ class _BasicScraper(object):
        while url:
            imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
            prevUrl = self.prevUrlModifier(prevUrl)
+            out.write("Matched previous URL %s" % prevUrl, 2)
            seen_urls.add(url)
            yield self.getComicStrip(url, imageUrls)
            # avoid recursive URL loops
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@ -163,7 +163,7 @@ def normaliseURL(url):

    pu = list(urlparse.urlparse(url))
    segments = pu[2].split('/')
-    while segments and segments[0] == '':
+    while segments and segments[0] in ('', '..'):
        del segments[0]
    pu[2] = '/' + '/'.join(segments).replace(' ', '%20')
    # remove leading '&' from query
--- a/scripts/drunkduck.py
+++ b/scripts/drunkduck.py
@ -16,14 +16,137 @@ json_file = __file__.replace(".py", ".json")

 # names of comics to exclude
 exclude_comics = [
-    "Twonks_and_Plonkers", # broken images, no real content
-    "U_Chuu_No_Hoshi_Hotoshi_Tsuko", # broken images
-    "Red_Dog_Venue", # start page is broken
    "Monster_Lover", # start page is broken
    "Legacy_of_Blaze", # broken images
    "Dead_Strangers", # broken images
    "Crack", # broken images
    "Iron_Wolf", # broken images
+    "A_Call_to_Destiny__NC_17", # start page requires login
+    "A_Call_to_Destiny_Reloaded", # start page requires login
+    "A_Day_in_the_Life_for_Erik", # broken images
+    "A_Fairly_Twisted_Reality", # start page requires login
+    "Al_and_Scout", # broken images
+    "ANGELOU_____Las_aventuras_de_Nikole", # broken images
+    "Apartment_408_Full_Size", # broken images
+    "Apple_Valley", # broken images
+    "Apt_408_Minis", # broken images
+    "atxs", # broken images
+    "A_Word_Of_Wisdom", # broken images
+    "Brathalla", # broken images
+    "Binary_Souls_Other_Dimensions", # broken images
+    "BK_Shattered_Hate", # broken images
+    "Chomp", # broken images
+    "Chu_and_Kenny", # broken images
+    "Coga_Suro_2", # broken images
+    "Creepy_Girl_and_Her_Zombie_Dog", # broken images
+    "CuoreVoodoo", # broken images
+    "dairyaire", # broken images
+    "DIS", # broken images
+    "Dot_TXT", # broken images
+    "Dreadnought_Invasion_Six", # broken images
+    "Emerald_Winter", # broken images
+    "Enter_the_Duck_2", # broken images
+    "ffff", # broken images
+    "Function_Over_Fashion", # broken images
+    "Funday_Morning", # broken images
+    "greys_journey", # broken images
+    "Head_over_Heart", # broken images
+    "Hurrocks_Fardel", # broken images
+    "Bhaddland", # start page requires login
+    "Bouncing_Orbs_of_Beauty", # start page requires login
+    "Busty_Solar", # start page requires login
+    "Illusional_Beauty", # broken images
+    "Indigo_Bunting__Vampire", # start page requires login
+    "Irrumator", # start page requires login
+    "Its_A_Boy_Thing", # start page requires login
+    "Kokuahiru_comics", # start page requires login
+    "Inside_OuT", # broken images
+    "Journey_to_Raifina", # broken images
+    "KALA_dan", # broken images
+    "Live_to_tell", # start page requires login
+    "Locoma", # broken images
+    "London_Underworld", # broken images
+    "Louder_Than_Bombs", # broken images
+    "Lucky_Dawg", # broken images
+    "Mario_in_Johto", # broken images
+    "Master", # start page requires login
+    "Mastermind_BTRN", # broken images
+    "MAYA_____The_legend_of_Wolf", # broken images
+    "Megaman_Zero", # broken images
+    "Monster_Lover_Destinys_Path", # start page requires login
+    "M_Organ_Art", # start page requires login
+    "Morning_Squirtz", # start page requires login
+    "MOSAIC", # broken images
+    "My_Angel_and_My_Devil", # broken images
+    "Nemution_Jewel", # start page requires login
+    "Nemution_Redux", # start page requires login
+    "New_Pages", # broken images
+    "Ninja_Shizatch", # broken images
+    "Normalcy_is_for_Wimps", # broken images
+    "MIKYAGU", # broken images
+    "One_Third_Of_Your_Life_Is_Spent_Sleeping_One_Third_Of_Your_Life_Is_Spent_Working_And_Half_Of_One_Third_Is_Spent_Waiting_The_Question_Is_It_Really_Your_Life", # broken images
+    "OTENBA_Files", # start page requires login
+    "Panacea", # start page requires login
+    "Parker_Lot", # broken images
+    "Peter_And_The_Wolf", # start page requires login
+    "Perspectives", # broken images
+    "Pokemon_Sinnoh_Surfer", # broken images
+    "Pokemon_World_Trainers", # broken images
+    "Potpourri_of_Lascivious_Whimsy", # start page requires login
+    "Pr0nCrest", # start page requires login
+    "punished_girls", # start page requires login
+    "Powerjeff", # broken images
+    "Comicarotica", # start page requires login
+    "Dark_Sisters", # start page requires login
+    "Death_P0rn", # start page requires login
+    "Dreams_in_Synergy", # broken images
+    "GNight_Shade", # start page requires login
+    "GRIND", # start page requires login
+    "HUSS", # start page requires login
+    "Red_Dog_Venue", # start page is broken
+    "rubber_girls", # start page requires login
+    "Robomeks", # broken images
+    "Robot_Friday", # broken images
+    "SFA", # start page requires login
+    "Shadow_Root", # start page requires login
+    "Shiro_Karasu", # start page requires login
+    "Shelter_of_Wings", # broken images
+    "Some_Notes", # broken images
+    "Sonic_Advanced_Online", # broken images
+    "Sonic_and_tails_corner", # broken images
+    "Sonic_Unreal", # broken images
+    "Tales_of_Schlock", # start page requires login
+    "Splices_of_Life", # broken images
+    "STARSEARCHERS", # broken images
+    "Ted_The_Terrible_Superhero", # broken images
+    "Terra_online_comic", # broken images
+    "The_Auragon_Base", # broken images
+    "The_Bend", # broken images
+    "The_Chronicles_of_Drew", # broken images
+    "The_Devils_Horn", # broken images
+    "The_Dragon_and_the_Lemur", # start page requires login
+    "The_Fighting_Stranger", # broken images
+    "The_Mighty_Omega", # broken images
+    "The_Misadventures_of_Everyone", # start page requires login
+    "The_NEW_Life_Of_TimmY", # broken images
+    "The_SSA", # broken images
+    "Tony_The_Hedgehog", # broken images
+    "Trapped_in_a_Comic", # start page requires login
+    "Unsound_of_Mind", # broken images
+    "Vampire_Chronicles__Dark_Lust", # start page requires login
+    "WarMage", # start page requires login
+    "Watashi_No_Ame", # broken images
+    "Weave", # broken images
+    "Weirdlings", # template error
+    "Welcome_To_Border_City", # broken images
+    "what_comes_first", # start page requires login
+    "Within_Shadows", # broken images
+    "Xolta", # start page requires login
+    "XTIN__The_Dragons_Dream_World", # start page requires login
+    "X_UP", # start page requires login
+    "Zandars_Saga", # start page requires login
+    "Twonks_and_Plonkers", # broken images, no real content
+    "U_Chuu_No_Hoshi_Hotoshi_Tsuko", # broken images
 ]


--- a/scripts/gocomics.py
+++ b/scripts/gocomics.py
@ -21,6 +21,22 @@ url_matcher = re.compile(tagre("a", "href", r'(/[^"]+)', after="alpha_list") + r
 # names of comics to exclude
 exclude_comics = [
    "FrikkFrakkAndFrank", # too few comics
+    "Apocalypseharry", # too few comics
+    "BatkidandBatrat", # too few comics
+    "BETWEENTHELINES", # comic unavailable
+    "Bonner", # missing page
+    "Buster", # comic unavailable
+    "DALTONDOG", # comic unavailable
+    "DellAndSteve", # too few comics
+    "Dilbert", # redirect
+    "InkeeDoodles", # comic unavailable
+    "MaggiesComics", # too few comics
+    "OfMiceandMud", # too few comics
+    "OysterWar", # too few comics
+    "PIGTIMES", # comic unavailable
+    "PS", # comic unavailable
+    "SherpaAid", # comic unavailable
+    "SparComics", # comic unavailable
 ]


--- a/scripts/keenspot.py
+++ b/scripts/keenspot.py
@ -21,27 +21,59 @@ num_matcher = re.compile(r'Number of Days: (\d+)')

 # names of comics to exclude
 exclude_comics = [
+    "10", # page is gone
+    "54sinRed", # page is 403 forbidden
+    "6D4", # redirected to another page
+    "AaaSoCAwesomenessandaSliceofCheese", # broken images
+    "AcrossthePond", # page moved
+    "ACDeceptibotscomic", # no images
+    "AdamandSei", # page has 403 forbidden
+    "AdamsRoadGang", # page is gone
+    "ADVENTURERS", # page is gone
+    "AiYaiYai", # page moved
+    "AlltheCommies", # missing images
+    "AltaModaMetro", # page redirected
+    "AltarGirl", # page redirected
+    "Amerika", # no images
+    "Angels", # page has 403 forbidden
+    "AngryDMonkey", # page redirected
+    "Angst", # page redirected
+    "Animenifesto", # too few images
+    "Anna", # no images
+    "Arcana", # archive broken
+    "Area15", # no images
+    "BaidheTu", # no images
+    "BasilFlint", # page redirected
+    "beerkada", # no images
+    "BelovedLeader", # broken images
+    "BigMouthComics", # page does not follow standard layout
+    "", # page is gone
+    "", # page is gone
+    "", # page is gone
+    "BlueZombie", # broken page
+    "BoomerExpress", # redirection to another page
+    "DungeonDamage", # page does not follow standard layout
+    "EarthRiser", # redirects to a new page
+    "FaultyLogic", # page does not follow standard layout
+    "GoForIt", # page is gone
    "JuvenileDiversion", # page moved
    "JustWeird", # page has 403 forbidden
+    "Michikomonogatari", # page does not follow standard layout
    "MobileMadness", # page does not follow standard layout
    "KnightsOfTheNexus", # page does not follow standard layout
    "RogerAndDominic", # page does not follow standard layout
-    "TheAvatar", # page does not follow standard layout
-    "Michikomonogatari", # page does not follow standard layout
-    "DungeonDamage", # page does not follow standard layout
    "SaveMeGebus", # page does not follow standard layout
-    "BlueZombie", # broken page
-    "BoomerExpress", # redirection to another page
-    "FaultyLogic", # page does not follow standard layout
-    "EarthRiser", # redirects to a new page
-    "GoForIt", # page is gone
-    "ACDeceptibotscomic", # no images
-
+    "TheAvatar", # page does not follow standard layout
 ]

+# links to last valid strips
 url_overrides = {
-    # link to last valid strip
    "BallofYarn": "http://ballofyarn.comicgenesis.com/d/20020624.html",
+    "AmazonSpaceRangers": "http://amazons.comicgenesis.com/d/20051015.html",
+    "ArroganceinSimplicity": "http://arrogance.comicgenesis.com/d/20030217.html",
+    "ATasteofEvil": "http://atasteofevil.comicgenesis.com/d/20050314.html",
+    "": "",
+    "": "",
 }

 def handle_url(url, res):
--- a/tests/test_comics.py
+++ b/tests/test_comics.py
@ -33,30 +33,31 @@ class _ComicTester(TestCase):
        # at least 5 strips from the start, and find strip images
        # on at least 4 pages.
        scraperobj = self.scraperclass()
-        num = empty = 0
+        num = 0
        max_strips = 5
        for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
            images = 0
            for image in strip.getImages():
                images += 1
                self.save(image)
-            if images == 0:
-                empty += 1
+            self.check(images > 0, 'failed to find images at %s' % strip.stripUrl)
+            if not self.scraperclass.multipleImagesPerStrip:
+                self.check(images == 1, 'found %d instead of 1 image at %s' % (images, strip.stripUrl))
            if num > 0:
                self.check_stripurl(strip)
            num += 1
        if self.scraperclass.prevSearch:
-            self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
-            # check that at exactly or for multiple pages at least 5 images are saved
+            self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern at %s.' % (num, strip.stripUrl))
+            # Check that exactly max_strips images are saved, or at least that many
+            # when a strip may have multiple images. This differs from the per-strip
+            # image count check above because it counts saved files, i.e. it detects duplicate filenames.
            saved_images = self.get_saved_images()
            num_images = len(saved_images)
+            attrs = (num_images, saved_images, max_strips, self.tmpdir)
            if self.scraperclass.multipleImagesPerStrip:
-                self.check(num_images >= max_strips, 
-                  'saved %d %s instead of at least %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
+                self.check(num_images >= max_strips, 'saved %d %s instead of at least %d images in %s' % attrs)
            else:
-                self.check(num_images == max_strips, 
-                  'saved %d %s instead of %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
-        self.check(empty == 0, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
+                self.check(num_images == max_strips, 'saved %d %s instead of %d images in %s' % attrs)

    def check_stripurl(self, strip):
        if not self.scraperclass.stripUrl: