diff --git a/dosage b/dosage
index 5fc7f447d..cd1f0cb4f 100755
--- a/dosage
+++ b/dosage
@@ -136,7 +136,7 @@ def displayHelp(options):
     """Print help for comic strips."""
     errors = 0
     try:
-        for scraperobj in director.getScrapers(options.comic, options.basepath):
+        for scraperobj in director.getScrapers(options.comic, options.basepath, listing=True):
             errors += displayComicHelp(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -239,12 +239,17 @@ def doList(columnList=True, verbose=False):
     out.info(u'Available comic scrapers:')
     out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(director.getAllScrapers(), key=lambda s: s.getName())
+    scrapers = sorted(director.getAllScrapers(listing=True), key=lambda s: s.getName())
     if columnList:
-        num = doColumnList(scrapers)
+        num, disabled = doColumnList(scrapers)
     else:
-        num = doSingleList(scrapers, verbose=verbose)
+        num, disabled = doSingleList(scrapers, verbose=verbose)
     out.info(u'%d supported comics.' % num)
+    if disabled:
+        out.info('')
+        out.info(u'Some comics are disabled; they are tagged with [%s:REASON], where REASON is one of:' % TAG_DISABLED)
+        for k in disabled:
+            out.info(u' %-10s %s' % (k, disabled[k]))
     if page:
         pydoc.pager(fd.getvalue())
     return 0
@@ -254,38 +259,46 @@ def doList(columnList=True, verbose=False):


 def doSingleList(scrapers, verbose=False):
     """Get list of scraper names, one per line."""
+    disabled = {}
     for num, scraperobj in enumerate(scrapers):
         if verbose:
             displayComicHelp(scraperobj)
         else:
-            out.info(getScraperName(scraperobj))
-    return num
+            out.info(getScraperName(scraperobj, reasons=disabled))
+    return num + 1, disabled


 def doColumnList(scrapers):
     """Get list of scraper names with multiple names per line."""
+    disabled = {}
     screenWidth = get_columns(sys.stdout)
     # limit name length so at least two columns are there
     limit = (screenWidth // 2) - 8
-    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
+    names = [getScraperName(scraperobj, limit=limit, reasons=disabled) for scraperobj in scrapers]
     num = len(names)
     maxlen = max(len(name) for name in names)
     namesPerLine = max(screenWidth // (maxlen + 1), 1)
     while names:
         out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
         del names[:namesPerLine]
-    return num
+    return num, disabled


 TAG_ADULT = "adult"
 TAG_LANG = "lang"
+TAG_DISABLED = "dis"


-def getScraperName(scraperobj, limit=None):
+def getScraperName(scraperobj, limit=None, reasons=None):
     """Get comic scraper name."""
     tags = []
     if scraperobj.adult:
         tags.append(TAG_ADULT)
     if scraperobj.lang != "en":
         tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
+    disabled = scraperobj.getDisabledReasons()
+    if disabled:
+        reasons.update(disabled)
+        for reason in disabled:
+            tags.append("%s:%s" % (TAG_DISABLED, reason))
     if tags:
         suffix = " [" + ", ".join(tags) + "]"
     else:
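The listing functions above share one pattern: they thread a single dict through getScraperName(), which both tags each display name and records every distinct disabled reason. An illustrative sketch of that flow (not part of the patch; `scrapers` stands in for the sorted scraper list):

    # How doSingleList() feeds the legend printed by doList():
    disabled = {}
    for scraperobj in scrapers:
        # getScraperName() appends "[dis:REASON]" to the display name and,
        # as a side effect, copies {reason: explanation} into `disabled`.
        out.info(getScraperName(scraperobj, reasons=disabled))
    # doList() later prints one legend line per reason key, so an
    # explanation is shown once even when many modules share it.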
comics.""" if '@' in comics: # only scrapers whose directory already exists @@ -203,17 +203,13 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False): for scraperclass in scraper.get_scraperclasses(): dirname = getDirname(scraperclass.getName()) if os.path.isdir(os.path.join(basepath, dirname)): - if not adult and scraperclass.adult: - warn_adult(scraperclass) - continue - yield scraperclass() + if shouldRunScraper(scraperclass, adult, listing): + yield scraperclass() elif '@@' in comics: # all scrapers for scraperclass in scraper.get_scraperclasses(): - if not adult and scraperclass.adult: - warn_adult(scraperclass) - continue - yield scraperclass() + if shouldRunScraper(scraperclass, adult, listing): + yield scraperclass() else: # get only selected comic scrapers # store them in a set to eliminate duplicates @@ -233,15 +229,30 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False): indexes = None scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed) for scraperclass in scraperclasses: - if not adult and scraperclass.adult: - warn_adult(scraperclass) - continue - scraperobj = scraperclass(indexes=indexes) - if scraperobj not in scrapers: - scrapers.add(scraperobj) - yield scraperobj + if shouldRunScraper(scraperclass, adult, listing): + scraperobj = scraperclass(indexes=indexes) + if scraperobj not in scrapers: + scrapers.add(scraperobj) + yield scraperobj + + +def shouldRunScraper(scraperclass, adult=True, listing=False): + if listing: + return True + if not adult and scraperclass.adult: + warn_adult(scraperclass) + return False + reasons = scraperclass.getDisabledReasons() + if reasons: + warn_disabled(scraperclass, reasons) + return False + return True def warn_adult(scraperclass): """Print warning about adult content.""" out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName()) + +def warn_disabled(scraperclass, reasons): + """Print warning about disabled comic modules.""" + out.warn(u"Skipping comic %s: %s" % (scraperclass.getName(), ' '.join(reasons.values()))) diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index 53118be46..7b7a62940 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -1,7 +1,7 @@ # -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -from .util import fetchUrl, getPageContent, getQueryParams +from .util import getQueryParams def queryNamer(paramName, usePageUrl=False): """Get name from URL query part.""" @@ -30,10 +30,10 @@ def bounceStarter(url, nextSearch): @classmethod def _starter(cls): """Get bounced start URL.""" - data, baseUrl = getPageContent(url, cls.session) - url1 = fetchUrl(url, data, baseUrl, cls.prevSearch) - data, baseUrl = getPageContent(url1, cls.session) - return fetchUrl(url1, data, baseUrl, nextSearch) + data = cls.getPage(url) + url1 = cls.fetchUrl(url, data, cls.prevSearch) + data = cls.getPage(url1) + return cls.fetchUrl(url1, data, nextSearch) return _starter @@ -42,6 +42,6 @@ def indirectStarter(url, latestSearch): @classmethod def _starter(cls): """Get indirect start URL.""" - data, baseUrl = getPageContent(url, cls.session) - return fetchUrl(url, data, baseUrl, latestSearch) + data = cls.getPage(url) + return cls.fetchUrl(url, data, latestSearch) return _starter diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 98810c0ea..c701fec4b 100644 --- a/dosagelib/plugins/b.py +++ 
diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py
index 53118be46..7b7a62940 100644
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@@ -1,7 +1,7 @@
 # -*- coding: iso-8859-1 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-from .util import fetchUrl, getPageContent, getQueryParams
+from .util import getQueryParams

 def queryNamer(paramName, usePageUrl=False):
     """Get name from URL query part."""
@@ -30,10 +30,10 @@ def bounceStarter(url, nextSearch):
     @classmethod
     def _starter(cls):
         """Get bounced start URL."""
-        data, baseUrl = getPageContent(url, cls.session)
-        url1 = fetchUrl(url, data, baseUrl, cls.prevSearch)
-        data, baseUrl = getPageContent(url1, cls.session)
-        return fetchUrl(url1, data, baseUrl, nextSearch)
+        data = cls.getPage(url)
+        url1 = cls.fetchUrl(url, data, cls.prevSearch)
+        data = cls.getPage(url1)
+        return cls.fetchUrl(url1, data, nextSearch)
     return _starter


@@ -42,6 +42,6 @@ def indirectStarter(url, latestSearch):
     @classmethod
     def _starter(cls):
         """Get indirect start URL."""
-        data, baseUrl = getPageContent(url, cls.session)
-        return fetchUrl(url, data, baseUrl, latestSearch)
+        data = cls.getPage(url)
+        return cls.fetchUrl(url, data, latestSearch)
     return _starter
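Both helpers now go through cls.getPage() and cls.fetchUrl(), so the same starter works for regex-based and parser-based scrapers alike; each class supplies its own opaque `data` value (a (text, baseUrl) tuple for _BasicScraper, an lxml tree for _ParserScraper, as defined later in this patch). A sketch of typical usage, with a placeholder URL and placeholder XPath expressions:

    class ExampleComic(_ParserScraper):
        url = 'http://example.com/'            # placeholder
        imageSearch = '//img[@id="comic"]'     # placeholder XPath
        prevSearch = '//a[@rel="prev"]'
        # jump to the newest strip via the page's "latest" link:
        starter = indirectStarter(url, '//a[@rel="latest"]')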
diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py
index 98810c0ea..c701fec4b 100644
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -5,7 +5,7 @@
 from re import compile, escape

 from ..util import tagre
-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter


@@ -148,6 +148,28 @@ class BizarreUprising(_BasicScraper):
     help = 'Index format: n/name'


+class BladeKitten(_ParserScraper):
+    description = u"Blade Kitten, aka Kit Ballard, is the hottest and best bounty hunter in the Korunda System and isn't afraid to let people know it!"
+    url = 'http://www.bladekitten.com/'
+    stripUrl = url + 'comics/blade-kitten/%s/page:%s'
+    firstStripUrl = stripUrl % ('1', '1')
+    imageSearch = '//img[@class="comic_page_image"]'
+    prevSearch = '//span[@class="comic_nav_prev"]//a'
+    textSearch = '//div[@class="comic_comment_inner"]//p'
+    textOptional = True
+    help = 'Index format: chapter-page'
+    starter = indirectStarter(url, '//h4//a[contains(@href, "/comics/")]')
+
+    def getIndexStripUrl(self, index):
+        return self.stripUrl % tuple(index.split('-'))
+
+    @classmethod
+    def namer(cls, imageUrl, pageUrl):
+        filename = imageUrl.rsplit('/', 1)[1]
+        _, chapter, page = pageUrl.rsplit('/', 2)
+        page = page.split(':')[1]
+        return "bladekitten-%02i-%02i-%s" % (int(chapter), int(page), filename)
+
 class BlankIt(_BasicScraper):
     description = u'An absurd, insane, and delightful webcomic from Aric McKeown and Lem Pew.'
     url = 'http://blankitcomics.com/'
diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py
index 6480ba53d..bfeac0da8 100755
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@@ -420,7 +420,7 @@ class CyanideAndHappiness(_BasicScraper):

     def shouldSkipUrl(self, url, data):
         """Skip pages without images."""
-        return "/comics/play-button.png" in data
+        return "/comics/play-button.png" in data[0]

     @classmethod
     def namer(cls, imageUrl, pageUrl):
diff --git a/dosagelib/plugins/clonemanga.py b/dosagelib/plugins/clonemanga.py
index 93b4f4107..a26310aec 100644
--- a/dosagelib/plugins/clonemanga.py
+++ b/dosagelib/plugins/clonemanga.py
@@ -3,7 +3,7 @@
 # Copyright (C) 2012-2014 Bastian Kleineidam
 from re import compile
 from ..scraper import make_scraper
-from ..util import tagre, getQueryParams, fetchUrl, getPageContent
+from ..util import tagre, getQueryParams

 _linkTag = tagre("a", "href", r'([^"]+)')
@@ -25,15 +25,15 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
     @classmethod
     def _starter(cls):
         # first, try hopping to previous and next comic
-        data, _baseUrl = getPageContent(baseUrl, cls.session)
+        data = cls.getPage(baseUrl)
         try:
-            url = fetchUrl(baseUrl, data, _baseUrl, _prevSearch)
+            url = cls.fetchUrl(baseUrl, data, _prevSearch)
         except ValueError:
             # no previous link found, try hopping to last comic
-            return fetchUrl(baseUrl, data, _baseUrl, _lastSearch)
+            return cls.fetchUrl(baseUrl, data, _lastSearch)
         else:
-            data, _baseUrl = getPageContent(url, cls.session)
-            return fetchUrl(url, data, _baseUrl, _nextSearch)
+            data = cls.getPage(url)
+            return cls.fetchUrl(url, data, _nextSearch)

     attrs = dict(
         name='CloneManga/' + name,
diff --git a/dosagelib/plugins/drunkduck.py b/dosagelib/plugins/drunkduck.py
index 8a48888f6..a0c385335 100644
--- a/dosagelib/plugins/drunkduck.py
+++ b/dosagelib/plugins/drunkduck.py
@@ -4,7 +4,7 @@

 from re import compile
 from ..scraper import make_scraper, Genre
-from ..util import tagre, fetchUrl, getPageContent
+from ..util import tagre

 # note: adding the compile() functions inside add() is a major performance hog
 _imageSearch = compile(tagre("img", "src", r'(https://s3\.amazonaws\.com/media\.drunkduck\.com/[^"]+)', before="page-image"))
@@ -27,15 +27,15 @@ def add(name, path):
     @classmethod
     def _starter(cls):
         # first, try hopping to previous and next comic
-        data, baseUrl = getPageContent(_url, cls.session)
+        data = cls.getPage(_url)
         try:
-            url = fetchUrl(_url, data, baseUrl, _prevSearch)
+            url = cls.fetchUrl(_url, data, _prevSearch)
         except ValueError:
             # no previous link found, try hopping to last comic
-            return fetchUrl(_url, data, baseUrl, _lastSearch)
+            return cls.fetchUrl(_url, data, _lastSearch)
         else:
-            data, baseUrl = getPageContent(url, cls.session)
-            return fetchUrl(url, data, baseUrl, _nextSearch)
+            data = cls.getPage(url)
+            return cls.fetchUrl(url, data, _nextSearch)

     attrs = dict(
         name = 'DrunkDuck/' + name,
diff --git a/dosagelib/plugins/h.py b/dosagelib/plugins/h.py
index dd52c8362..daeaea9d8 100644
--- a/dosagelib/plugins/h.py
+++ b/dosagelib/plugins/h.py
@@ -3,7 +3,7 @@

 from re import compile, escape
 from ..scraper import _BasicScraper
-from ..util import tagre, getPageContent, fetchUrls
+from ..util import tagre
 from ..helpers import bounceStarter


@@ -21,9 +21,9 @@ class HagarTheHorrible(_BasicScraper):
     def starter(cls):
         """Return last gallery link."""
         url = 'http://www.hagardunor.net/comics.php'
-        content = getPageContent(url, cls.session)[0]
+        data = cls.getPage(url)
         pattern = compile(tagre("a", "href", cls.prevUrl))
-        for starturl in fetchUrls(url, content, url, pattern):
+        for starturl in cls.fetchUrls(url, data, pattern):
             pass
         return starturl
diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py
index e039bde24..0951b5236 100755
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -5,7 +5,7 @@
 from re import compile, escape

 from ..scraper import _BasicScraper
 from ..helpers import bounceStarter, queryNamer, indirectStarter
-from ..util import tagre, fetchUrl, getPageContent
+from ..util import tagre


 class PandyLand(_BasicScraper):
@@ -104,10 +104,10 @@ class PennyArcade(_BasicScraper):
     @classmethod
     def starter(cls):
         """Get bounced start URL."""
-        data, baseUrl = getPageContent(cls.url, cls.session)
-        url1 = fetchUrl(cls.url, data, baseUrl, cls.prevSearch)
-        data, baseUrl = getPageContent(url1, cls.session)
-        url2 = fetchUrl(url1, data, baseUrl, cls.nextSearch)
+        data = cls.getPage(cls.url)
+        url1 = cls.fetchUrl(cls.url, data, cls.prevSearch)
+        data = cls.getPage(url1)
+        url2 = cls.fetchUrl(url1, data, cls.nextSearch)
         return cls.prevUrlModifier(url2)

     @classmethod
diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py
index 977f46852..e5f610d8e 100644
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -4,7 +4,7 @@

 from re import compile, escape, IGNORECASE, sub
 from os.path import splitext
-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import indirectStarter, bounceStarter
 from ..util import tagre, getPageContent

@@ -544,6 +544,25 @@ class StrawberryDeathCake(_BasicScraper):
     help = 'Index format: stripname'


+class StrongFemaleProtagonist(_ParserScraper):
+    url = 'http://strongfemaleprotagonist.com/'
+    stripUrl = url + '%s/'
+    css = True
+    imageSearch = 'article p:first-child img'
+    prevSearch = 'div.nav-previous > a'
+    help = 'Index format: issue-?/page-??'
+
+    def shouldSkipUrl(self, url, data):
+        """Skip hiatus & non-comic pages."""
+        return url in (
+            self.stripUrl % 'guest-art/tuesday',
+            self.stripUrl % 'guest-art/friday',
+            self.stripUrl % 'guest-art/wednesday',
+            self.stripUrl % 'issue-5/newspaper',
+            self.stripUrl % 'issue-5/hiatus-1',
+            self.stripUrl % 'issue-5/hiatus-2',
+        )
+
 class SuburbanTribe(_BasicScraper):
     url = 'http://www.pixelwhip.com/'
     rurl = escape(url)
diff --git a/dosagelib/plugins/smackjeeves.py b/dosagelib/plugins/smackjeeves.py
index 766e3ee2b..111f56e1d 100644
--- a/dosagelib/plugins/smackjeeves.py
+++ b/dosagelib/plugins/smackjeeves.py
@@ -3,7 +3,7 @@
 # Copyright (C) 2012-2014 Bastian Kleineidam
 from re import compile
 from ..scraper import make_scraper
-from ..util import tagre, quote, fetchUrl, case_insensitive_re, getPageContent
+from ..util import tagre, quote, case_insensitive_re

 # SmackJeeves is a crawlers nightmare - users are allowed to edit HTML directly.
 # That's why there are so much different search patterns.
@@ -45,11 +45,11 @@ def add(name, url, description, adult, bounce):
     def _starter(cls):
         """Get start URL."""
         url1 = modifier(url)
-        data, baseUrl = getPageContent(url1, cls.session)
-        url2 = fetchUrl(url1, data, baseUrl, cls.prevSearch)
+        data = cls.getPage(url1)
+        url2 = cls.fetchUrl(url1, data, cls.prevSearch)
         if bounce:
-            data, baseUrl = getPageContent(url2, cls.session)
-            url3 = fetchUrl(url2, data, baseUrl, _nextSearch)
+            data = cls.getPage(url2)
+            url3 = cls.fetchUrl(url2, data, _nextSearch)
             return modifier(url3)
         return modifier(url2)
diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py
index e43ab48ff..97403153d 100755
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -5,7 +5,7 @@
 from re import compile, escape, IGNORECASE

 from ..scraper import _BasicScraper
 from ..helpers import indirectStarter
-from ..util import tagre, fetchUrl, getPageContent
+from ..util import tagre


 class TheBrads(_BasicScraper):
@@ -223,11 +223,11 @@ class TheThinHLine(_BasicScraper):

     indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl))

-    def getComicStrip(self, url, data, baseUrl):
+    def getComicStrip(self, url, data):
         """The comic strip image is in a separate page."""
-        pageUrl = fetchUrl(url, data, baseUrl, self.indirectImageSearch)
-        pageData, pageBaseUrl = getPageContent(pageUrl, self.session)
-        return super(TheThinHLine, self).getComicStrip(pageUrl, pageData, pageBaseUrl)
+        pageUrl = self.fetchUrl(url, data, self.indirectImageSearch)
+        pageData = self.getPage(pageUrl)
+        return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)

     @classmethod
     def namer(cls, imageUrl, pageUrl):
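StrongFemaleProtagonist above is the first module to set css = True, which makes the new _ParserScraper (defined in the next file) evaluate its searches with lxml's cssselect() instead of xpath(). The cssselect package does that by translating CSS into XPath; roughly like this (output abbreviated and approximate):

    from cssselect import GenericTranslator
    print(GenericTranslator().css_to_xpath('div.nav-previous > a'))
    # descendant-or-self::div[contains(concat(' ', normalize-space(@class),
    #   ' '), ' nav-previous ')]/a

This translation step is also why getDisabledReasons() below reports a missing cssselect package only for css = True modules, while a missing lxml disables every _ParserScraper.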
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 3a85cbb69..b46424118 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -5,9 +5,26 @@ import requests
 import time
 import random
 import os
-from . import loader, configuration
-from .util import (fetchUrl, fetchUrls, fetchText, getPageContent,
-    makeSequence, get_system_uid, urlopen, getDirname, unescape)
+import re
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
+
+try:
+    from lxml import html
+    from lxml.html.defs import link_attrs as html_link_attrs
+except ImportError:
+    html = None
+
+try:
+    import cssselect
+except ImportError:
+    cssselect = None
+
+from . import loader, configuration, util
+from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
+    getDirname, unescape, tagre, normaliseURL, prettyMatcherList)
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
@@ -26,8 +43,8 @@ class Genre:
     other = u"Other"


-class _BasicScraper(object):
-    '''Base class with scrape functions for comics.'''
+class Scraper(object):
+    '''Base class for all comic scrapers, but without a specific scrape implementation.'''

     # The URL for the comic strip
     url = None
@@ -59,15 +76,15 @@ class _BasicScraper(object):
     # list of genres for this comic strip
     genres = (Genre.other,)

-    # compiled regular expression that will locate the URL for the previous strip in a page
-    # this can also be a list or tuple of compiled regular expressions
+    # an expression that will locate the URL for the previous strip in a page
+    # this can also be a list or tuple
    prevSearch = None

-    # compiled regular expression that will locate the strip image URLs strip in a page
-    # this can also be a list or tuple of compiled regular expressions
+    # an expression that will locate the strip image URLs in a page
+    # this can also be a list or tuple
     imageSearch = None

-    # compiled regular expression to store a text together with the image
+    # an expression to store text together with the image
     # sometimes comic strips have additional text info for each comic
     textSearch = None
@@ -94,7 +111,7 @@ class _BasicScraper(object):

     def __cmp__(self, other):
         """Compare scraper by name and index list."""
-        if not isinstance(other, _BasicScraper):
+        if not isinstance(other, Scraper):
             return 1
         # first, order by name
         d = cmp(self.getName(), other.getName())
@@ -111,26 +128,22 @@ class _BasicScraper(object):
         """Determine if search for images in given URL should be skipped."""
         return False

-    def getComicStrip(self, url, data, baseUrl):
+    def getComicStrip(self, url, data):
         """Get comic strip downloader for given URL and data."""
-        imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch)
+        imageUrls = self.fetchUrls(url, data, self.imageSearch)
         # map modifier function on image URLs
         imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
         # remove duplicate URLs
         imageUrls = set(imageUrls)
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
-            patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
+            out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
             image = sorted(imageUrls)[0]
-            out.warn(u"choosing image %s" % image)
+            out.warn(u"Choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
-            patterns = [x.pattern for x in makeSequence(self.imageSearch)]
-            out.warn(u"found no images at %s with patterns %s" % (url, patterns))
+            out.warn(u"Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch)))
         if self.textSearch:
-            text = fetchText(url, data, self.textSearch, optional=self.textOptional)
-            if text:
-                text = unescape(text).strip()
+            text = self.fetchText(url, data, self.textSearch, optional=self.textOptional)
         else:
             text = None
         return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session, text=text)
@@ -167,13 +180,13 @@ class _BasicScraper(object):
         seen_urls = set()
         while url:
             out.info(u'Get strip URL %s' % url, level=1)
-            data, baseUrl = getPageContent(url, self.session)
+            data = self.getPage(url)
             if self.shouldSkipUrl(url, data):
                 out.info(u'Skipping URL %s' % url)
                 self.skippedUrls.add(url)
             else:
                 try:
-                    yield self.getComicStrip(url, data, baseUrl)
+                    yield self.getComicStrip(url, data)
                 except ValueError as msg:
                     # image not found
                     out.exception(msg)
@@ -185,7 +198,7 @@ class _BasicScraper(object):
             maxstrips -= 1
             if maxstrips <= 0:
                 break
-            prevUrl = self.getPrevUrl(url, data, baseUrl)
+            prevUrl = self.getPrevUrl(url, data)
             seen_urls.add(url)
             if prevUrl in seen_urls:
                 # avoid recursive URL loops
@@ -196,18 +209,18 @@ class _BasicScraper(object):
             # wait up to 2 seconds for next URL
             time.sleep(1.0 + random.random())

-    def getPrevUrl(self, url, data, baseUrl):
+    def getPrevUrl(self, url, data):
         """Find previous URL."""
         prevUrl = None
         if self.prevSearch:
             try:
-                prevUrl = fetchUrl(url, data, baseUrl, self.prevSearch)
+                prevUrl = self.fetchUrl(url, data, self.prevSearch)
             except ValueError as msg:
                 # assume there is no previous URL, but print a warning
                 out.warn(u"%s Assuming no previous comic strips exist." % msg)
             else:
                 prevUrl = self.prevUrlModifier(prevUrl)
-                out.debug(u"Matched previous URL %s" % prevUrl)
+                out.debug(u"Found previous URL %s" % prevUrl)
             getHandler().comicPageLink(self.getName(), url, prevUrl)
         return prevUrl

@@ -278,6 +291,186 @@
         with open(filename, 'w') as f:
             f.write('All comics should be downloaded here.')
+
+    @classmethod
+    def getPage(cls, url):
+        """
+        Fetch a page and return the opaque representation for the data parameter
+        of fetchUrls and fetchText.
+
+        Implementation notes: While this base class does not restrict how the
+        returned data is structured, subclasses (specific scrapers) should specify
+        how this data works, since the structure is passed into different methods
+        which can be defined by comic modules and these methods should be able to
+        use the data if they so desire. (Affected methods: shouldSkipUrl,
+        imageUrlModifier)
+        """
+        raise ValueError("No implementation for getPage!")
+
+    @classmethod
+    def fetchUrls(cls, url, data, urlSearch):
+        raise ValueError("No implementation for fetchUrls!")
+
+    @classmethod
+    def fetchUrl(cls, url, data, urlSearch):
+        return cls.fetchUrls(url, data, urlSearch)[0]
+
+    @classmethod
+    def fetchText(cls, url, data, textSearch, optional):
+        raise ValueError("No implementation for fetchText!")
+
+    @classmethod
+    def getDisabledReasons(cls):
+        """
+        Get a dict of reasons why this comic module is disabled. The key is a
+        short (unique) identifier, the value is a string explaining why the
+        module is deactivated. If the module is not disabled, just return an
+        empty dict.
+        """
+        return {}
+
+
+ """ + + BASE_SEARCH = re.compile(tagre("base", "href", '([^"]*)')) + + @classmethod + def getPage(cls, url): + content = getPageContent(url, cls.session) + # determine base URL + baseUrl = None + match = cls.BASE_SEARCH.search(content) + if match: + baseUrl = match.group(1) + else: + baseUrl = url + return (content, baseUrl) + + @classmethod + def fetchUrls(cls, url, data, urlSearch): + """Search all entries for given URL pattern(s) in a HTML page.""" + searchUrls = [] + searches = makeSequence(urlSearch) + for search in searches: + for match in search.finditer(data[0]): + searchUrl = match.group(1) + if not searchUrl: + raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url)) + out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern)) + searchUrls.append(normaliseURL(urljoin(data[1], searchUrl))) + if searchUrls: + # do not search other links if one pattern matched + break + if not searchUrls: + patterns = [x.pattern for x in searches] + raise ValueError("Patterns %s not found at URL %s." % (patterns, url)) + return searchUrls + + @classmethod + def fetchText(cls, url, data, textSearch, optional): + """Search text entry for given text pattern in a HTML page.""" + if textSearch: + match = textSearch.search(data[0]) + if match: + text = match.group(1) + out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern)) + return unescape(text).strip() + if optional: + return None + else: + raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url)) + else: + return None + + +class _ParserScraper(Scraper): + """ + Scraper base class that uses a HTML parser and XPath expressions. + + All links are resolved before XPath searches are applied, so all URLs are + absolute! + + Subclasses of this class should use XPath expressions as values for + prevSearch, imageSearch and textSearch. When the XPath directly selects an + attribute, it is used as the output. + + All those searches try to do something intelligent when they match a + complete HTML Element: prevSearch and imageSearch try to find a "link + attribute" and use that as URL. textSearch strips all tags from the content + of the HTML element and returns that. + """ + + # Switch between CSS and XPath selectors for this class. Since CSS needs + # another Python module, XPath is the default for now. + css = False + + @classmethod + def getPage(cls, url): + tree = html.document_fromstring(getPageContent(url, cls.session)) + tree.make_links_absolute(url) + return tree + + @classmethod + def fetchUrls(cls, url, data, urlSearch): + """Search all entries for given XPath in a HTML page.""" + searchUrls = [] + if cls.css: + searchFun = data.cssselect + else: + searchFun = data.xpath + searches = makeSequence(urlSearch) + for search in searches: + for match in searchFun(search): + try: + for attrib in html_link_attrs: + if attrib in match.attrib: + searchUrls.append(match.get(attrib)) + except AttributeError: + searchUrls.append(str(match)) + if searchUrls: + # do not search other links if one pattern matched + break + if not searchUrls: + raise ValueError("XPath %s not found at URL %s." 
+class _ParserScraper(Scraper):
+    """
+    Scraper base class that uses an HTML parser and XPath expressions.
+
+    All links are resolved before XPath searches are applied, so all URLs are
+    absolute!
+
+    Subclasses of this class should use XPath expressions as values for
+    prevSearch, imageSearch and textSearch. When the XPath directly selects an
+    attribute, it is used as the output.
+
+    All those searches try to do something intelligent when they match a
+    complete HTML element: prevSearch and imageSearch try to find a "link
+    attribute" and use that as URL. textSearch strips all tags from the content
+    of the HTML element and returns that.
+    """
+
+    # Switch between CSS and XPath selectors for this class. Since CSS needs
+    # another Python module, XPath is the default for now.
+    css = False
+
+    @classmethod
+    def getPage(cls, url):
+        tree = html.document_fromstring(getPageContent(url, cls.session))
+        tree.make_links_absolute(url)
+        return tree
+
+    @classmethod
+    def fetchUrls(cls, url, data, urlSearch):
+        """Search all entries for given XPath in an HTML page."""
+        searchUrls = []
+        if cls.css:
+            searchFun = data.cssselect
+        else:
+            searchFun = data.xpath
+        searches = makeSequence(urlSearch)
+        for search in searches:
+            for match in searchFun(search):
+                try:
+                    for attrib in html_link_attrs:
+                        if attrib in match.attrib:
+                            searchUrls.append(match.get(attrib))
+                except AttributeError:
+                    searchUrls.append(str(match))
+            if searchUrls:
+                # do not search other links if one pattern matched
+                break
+        if not searchUrls:
+            raise ValueError("XPath %s not found at URL %s." % (searches, url))
+        return searchUrls
+
+    @classmethod
+    def fetchText(cls, url, data, textSearch, optional):
+        """Search text entry for given text XPath in an HTML page."""
+        if textSearch:
+            text = ''
+            for match in data.xpath(textSearch):
+                try:
+                    text += ' ' + match.text_content()
+                except AttributeError:
+                    text += ' ' + unicode(match)
+            if text.strip() == '':
+                if optional:
+                    return None
+                else:
+                    raise ValueError("XPath %s did not match anything at URL %s." % (textSearch, url))
+            out.debug(u'Matched text %r with XPath %s' % (text, textSearch))
+            return unescape(text).strip()
+        else:
+            return None
+
+    @classmethod
+    def getDisabledReasons(cls):
+        res = {}
+        if cls.css and cssselect is None:
+            res['css'] = u"This module needs the cssselect (python-cssselect) Python module which is not installed."
+        if html is None:
+            res['lxml'] = u"This module needs the lxml (python-lxml) Python module which is not installed."
+        return res


 def find_scraperclasses(comic, multiple_allowed=False):
     """Get a list comic scraper classes. Can return more than one entries if
@@ -309,14 +502,14 @@
 _scraperclasses = None


 def get_scraperclasses():
     """Find all comic scraper classes in the plugins directory.
     The result is cached.
-    @return: list of _BasicScraper classes
-    @rtype: list of _BasicScraper
+    @return: list of Scraper classes
+    @rtype: list of Scraper
     """
     global _scraperclasses
     if _scraperclasses is None:
         out.debug(u"Loading comic modules...")
         modules = loader.get_modules('plugins')
-        plugins = loader.get_plugins(modules, _BasicScraper)
+        plugins = loader.get_plugins(modules, Scraper)
         _scraperclasses = list(plugins)
         check_scrapers()
         out.debug(u"... %d modules loaded." % len(_scraperclasses))
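Taken together, a minimal parser-based module needs little more than two expressions. Everything in this sketch is hypothetical (invented name, URL and selectors), shown only to illustrate the contract of the new base class:

    from dosagelib.scraper import _ParserScraper

    class ExampleStrips(_ParserScraper):
        url = 'http://example.com/comics/'       # invented URL
        imageSearch = '//img[@class="strip"]'    # selects the strip <img> element
        prevSearch = '//a[@rel="prev"]'          # selects the previous-page <a> element
        # css = True would switch both searches to CSS selectors instead.

getPage() hands fetchUrls() a tree whose links are already made absolute, and fetchUrls() pulls a link attribute (href, src, ...) out of each matched element, so the searches can simply select whole elements.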
diff --git a/dosagelib/util.py b/dosagelib/util.py
index c0f0df3f3..b16cc07d1 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -7,9 +7,9 @@ try:
 except ImportError:
     from urllib import quote as url_quote, unquote as url_unquote
 try:
-    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit
+    from urllib.parse import urlparse, urlunparse, urlsplit
 except ImportError:
-    from urlparse import urlparse, urlunparse, urljoin, urlsplit
+    from urlparse import urlparse, urlunparse, urlsplit
 try:
     from urllib import robotparser
 except ImportError:
@@ -176,8 +176,6 @@ def case_insensitive_re(name):
     return "".join("[%s%s]" % (c.lower(), c.upper()) for c in name)


-baseSearch = re.compile(tagre("base", "href", '([^"]*)'))
-
 def isValidPageContent(data):
     """Check if page content is empty or has error messages."""
     # The python requests library sometimes returns empty data.
@@ -203,14 +201,7 @@ def getPageContent(url, session, max_content_bytes=MaxContentBytes):
     if not isValidPageContent(data):
         raise ValueError("Got invalid page content from %s: %r" % (url, data))
     out.debug(u"Got page content %r" % data, level=3)
-    # determine base URL
-    baseUrl = None
-    match = baseSearch.search(data)
-    if match:
-        baseUrl = match.group(1)
-    else:
-        baseUrl = url
-    return data, baseUrl
+    return data


 def getImageObject(url, referrer, session, max_content_bytes=MaxImageBytes):
@@ -226,40 +217,16 @@ def makeSequence(item):
     return (item,)


-def fetchUrls(url, data, baseUrl, urlSearch):
-    """Search all entries for given URL pattern(s) in a HTML page."""
-    searchUrls = []
-    searches = makeSequence(urlSearch)
-    for search in searches:
-        for match in search.finditer(data):
-            searchUrl = match.group(1)
-            if not searchUrl:
-                raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
-            out.debug(u'matched URL %r with pattern %s' % (searchUrl, search.pattern))
-            searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
-        if searchUrls:
-            # do not search other links if one pattern matched
-            break
-    if not searchUrls:
-        patterns = [x.pattern for x in searches]
-        raise ValueError("Patterns %s not found at URL %s." % (patterns, url))
-    return searchUrls
-
-
-def fetchUrl(url, data, baseUrl, urlSearch):
-    """Search first URL entry for given URL pattern in a HTML page."""
-    return fetchUrls(url, data, baseUrl, urlSearch)[0]
-
-
-def fetchText(url, data, textSearch, optional=False):
-    """Search text entry for given text pattern in a HTML page."""#
-    match = textSearch.search(data)
-    if match:
-        text = match.group(1)
-        out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
-        return text
-    if not optional:
-        raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
+def prettyMatcherList(things):
+    """Try to construct a nicely-formatted string for a list of matcher
+    objects. Those may be compiled regular expressions or plain strings."""
+    norm = []
+    for x in makeSequence(things):
+        if hasattr(x, 'pattern'):
+            norm.append(x.pattern)
+        else:
+            norm.append(x)
+    return "('%s')" % "', '".join(norm)


 _htmlparser = HTMLParser()
diff --git a/requirements.txt b/requirements.txt
index a37178243..d43beb4bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ requests

 # optional:
 argcomplete
+lxml
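For reference, the warnings in getComicStrip() format their search expressions with the new prettyMatcherList() helper, which accepts a single matcher or a sequence and copes with both compiled patterns and plain selector strings. A small usage sketch based on the function body above:

    import re
    from dosagelib.util import prettyMatcherList

    print(prettyMatcherList(re.compile(r'<a href="([^"]+)">')))
    # -> ('<a href="([^"]+)">')
    print(prettyMatcherList(['//a[@rel="prev"]', 'div.nav a']))
    # -> ('//a[@rel="prev"]', 'div.nav a')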