Merge pull request #169 from webcomics/xpath-ext

Replace xpath_class function with an XPath extension
2020-08-03 22:18:52 +02:00 · 2020-08-03 22:18:52 +02:00 · 912b30191d
commit 912b30191d
parent bd44fdbb79 7a176b29f2
26 changed files with 148 additions and 117 deletions
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@ -62,9 +62,3 @@ def indirectStarter(self):
    data = self.getPage(url)
    newurl = self.fetchUrl(url, data, self.latestSearch)
    return self.link_modifier(url, newurl)
-
-
-def xpath_class(name):
-    """Returns an XPath expressions which finds a tag which has a specified
-    class."""
-    return 'contains(concat(" ", @class, " "), " %s ")' % name
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@ -7,7 +7,7 @@ from re import compile, escape

 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic


@ -71,7 +71,7 @@ class Baroquen(_BasicScraper):
 class Bearmageddon(_WordPressScraper):
    url = 'http://bearmageddon.com/bearmo/page-1/'
    firstStripUrl = url
-    latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
+    latestSearch = '//a[d:class("comic-nav-last")]'
    starter = indirectStarter


@ -187,8 +187,8 @@ class BlankIt(_ParserScraper):
    url = 'http://blankitcomics.com/'
    firstStripUrl = url + 'comic/well-what-would-you-do'
    imageSearch = '//div[@id="comic"]//img'
-    prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
-    latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
+    prevSearch = '//a[d:class("comic-nav-previous")]'
+    latestSearch = '//a[d:class("comic-nav-last")]'
    starter = indirectStarter


@ -235,7 +235,7 @@ class BMovieComic(_BasicScraper):

 class BobWhite(_ParserScraper):
    url = 'http://www.bobwhitecomics.com/'
-    imageSearch = '//span[%s]/img' % xpath_class('webcomic-object')
+    imageSearch = '//span[d:class("webcomic-object")]/img'
    prevSearch = '//a[@rel="previous"]'


@ -296,7 +296,6 @@ class ButImACatPerson(_WordPressScraper):
    endOfLife = True


-
 class ButtercupFestival(_ParserScraper):
    url = 'http://www.buttercupfestival.com/'
    stripUrl = url + '%s.htm'
--- a/dosagelib/plugins/clonemanga.py
+++ b/dosagelib/plugins/clonemanga.py
@ -1,15 +1,15 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
-from ..helpers import indirectStarter, xpath_class
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
+from ..helpers import indirectStarter
 from ..scraper import _ParserScraper
 from ..util import getQueryParams


 class CloneManga(_ParserScraper):
    baseUrl = 'http://manga.clone-army.org'
-    imageSearch = '//div[%s]//img' % xpath_class('subsectionContainer')
+    imageSearch = '//div[d:class("subsectionContainer")]//img'
    prevSearch = '//a[span[text()="<<"]]'
    latestSearch = '//a[span[text()=">|"]]'
    starter = indirectStarter
--- a/dosagelib/plugins/comicfury.py
+++ b/dosagelib/plugins/comicfury.py
@ -6,10 +6,10 @@
 import os

 from ..scraper import _ParserScraper
-from ..helpers import bounceStarter, xpath_class
+from ..helpers import bounceStarter

-XPATH_LINK = '//a[%s and contains(text(), "%s")]'
-XPATH_IMG = '//div[{}]//a[img[contains(@alt, "%s")]]'.format(xpath_class('comicnav'))
+XPATH_LINK = '//a[d:class("%s") and contains(text(), "%s")]'
+XPATH_IMG = '//div[d:class("comicnav")]//a[img[contains(@alt, "%s")]]'


 class ComicFury(_ParserScraper):
@ -21,12 +21,12 @@ class ComicFury(_ParserScraper):
        # 137 (needs to be before the generic a@rel, because layout is wrong)
        '//a[contains(@title, "previous")]',
        '//a[@rel="prev"]',
-        XPATH_LINK % (xpath_class("comicnavlink"), "Previous"),
+        XPATH_LINK % ('comicnavlink', 'Previous'),
        XPATH_IMG % ('Previous'),
        # Art, ConsolersDLC, etc.
        u'//nav//a[contains(text(), "\u2039")]',
        # LatchkeyKingdom
-        '//a[%s and img[contains(@src, "Previous")]]' % xpath_class('navi'),
+        '//a[d:class("navi") and img[contains(@src, "Previous")]]',
        # RedSpot
        '//a[contains(text(), "Back")]',
        # KATRAN
@ -37,12 +37,12 @@ class ComicFury(_ParserScraper):
        # 137 (see above)
        '//a[contains(@title, "next")]',
        '//a[@rel="next"]',
-        XPATH_LINK % (xpath_class("comicnavlink"), "Next"),
+        XPATH_LINK % ('comicnavlink', 'Next'),
        XPATH_IMG % ('Next'),
        # Art, ConsolersDLC, etc.
        u'//nav//a[contains(text(), "\u203A")]',
        # LatchkeyKingdom
-        '//a[%s and img[contains(@src, "Next")]]' % xpath_class('navi'),
+        '//a[d:class("navi") and img[contains(@src, "Next")]]',
        # RedSpot, KATRAN
        '//a[contains(text(), "Next")]',
    )
--- a/dosagelib/plugins/common.py
+++ b/dosagelib/plugins/common.py
@ -4,7 +4,6 @@
 # Copyright (C) 2015-2020 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from ..scraper import _ParserScraper
-from ..helpers import indirectStarter, xpath_class

 # Common base classes for comics with the same structure (same hosting
 # software, for example) go here. Since those are shared by many modules,
@ -14,24 +13,24 @@ from ..helpers import indirectStarter, xpath_class

 class _WordPressScraper(_ParserScraper):
    imageSearch = '//div[@id="comic"]//img'
-    prevSearch = '//a[%s]' % xpath_class('comic-nav-previous')
-    nextSearch = '//a[%s]' % xpath_class('comic-nav-next')
-    latestSearch = '//a[%s]' % xpath_class('comic-nav-last')
+    prevSearch = '//a[d:class("comic-nav-previous")]'
+    nextSearch = '//a[d:class("comic-nav-next")]'
+    latestSearch = '//a[d:class("comic-nav-last")]'


 class _WPNavi(_WordPressScraper):
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
+    prevSearch = '//a[d:class("navi-prev")]'


 class _WPNaviIn(_WordPressScraper):
-    prevSearch = '//a[%s]' % xpath_class('navi-prev-in')
+    prevSearch = '//a[d:class("navi-prev-in")]'


 class _WPWebcomic(_WordPressScraper):
-    imageSearch = '//div[{}]//img'.format(xpath_class('webcomic-image'))
-    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
-    nextSearch = '///a[{}]'.format(xpath_class('next-webcomic-link'))
-    latestSearch = '//a[{}]'.format(xpath_class('last-webcomic-link'))
+    imageSearch = '//div[d:class("webcomic-image")]//img'
+    prevSearch = '//a[d:class("previous-webcomic-link")]'
+    nextSearch = '//a[d:class("next-webcomic-link")]'
+    latestSearch = '//a[d:class("last-webcomic-link")]'


 class _ComicControlScraper(_ParserScraper):
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@ -6,7 +6,7 @@
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, bounceStarter, xpath_class
+from ..helpers import indirectStarter, bounceStarter
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNaviIn, _WPWebcomic

@ -190,8 +190,8 @@ class Dilbert(_ParserScraper):
    stripUrl = url + 'strip/%s'
    firstStripUrl = stripUrl % '1989-04-16'
    starter = indirectStarter
-    prevSearch = '//div[%s]/a' % xpath_class('nav-left')
-    imageSearch = '//img[%s]' % xpath_class('img-comic')
+    prevSearch = '//div[d:class("nav-left")]/a'
+    imageSearch = '//img[d:class("img-comic")]'
    latestSearch = '//a[@class="img-comic-link"]'
    help = 'Index format: yyyy-mm-dd'

@ -260,14 +260,14 @@ class DominicDeegan(_ParserScraper):
 class DorkTower(_ParserScraper):
    url = 'http://www.dorktower.com/'
    firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/'
-    imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
-    prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')
+    imageSearch = '//div[d:class("entry-content")]//a/img'
+    prevSearch = '//a[d:class("btn")][text()="Previous"]'


 class DoomsdayMyDear(_ParserScraper):
    url = 'http://doomsdaymydear.com/'
-    imageSearch = '//img[{}]'.format(xpath_class('attachment-full'))
-    prevSearch = '//a[{}]'.format(xpath_class('previous-webcomic-link'))
+    imageSearch = '//img[d:class("attachment-full")]'
+    prevSearch = '//a[d:class("previous-webcomic-link")]'


 class Draconia(_WPWebcomic):
@ -307,10 +307,9 @@ class DresdenCodak(_ParserScraper):
    url = 'http://dresdencodak.com/'
    startUrl = url + 'cat/comic/'
    firstStripUrl = url + '2007/02/08/pom/'
-    imageSearch = '//section[%s]//img[%s]' % (
-        xpath_class('entry-content'), xpath_class('aligncenter'))
+    imageSearch = '//section[d:class("entry-content")]//img[d:class("aligncenter")]'
    prevSearch = '//a[img[contains(@src, "prev")]]'
-    latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
+    latestSearch = '//a[d:class("tc-grid-bg-link")]'
    starter = indirectStarter

    # Blog and comic are mixed...
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@ -6,7 +6,7 @@
 import os
 from re import compile, IGNORECASE

-from ..helpers import bounceStarter, indirectStarter, xpath_class
+from ..helpers import bounceStarter, indirectStarter
 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi
@ -35,7 +35,7 @@ class EatLiver(_ParserScraper):
    url = 'http://www.eatliver.com/'
    starter = indirectStarter
    multipleImagesPerStrip = True
-    imageSearch = '//div[%s]//img' % xpath_class('post-content')
+    imageSearch = '//div[d:class("post-content")]//img'
    prevSearch = '//a[@rel="prev"]'
    latestSearch = '//a[@rel="bookmark"]'

@ -175,7 +175,7 @@ class Everblue(_ParserScraper):
 class EverybodyLovesEricRaymond(_ParserScraper):
    url = 'http://geekz.co.uk/lovesraymond/'
    firstStripUrl = url + 'archive/slashdotted'
-    imageSearch = '//div[%s]//img' % xpath_class('entry-content')
+    imageSearch = '//div[d:class("entry-content")]//img'
    prevSearch = '//a[@rel="prev"]'


@ -255,6 +255,6 @@ class ExtraOrdinary(_ParserScraper):
    url = 'https://www.exocomics.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '01'
-    prevSearch = '//a[%s]' % xpath_class('prev')
-    imageSearch = '//img[%s]' % xpath_class('image-style-main-comic')
+    prevSearch = '//a[d:class("prev")]'
+    imageSearch = '//img[d:class("image-style-main-comic")]'
    help = 'Index format: number'
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -7,7 +7,7 @@ from re import compile, escape

 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class
+from ..helpers import indirectStarter, joinPathPartsNamer
 from .common import _ComicControlScraper, _WPNaviIn, _WordPressScraper


@ -62,8 +62,8 @@ class FirstWorldProblems(_ParserScraper):
        'http://bradcolbow.com/archive/C5/')
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'P10'
-    imageSearch = '//div[{}]//img'.format(xpath_class('entry'))
-    prevSearch = '//a[{}]'.format(xpath_class('prev'))
+    imageSearch = '//div[d:class("entry")]//img'
+    prevSearch = '//a[d:class("prev")]'
    multipleImagesPerStrip = True
    endOfLife = True

@ -83,7 +83,7 @@ class Flemcomics(_ParserScraper):
        'http://www.flemcomics.com/')
    stripUrl = url + 'd/%s.html'
    firstStripUrl = stripUrl % '19980101'
-    imageSearch = '//img[{}]'.format(xpath_class('ksc'))
+    imageSearch = '//img[d:class("ksc")]'
    prevSearch = '//a[@rel="prev"]'
    endOfLife = True
    help = 'Index format: yyyymmdd'
@ -174,10 +174,10 @@ class FredoAndPidjin(_ParserScraper):
    url = 'https://www.pidjin.net/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
-    imageSearch = '//div[%s]//img' % xpath_class("episode")
+    imageSearch = '//div[d:class("episode")]//img'
    multipleImagesPerStrip = True
-    prevSearch = '//span[%s]/a' % xpath_class("prev")
-    latestSearch = '//section[%s]//a' % xpath_class("latest")
+    prevSearch = '//span[d:class("prev")]/a'
+    latestSearch = '//section[d:class("latest")]//a'
    starter = indirectStarter
    namer = joinPathPartsNamer((0, 1, 2))

--- a/dosagelib/plugins/gocomics.py
+++ b/dosagelib/plugins/gocomics.py
@ -3,14 +3,14 @@
 # Copyright (C) 2012-2014 Bastian Kleineidam
 # Copyright (C) 2015-2020 Tobias Gruetzmacher
 from ..scraper import _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter


 class GoComics(_ParserScraper):
    url = 'https://www.gocomics.com/'
-    imageSearch = '//picture[{}]/img'.format(xpath_class('item-comic-image'))
-    prevSearch = '//a[{}]'.format(xpath_class('js-previous-comic'))
-    latestSearch = '//div[{}]//a'.format(xpath_class('gc-deck--cta-0'))
+    imageSearch = '//picture[d:class("item-comic-image")]/img'
+    prevSearch = '//a[d:class("js-previous-comic")]'
+    latestSearch = '//div[d:class("gc-deck--cta-0")]//a'
    starter = indirectStarter
    help = 'Index format: yyyy/mm/dd'

--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@ -6,7 +6,7 @@ from re import compile, escape

 from ..scraper import _BasicScraper
 from ..util import tagre
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from .common import _ComicControlScraper


@ -41,7 +41,7 @@ class JoeAndMonkey(_BasicScraper):


 class JohnnyWander(_ComicControlScraper):
-    imageSearch = ('//ul[%s]/li/@data-src' % xpath_class('cc-showbig'),
+    imageSearch = ('//ul[d:class("cc-showbig")]/li/@data-src',
                   _ComicControlScraper.imageSearch)
    url = 'http://www.johnnywander.com/'

--- a/dosagelib/plugins/m.py
+++ b/dosagelib/plugins/m.py
@ -6,7 +6,7 @@
 import json
 from re import compile, escape, IGNORECASE

-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPWebcomic
@ -74,7 +74,7 @@ class MarriedToTheSea(_ParserScraper):
    url = 'http://marriedtothesea.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '022806'
-    imageSearch = '//div[%s]//p/img' % xpath_class('jumbotron')
+    imageSearch = '//div[d:class("jumbotron")]//p/img'
    prevSearch = '//a[contains(text(), "Yesterday")]'
    help = 'Index format: mmddyy'

--- a/dosagelib/plugins/n.py
+++ b/dosagelib/plugins/n.py
@ -6,7 +6,7 @@
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPWebcomic

@ -134,7 +134,7 @@ class Nimona(_ParserScraper):
        'http://gingerhaze.com/nimona/')
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % "page-1"
-    imageSearch = '//div[{}]//img'.format(xpath_class('field-name-field-comic-page'))
+    imageSearch = '//div[d:class("field-name-field-comic-page")]//img'
    prevSearch = '//a[img[contains(@src, "/comicdrop_prev_label")]]'
    endOfLife = True

--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@ -6,7 +6,7 @@
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import bounceStarter, queryNamer, indirectStarter, xpath_class
+from ..helpers import bounceStarter, queryNamer, indirectStarter
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi

@ -99,8 +99,8 @@ class PennyArcade(_ParserScraper):
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '1998/11/18'
    imageSearch = '//div[@id="comicFrame"]//img'
-    prevSearch = '//a[%s]' % xpath_class('btnPrev')
-    nextSearch = '//a[%s]' % xpath_class('btnNext')
+    prevSearch = '//a[d:class("btnPrev")]'
+    nextSearch = '//a[d:class("btnNext")]'
    starter = bounceStarter
    help = 'Index format: yyyy/mm/dd'

@ -231,7 +231,7 @@ class PokeyThePenguin(_ParserScraper):
 class PoorlyDrawnLines(_ParserScraper):
    url = 'http://poorlydrawnlines.com/comic/'
    firstStripUrl = url + 'campus-characters/'
-    imageSearch = '//div[%s]//img' % xpath_class('comic')
+    imageSearch = '//div[d:class("comic")]//img'
    prevSearch = '//a[@rel="prev"]'


@ -269,7 +269,7 @@ class PrinceOfSartar(_WPNavi):
    url = 'http://www.princeofsartar.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'introduction-chapter-1'
-    nextSearch = '//a[%s]' % xpath_class('navi-next')
+    nextSearch = '//a[d:class("navi-next")]'
    starter = bounceStarter
    help = 'Index format: name'

--- a/dosagelib/plugins/q.py
+++ b/dosagelib/plugins/q.py
@ -4,7 +4,6 @@
 # Copyright (C) 2015-2020 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from ..scraper import _ParserScraper
-from ..helpers import xpath_class


 class QuantumVibe(_ParserScraper):
@ -28,6 +27,6 @@ class Qwantz(_ParserScraper):
    url = 'http://www.qwantz.com/index.php'
    stripUrl = url + '?comic=%s'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//img[{}]'.format(xpath_class('comic'))
+    imageSearch = '//img[d:class("comic")]'
    prevSearch = '//a[@rel="prev"]'
    help = 'Index format: n'
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@ -6,7 +6,7 @@
 from re import compile
 from urllib.parse import urljoin

-from ..helpers import bounceStarter, xpath_class
+from ..helpers import bounceStarter
 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
 from .common import _WordPressScraper, _WPWebcomic
@ -107,7 +107,7 @@ class RomanticallyApocalyptic(_ParserScraper):
    url = 'http://romanticallyapocalyptic.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '0'
-    imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel')
+    imageSearch = '//div[d:class("comicpanel")]/center//img'
    prevSearch = '//a[@accesskey="p"]'
    help = 'Index format: n'
    adult = True
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -7,7 +7,7 @@ from re import compile, escape, IGNORECASE, sub
 from os.path import splitext

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
+from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic

@ -120,7 +120,7 @@ class SchoolBites(_ParserScraper):
    url = ('https://web.archive.org/web/20170215065523/'
        'http://schoolbites.net/')
    stripUrl = url + 'd/%s.html'
-    imageSearch = '//img[{}]'.format(xpath_class('ksc'))
+    imageSearch = '//img[d:class("ksc")]'
    prevSearch = '//a[@rel="prev"]'
    endOfLife = True
    help = 'Index format: yyyymmdd'
@ -132,7 +132,7 @@ class Schuelert(_ParserScraper):
    stripUrl = url + 'index.php?paged=%s'
    firstStripUrl = stripUrl % '3'
    imageSearch = '//img[contains(@src, "wp-content")]'
-    prevSearch = '//span[{}]/a'.format(xpath_class('prevlink'))
+    prevSearch = '//span[d:class("prevlink")]/a'
    multipleImagesPerStrip = True
    endOfLife = True
    lang = 'de'
@ -143,7 +143,7 @@ class Science(_ParserScraper):
        'http://sci-ence.org/%s/')
    url = stripUrl % 'new-york-comic-con-2013'
    firstStripUrl = stripUrl % 'periodic-table-element-ass'
-    prevSearch = '//a[{}]'.format(xpath_class('navi-prev'))
+    prevSearch = '//a[d:class("navi-prev")]'
    imageSearch = '//div[@class="comicpane"]//img'
    endOfLife = True

@ -159,7 +159,7 @@ class SequentialArt(_ParserScraper):
    url = 'https://www.collectedcurios.com/sequentialart.php'
    stripUrl = url + '?s=%s'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//img[{}]'.format(xpath_class('w3-image'))
+    imageSearch = '//img[d:class("w3-image")]'
    prevSearch = '//a[@id="backOne"]'
    help = 'Index format: name'

@ -286,9 +286,9 @@ class SluggyFreelance(_ParserScraper):
    url = 'http://sluggy.com/'
    stripUrl = 'http://archives.sluggy.com/book.php?chapter=%s'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//div[%s]/img/@data-src' % xpath_class('comic_content')
-    prevSearch = '//div[%s]/a' % xpath_class('previous')
-    latestSearch = '//a[%s]' % xpath_class('archives_link')
+    imageSearch = '//div[d:class("comic_content")]/img/@data-src'
+    prevSearch = '//div[d:class("previous")]/a'
+    latestSearch = '//a[d:class("archives_link")]'
    starter = indirectStarter
    multipleImagesPerStrip = True
    help = 'Index format: chapter'
@ -374,7 +374,7 @@ class SpaceJunkArlia(_ParserScraper):
    url = 'http://spacejunkarlia.com/'
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'
-    imageSearch = '//div[%s]/img' % xpath_class('content')
+    imageSearch = '//div[d:class("content")]/img'
    prevSearch = '//a[text()="<"]'
    help = 'Index format: number'

@ -382,7 +382,7 @@ class SpaceJunkArlia(_ParserScraper):
 class SpaceTrawler(_ParserScraper):
    url = 'https://www.baldwinpage.com/spacetrawler/'
    firstStripUrl = url + '2010/01/01/spacetrawler-4/'
-    imageSearch = '//img[%s]' % xpath_class('size-full')
+    imageSearch = '//img[d:class("size-full")]'
    prevSearch = '//a[@rel="prev"]'


--- a/dosagelib/plugins/smackjeeves.py
+++ b/dosagelib/plugins/smackjeeves.py
@ -3,14 +3,13 @@
 # Copyright (C) 2019-2020 Daniel Ring
 import re

-from ..helpers import xpath_class
 from ..scraper import _ParserScraper


 class SmackJeeves(_ParserScraper):
    baseUrl = 'https://www.smackjeeves.com/discover/'
    apiBase = 'https://www.smackjeeves.com/api/discover/'
-    prevSearch = '//a[i[{}]]'.format(xpath_class('i-arrow-double-left-black'))
+    prevSearch = '//a[i[d:class("i-arrow-double-left-black")]]'
    imageSearch = re.compile("comicData:[^']*'([^']*)'", re.DOTALL)
    help = 'Index format: n'

--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@ -10,7 +10,7 @@ except ImportError:
    from cached_property import cached_property

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic

@ -40,8 +40,8 @@ class TheBrads(_ParserScraper):
        'http://bradcolbow.com/archive/C4/')
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % 'P125'
-    imageSearch = '//div[{}]//img'.format(xpath_class('entry'))
-    prevSearch = '//a[{}]'.format(xpath_class('prev'))
+    imageSearch = '//div[d:class("entry")]//img'
+    prevSearch = '//a[d:class("prev")]'
    multipleImagesPerStrip = True
    endOfLife = True

@ -120,7 +120,7 @@ class TheLandscaper(_ParserScraper):
        'http://landscaper.visual-assault.net/comic/%s')
    url = stripUrl % 'latest'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//article[{}]//img[1]'.format(xpath_class('comic'))
+    imageSearch = '//article[d:class("comic")]//img[1]'
    prevSearch = '//a[contains(text(), "Previous")]'
    endOfLife = True

@ -294,8 +294,8 @@ class TumbleDryComics(_WordPressScraper):
 class Turnoff(_ParserScraper):
    name = 'turnoff'
    url = 'https://turnoff.us/'
-    imageSearch = '//article[%s]//img' % xpath_class('post-content')
-    prevSearch = '//div[%s]//a' % xpath_class('prev')
+    imageSearch = '//article[d:class("post-content")]//img'
+    prevSearch = '//div[d:class("prev")]//a'
    stripUrl = url + 'geek/%s'
    firstStripUrl = stripUrl % 'tcp-buddies'
    multipleImagesPerStrip = True
@ -341,8 +341,8 @@ class Twokinds(_ParserScraper):
    url = 'http://twokinds.keenspot.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//article[%s]//img' % xpath_class('comic')
-    prevSearch = '//a[%s]' % xpath_class('navprev')
+    imageSearch = '//article[d:class("comic")]//img'
+    prevSearch = '//a[d:class("navprev")]'
    help = 'Index format: n (unpadded)'


--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@ -6,7 +6,7 @@
 from re import compile

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi

@ -46,7 +46,7 @@ class Unsounded(_ParserScraper):
    stripUrl = url + 'comic/ch%s/ch%s_%s.html'
    firstStripUrl = stripUrl % ('01', '01', '01')
    imageSearch = '//img[contains(@src, "pageart/")]'
-    prevSearch = '//a[%s]' % xpath_class('back')
+    prevSearch = '//a[d:class("back")]'
    latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
    multipleImagesPerStrip = True
    starter = indirectStarter
--- a/dosagelib/plugins/v.py
+++ b/dosagelib/plugins/v.py
@ -6,7 +6,7 @@
 from re import compile

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import bounceStarter, indirectStarter, xpath_class
+from ..helpers import bounceStarter, indirectStarter
 from ..util import tagre


@ -71,8 +71,8 @@ class VictimsOfTheSystem(_BasicScraper):
 class ViiviJaWagner(_ParserScraper):
    url = 'http://www.hs.fi/viivijawagner/'
    imageSearch = '//meta[@property="og:image"]/@content'
-    prevSearch = '//a[%s]' % xpath_class('prev')
-    latestSearch = '//div[%s]//a' % xpath_class('cartoon-content')
+    prevSearch = '//a[d:class("prev")]'
+    latestSearch = '//div[d:class("cartoon-content")]//a'
    starter = indirectStarter
    lang = 'fi'

--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@ -7,7 +7,7 @@ from re import compile, escape, IGNORECASE

 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
-from ..helpers import bounceStarter, indirectStarter, xpath_class
+from ..helpers import bounceStarter, indirectStarter
 from .common import _ComicControlScraper, _WPNavi, _WPNaviIn, _WPWebcomic


@ -28,8 +28,8 @@ class WastedTalent(_BasicScraper):

 class WebcomicName(_ParserScraper):
    url = 'https://webcomicname.com/'
-    imageSearch = '//figure[{}]//img'.format(xpath_class('tmblr-full'))
-    prevSearch = '//a[{}]'.format(xpath_class('next'))
+    imageSearch = '//figure[d:class("tmblr-full")]//img'
+    prevSearch = '//a[d:class("next")]'
    multipleImagesPerStrip = True


@ -38,10 +38,10 @@ class WebDesignerCOTW(_ParserScraper):
    url = baseUrl + 'category/comics/'
    starter = indirectStarter
    firstStripUrl = baseUrl + '2009/11/comics-of-the-week-1/'
-    imageSearch = '//article[%s]//img' % xpath_class('article-content')
+    imageSearch = '//article[d:class("article-content")]//img'
    multipleImagesPerStrip = True
-    prevSearch = '//a[span[%s]]' % xpath_class('icon-right-small')
-    latestSearch = '//a[%s]' % xpath_class('anim-link')
+    prevSearch = '//a[span[d:class("icon-right-small")]]'
+    latestSearch = '//a[d:class("anim-link")]'

    def shouldSkipUrl(self, url, data):
        """Skip non-comic URLs."""
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@ -1,12 +1,12 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2019 Tobias Gruetzmacher
+# Copyright (C) 2015-2020 Tobias Gruetzmacher
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
-from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class
+from ..helpers import bounceStarter, joinPathPartsNamer
 from .common import _WPNavi


@ -21,8 +21,8 @@ class Zapiro(_ParserScraper):
    url = 'http://mg.co.za/zapiro/'
    starter = bounceStarter
    imageSearch = '//div[@id="cartoon"]/img'
-    prevSearch = '//a[%s]' % xpath_class('left')
-    nextSearch = '//a[%s]' % xpath_class('right')
+    prevSearch = '//a[d:class("left")]'
+    nextSearch = '//a[d:class("right")]'
    namer = joinPathPartsNamer((-1,), ())


@ -31,8 +31,8 @@ class ZenPencils(_WPNavi):
    multipleImagesPerStrip = True
    firstStripUrl = url + 'comic/1-ralph-waldo-emerson-make-them-cry/'
    starter = bounceStarter
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
-    nextSearch = '//a[%s]' % xpath_class('navi-next')
+    prevSearch = '//a[d:class("navi-prev")]'
+    nextSearch = '//a[d:class("navi-next")]'


 class ZombieHunters(_BasicScraper):
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@ -26,6 +26,7 @@ from .util import (get_page, makeSequence, get_system_uid, tagre, normaliseURL,
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
+from .xml import NS


 ARCHIVE_ORG_URL = re.compile(r'https?://web\.archive\.org/web/[^/]*/')
@ -434,10 +435,6 @@ class _ParserScraper(Scraper):
    XML_DECL = re.compile(
        r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)

-    NS = {
-        "re": "http://exslt.org/regular-expressions"
-    }
-
    # Switch between CSS and XPath selectors for this class. Since CSS needs
    # another Python module, XPath is the default for now.
    css = False
@ -519,7 +516,7 @@ class _ParserScraper(Scraper):
            searchFun = data.cssselect
        else:
            def searchFun(s):
-                return data.xpath(s, namespaces=self.NS)
+                return data.xpath(s, namespaces=NS)
        patterns = makeSequence(patterns)
        for search in patterns:
            matched = False
--- a/dosagelib/xml.py
+++ b/dosagelib/xml.py
@ -0,0 +1,20 @@
+# SPDX-License-Identifier: MIT
+# Copyright (C) 2020 Tobias Gruetzmacher
+from lxml import etree
+
+
+NS = {
+    'd': 'https://dosage.rocks/xpath',
+    're': 'http://exslt.org/regular-expressions'
+}
+
+
+def find_by_class(context, cls):
+    """Return True if the context node's class attribute contains `cls`."""
+    # split() without arguments also handles tabs/newlines between class names
+    classes = context.context_node.attrib.get('class', '')
+    return cls in classes.split()
+
+
+dosagens = etree.FunctionNamespace(NS['d'])
+dosagens['class'] = find_by_class
--- a/tests/httpmocks.py
+++ b/tests/httpmocks.py
@ -14,7 +14,7 @@ def _file(name):


@lru_cache()
-def _content(name):
+def content(name):
    with gzip.open(_file(name + '.html.gz'), 'r') as f:
        return f.read()

@ -26,7 +26,7 @@ def _img(name):


 def page(url, pagename):
-    add(GET, url, _content(pagename))
+    add(GET, url, content(pagename))


 def png(url, name='empty'):
--- a/tests/test_xml.py
+++ b/tests/test_xml.py
@ -0,0 +1,25 @@
+# SPDX-License-Identifier: MIT
+# Copyright (C) 2020 Tobias Gruetzmacher
+
+from lxml import html
+
+from dosagelib.xml import NS
+
+import httpmocks
+
+
+tree = html.document_fromstring(httpmocks.content('zp-222'))
+
+
+class TestXML:
+    def xpath(self, path):
+        return tree.xpath(path, namespaces=NS)
+
+    def test_class_ext(self):
+        assert len(self.xpath('//li[d:class("menu-item-3773")]')) == 1
+        assert len(self.xpath('//ul[d:class("menu")]')) == 1
+        assert len(self.xpath('//li[d:class("menu-item-object-custom")]')) == 2
+        assert len(self.xpath('//li[d:class("menu-item")]')) == 25
+
+    def test_re_ext(self):
+        assert len(self.xpath(r'//img[re:test(@src, "posters.*jpg")]')) == 1