Fix a bunch of comic modules.

2016-10-31 06:57:47 +01:00 · 2016-10-31 06:57:47 +01:00 · 47e2502ec7
commit 47e2502ec7
parent 446b81fc45
10 changed files with 52 additions and 105 deletions
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@@ -167,11 +167,12 @@ class DMFA(_BasicScraper):
    help = 'Index format: nnn (normally, some specials)'


-class DoemainOfOurOwn(_BasicScraper):
+class DoemainOfOurOwn(_ParserScraper):
    url = 'http://www.doemain.com/'
    stripUrl = url + 'index.cgi/%s'
-    imageSearch = compile(r"<img border='0' width='\d+' height='\d+' src='(/strips/\d{4}/\d{6}-[^\']+)'")
-    prevSearch = compile(r'<a href="(/index\.cgi/\d{4}-\d{2}-\d{2})"><img width="\d+" height="\d+" border="\d+" alt="Previous Strip"')
+    imageSearch = '//td/img[contains(@src, "/strips/")]'
+    prevSearch = '//a[img[@alt="Previous Strip"]]'
+    endOfLife = True
    help = 'Index format: yyyy-mm-dd'


@@ -194,17 +195,11 @@ class DominicDeegan(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'


-class DorkTower(_BasicScraper):
+class DorkTower(_ParserScraper):
    url = 'http://www.dorktower.com/'
-    rurl = escape(url)
-    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
-    imageSearch = compile(tagre("div", "class", "entry-content") +
-                          "\s*<p>\s*" +
-                          tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl,
-                                after=' alt'))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + "Previous")
-    help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
+    firstStripUrl = url + '1997/01/01/shadis-magazine-strip-1/'
+    imageSearch = '//div[%s]//a/img' % xpath_class('entry-content')
+    prevSearch = '//a[%s][text()="Previous"]' % xpath_class('btn')


 class Dracula(_BasicScraper):
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@@ -56,17 +56,10 @@ class EatLiver(_ParserScraper):
    latestSearch = '//a[@rel="bookmark"]'


-class EatThatToast(_BasicScraper):
+class EatThatToast(_WordPressScraper):
    url = 'http://eatthattoast.com/'
-    rurl = escape(url)
-    stripUrl = url + 'comic/%s'
-    firstStripUrl = stripUrl % 'thewizard/'
-    imageSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl,
-                               after='comic-nav-base comic-nav-previous'))
-    textSearch = compile(tagre("div", "id", r'comic') + "\s*.*\s*" +
-                         tagre("img", "alt", r'([^"]+)'))
-    help = 'Index Format: name'
+    firstStripUrl = url + 'comic/thewizard/'
+    textSearch = _WordPressScraper.imageSearch + '/@alt'


 class EdibleDirt(_BasicScraper):
@@ -225,13 +218,12 @@ class ExtraLife(_BasicScraper):
    help = 'Index format: stripname'


-class ExtraOrdinary(_BasicScraper):
+class ExtraOrdinary(_ParserScraper):
    url = 'http://www.exocomics.com/'
-    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '01'
-    prevSearch = compile(tagre("a", "href", r'(%s\d+)' % rurl, before="prev"))
-    imageSearch = compile(tagre("img", "src", r'(%scomics/comics/\d+\.[^"]+)' % rurl))
+    prevSearch = '//a[%s]' % xpath_class('prev')
+    imageSearch = '//img[%s]' % xpath_class('image-style-main-comic')
    help = 'Index format: number'


--- a/dosagelib/plugins/j.py
+++ b/dosagelib/plugins/j.py
@@ -10,7 +10,7 @@ from re import compile, escape
 from ..scraper import _BasicScraper
 from ..util import tagre
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper
+from .common import _ComicControlScraper, xpath_class


 class JackCannon(_BasicScraper):
@@ -53,6 +53,8 @@ class JoeAndMonkey(_BasicScraper):


 class JohnnyWander(_ComicControlScraper):
+    imageSearch = ('//ul[%s]/li/@data-src' % xpath_class('cc-showbig'),
+                   _ComicControlScraper.imageSearch)
    url = 'http://www.johnnywander.com/'


--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@@ -257,6 +257,7 @@ class Removed(Scraper):
            cls('PensAndTales/FireflyCross'),
            cls('PetiteSymphony/Djandora'),
            cls('PetiteSymphony/Generation17'),
+            cls('PunksAndNerds', 'mis'),
            cls('PunksAndNerdsOld'),
            cls('RedsPlanet'),
            cls('SmackJeeves/Aarrevaara'),
@@ -329,6 +330,7 @@ class Removed(Scraper):
            cls('Stubble'),
            cls('SuburbanTribe'),
            cls('TheOuterQuarter'),
+            cls('TheParkingLotIsFull'),
            cls('ThunderAndLightning'),
            cls('TinyKittenTeeth'),
            cls('TwoTwoOneFour'),
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -203,6 +203,7 @@ class Precocious(_ParserScraper):
    prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
    help = 'Index format: yyyy/mm/dd'

+
 class PrinceOfSartar(_WordPressScraper):
    url = 'http://www.princeofsartar.com/'
    stripUrl = url + 'comic/%s/'
@@ -219,6 +220,7 @@ class PrinceOfSartar(_WordPressScraper):
        image_ext = image_url.rsplit('.', 1)[1]
        return '%s.%s' % (title, image_ext)

+
 class PS238(_ParserScraper):
    url = 'http://ps238.nodwick.com/'
    stripUrl = url + 'comic/%s/'
@@ -227,14 +229,6 @@ class PS238(_ParserScraper):
    help = 'Index format: yyyy-mm-dd'


-class PunksAndNerds(_WordPressScraper):
-    url = 'http://www.punksandnerds.com/'
-    stripUrl = url + '?p=%s'
-    firstStripUrl = stripUrl % '15'
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
-    help = 'Index format: nnn'
-
-
 class PvPonline(_BasicScraper):
    url = 'http://pvponline.com/comic'
    stripUrl = url + '%s'
--- a/dosagelib/plugins/r.py
+++ b/dosagelib/plugins/r.py
@@ -9,7 +9,6 @@ from re import compile
 from six.moves.urllib.parse import urljoin

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter
 from ..util import tagre
 from .common import _WordPressScraper, xpath_class

@@ -80,8 +79,6 @@ class RomanticallyApocalyptic(_ParserScraper):
    firstStripUrl = stripUrl % '0'
    imageSearch = '//div[%s]/center//img' % xpath_class('comicpanel')
    prevSearch = '//a[@accesskey="p"]'
-    latestSearch = '//a[span[%s]]' % xpath_class('glyphicon-fast-forward')
-    starter = indirectStarter
    help = 'Index format: n'
    adult = True

--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -298,33 +298,17 @@ class SluggyFreelance(_BasicScraper):
    help = 'Index format: yymmdd'


-class SMBC(_ParserScraper):
+class SMBC(_ComicControlScraper):
    url = 'http://www.smbc-comics.com/'
-    stripUrl = url + 'index.php?id=%s'
-    firstStripUrl = stripUrl % '1'
+    firstStripUrl = url + 'comic/2002-09-05'
    multipleImagesPerStrip = True
    imageSearch = ['//img[@id="cc-comic"]', '//div[@id="aftercomic"]/img']
-    prevSearch = '//a[@class="prev"]'
-    help = 'Index format: nnnn'
    textSearch = '//img[@id="cc-comic"]/@title'

    def namer(self, image_url, page_url):
        """Remove random noise from name."""
        return image_url.rsplit('-', 1)[-1]

-    def shouldSkipUrl(self, url, data):
-        """Skip promo or missing update pages."""
-        return url in (
-            self.stripUrl % '2865',
-            self.stripUrl % '2653',
-            self.stripUrl % '2424',
-            self.stripUrl % '2226',
-            self.stripUrl % '2069',
-            self.stripUrl % '1895',
-            self.stripUrl % '1896',
-            self.stripUrl % '1589',
-        )
-

 class SnowFlame(_WordPressScraper):
    url = 'http://www.snowflamecomic.com/'
@@ -375,23 +359,22 @@ class Sorcery101(_ParserScraper):
    help = 'Index format: stripname'


-class SpaceTrawler(_WordPressScraper):
-    base_url = 'http://spacetrawler.com/'
-    url = base_url + '2013/12/24/spacetrawler-379/'
-    firstStripUrl = base_url + '2010/01/01/spacetrawler-4/'
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
-    endOfLife = True
-
-
-class SpaceJunkArlia(_BasicScraper):
-    url = 'http://spacejunkarlia.com'
-    stripUrl = url + '/index.php?strip_id=%s'
+class SpaceJunkArlia(_ParserScraper):
+    url = 'http://spacejunkarlia.com/'
+    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'
-    imageSearch = compile(tagre('img', 'src', r'(comics/[^"]+)'))
-    prevSearch = compile(tagre('a', 'href', r'(\?strip_id=\d+)') + '&lt;<')
+    imageSearch = '//div[%s]/img' % xpath_class('content')
+    prevSearch = '//a[text()="<"]'
    help = 'Index format: number'


+class SpaceTrawler(_ParserScraper):
+    url = 'https://www.baldwinpage.com/spacetrawler/'
+    firstStripUrl = url + '2010/01/01/spacetrawler-4/'
+    imageSearch = '//img[%s]' % xpath_class('size-full')
+    prevSearch = '//a[@rel="prev"]'
+
+
 class Spamusement(_BasicScraper):
    url = 'http://spamusement.com/'
    rurl = escape(url)
@@ -487,7 +470,7 @@ class StrongFemaleProtagonist(_ParserScraper):
    stripUrl = url + '%s/'
    css = True
    imageSearch = 'article p img'
-    prevSearch = 'div.nav-previous > a'
+    prevSearch = 'a.page-nav__item--left'
    help = 'Index format: issue-?/page-??'

    def shouldSkipUrl(self, url, data):
@@ -499,7 +482,7 @@ class StrongFemaleProtagonist(_ParserScraper):
            self.stripUrl % 'issue-5/newspaper',
            self.stripUrl % 'issue-5/hiatus-1',
            self.stripUrl % 'issue-5/hiatus-2',
-            self.stripUrl % 'ssue-1/no-page',
+            self.stripUrl % 'issue-1/no-page',
        )


@@ -532,6 +515,7 @@ class StuffNoOneToldMe(_BasicScraper):
    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return url in (
+            self.stripUrl % '2016/05/so-you-would-like-to-share-my-comics',  # no comic
            self.stripUrl % '2012/08/self-rant',  # no comic
            self.stripUrl % '2012/06/if-you-wonder-where-ive-been',  # video
            self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to',  # video
--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -14,14 +14,11 @@ from .common import (_ComicControlScraper, _TumblrScraper, _WordPressScraper,
                     xpath_class)


-class TheBrads(_BasicScraper):
-    url = 'http://bradcolbow.com/archive/C4/'
-    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % 'P125'
-    imageSearch = compile(tagre("img", "src", r'(http://s3\.amazonaws\.com/the_brads/the-?brads[-_][^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://bradcolbow\.com/archive/C4/[^"]+)', before="prev"))
+class TheBrads(_ParserScraper):
+    url = 'http://bradcolbow.com/archive/'
+    imageSearch = '//div[%s]//img' % xpath_class('entry')
+    prevSearch = '//a[%s]' % xpath_class('prev')
    multipleImagesPerStrip = True
-    help = 'Index format: a letter and a number'


 class TheDevilsPanties(_BasicScraper):
@@ -88,17 +85,6 @@ class TheOrderOfTheStick(_BasicScraper):
        return page_url.rsplit('/', 1)[-1][:-5]


-class TheParkingLotIsFull(_BasicScraper):
-    baseUrl = 'http://plif.courageunfettered.com/'
-    url = baseUrl + 'archive/arch2002.htm'
-    stripUrl = baseUrl + 'archive/arch%s.htm'
-    firstStripUrl = stripUrl % '1998'
-    imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
-    multipleImagesPerStrip = True
-    prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
-    help = 'Index format: nnn'
-
-
 class TheThinHLine(_TumblrScraper):
    url = 'http://thinhline.tumblr.com/'
    firstStripUrl = url + 'post/4177372348/thl-1-a-cats-got-his-tongue-click-on-the'
@@ -147,13 +133,10 @@ class ThreePanelSoul(_ComicControlScraper):

 class ToonHole(_WordPressScraper):
    url = 'http://toonhole.com/'
-    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
-    prevSearch = '//a[@rel="prev"]'
-    help = 'Index format: yyyy/mm/stripname'
+    firstStripUrl = url + 'comic/toon-hole-coming-soon-2010/'

    def shouldSkipUrl(self, url, data):
-        return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",)
+        return url in (self.url + "comic/if-game-of-thrones-was-animated/",)


 class TracyAndTristan(_BasicScraper):
--- a/dosagelib/plugins/v.py
+++ b/dosagelib/plugins/v.py
@@ -6,8 +6,9 @@
 from __future__ import absolute_import, division, print_function
 from re import compile

-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
+from .common import xpath_class


 class VampireCheerleaders(_BasicScraper):
@@ -51,13 +52,10 @@ class VictimsOfTheSystem(_BasicScraper):
    help = 'Index format: nnn-nnn'


-class ViiviJaWagner(_BasicScraper):
+class ViiviJaWagner(_ParserScraper):
    url = 'http://www.hs.fi/viivijawagner/'
-    stripUrl = None
-    imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)',
-                               before="prev-cm"))
-    help = 'Index format: none'
+    imageSearch = '//div[@id="full-comic"]//img'
+    prevSearch = '//a[%s]' % xpath_class('prev-cm')
    lang = 'fi'

    def namer(self, image_url, page_url):
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@@ -23,9 +23,9 @@ class ZapComic(_ParserScraper):
 class Zapiro(_ParserScraper):
    url = 'http://mg.co.za/zapiro/'
    starter = bounceStarter
-    imageSearch = '//div[@id="cartoon_full_size"]//img'
-    prevSearch = '//li[@class="nav_older"]/a'
-    nextSearch = '//li[@class="nav_newer"]/a'
+    imageSearch = '//img[%s]' % xpath_class('img-fluid')
+    prevSearch = '//a[%s]' % xpath_class('left')
+    nextSearch = '//a[%s]' % xpath_class('right')

    def namer(self, image_url, page_url):
        parts = page_url.rsplit('/', 1)