Fix some more comic modules.

2016-05-16 23:16:29 +02:00 · 2016-05-16 23:16:29 +02:00 · a6cf4e7040
commit a6cf4e7040
parent be1a63da0c
10 changed files with 50 additions and 116 deletions
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -186,19 +186,6 @@ class BoredAndEvil(_BasicScraper):
    help = 'Index format: yyyy-mm-dd'


-class BoyOnAStickAndSlither(_BasicScraper):
-    url = 'http://www.boasas.com/'
-    stripUrl = url + 'page/%s'
-    firstStripUrl = stripUrl % '2'
-    imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)'))
-    prevSearch = compile(tagre("a", "href", r'(/page/\d+)') +
-                         "<span>Next page")
-    help = 'Index format: n (unpadded)'
-
-    def namer(self, image_url, page_url):
-        return page_url.rsplit('/')[-1]
-
-
 class BratHalla(_WordPressScraper):
    url = 'http://brat-halla.com/'

--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@@ -126,8 +126,8 @@ class DieselSweeties(_ParserScraper):
    firstStripUrl = stripUrl % '1'
    imageSearch = '//img[@class="xomic"]'
    prevSearch = '//div[@id="prev"]//a[contains(text(), "previous")]'
-    nextSearch = '//div[@id="prev"]//a[contains(text(), "next")]'
-    starter = bounceStarter
+    latestSearch = prevSearch
+    starter = indirectStarter
    help = 'Index format: n (unpadded)'


--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@@ -46,19 +46,13 @@ class EasilyAmused(_WordPressScraper):
    starter = indirectStarter


-class EatLiver(_BasicScraper):
+class EatLiver(_ParserScraper):
    url = 'http://www.eatliver.com/'
-    rurl = escape(url)
    starter = indirectStarter
-    stripUrl = url + "i.php?n=%s"
-    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("link", "href", r'(%simg/\d+/[^"]+)' % rurl,
-                                before="image_src"))
-    prevSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
-                         "&#060;&#060; Previous")
-    latestSearch = compile(tagre("a", "href", r'(i\.php\?n=\d+)') +
-                           tagre("img", "src", r'img/small/[^"]+') +
-                           r"</a>\s*<br")
+    multipleImagesPerStrip = True
+    imageSearch = '//div[%s]//img' % xpath_class('post-content')
+    prevSearch = '//a[@rel="prev"]'
+    latestSearch = '//a[@rel="bookmark"]'


 class EatThatToast(_BasicScraper):
@@ -185,18 +179,9 @@ class EvilDiva(_BasicScraper):
    help = 'Index format: n (unpadded)'


-class EvilInc(_BasicScraper):
+class EvilInc(_WordPressScraper):
    url = 'http://evil-inc.com/'
-    stripUrl = url + 'comic/%s'
-    firstStripUrl = stripUrl % 'monday-3'
-    imageSearch = compile(
-        tagre("div", "id", "comic") +
-        r'\s*.*\s*' +  # filter out the variant href tag
-        tagre("img", "src",
-              r'(http://i\d\.wp\.com/evil-inc\.com/wp-content/uploads/[^"]+)'))
-    prevSearch = compile(tagre("span", "class", "mininav-prev") +
-                         tagre("a", "href", r'([^"]+)'))
-    help = 'Index format: stripname'
+    firstStripUrl = url + 'comic/monday-3/'


 class Evilish(_ParserScraper):
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@@ -92,9 +92,9 @@ class Flipside(_BasicScraper):


 class FonFlatter(_ParserScraper):
-    url = 'http://www.fonflatter.de/'
+    url = 'https://www.fonflatter.de/'
    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % '2005/09/20/01-begegnung-mit-batman'
+    firstStripUrl = url + '2005/09/20/01-begegnung-mit-batman/'
    lang = 'de'
    imageSearch = r'//img[re:test(@src, "/fred_\d+")]'
    prevSearch = '//a[@rel="prev"]'
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@@ -52,9 +52,11 @@ class LasLindas(_BasicScraper):


 class LastNerdsOnEarth(_ParserScraper):
-    url = 'http://www.lastnerdsonearth.com/latest/'
+    baseUrl = 'http://www.lastnerdsonearth.com/'
+    url = baseUrl + 'latest/'
+    firstStripUrl = baseUrl + 'ch1p1'
    imageSearch = '//div[@id="content"]/a/img'
-    prevSearch = '//div[@id="comicnav"]/a[last()-2]'
+    prevSearch = '//div[@id="comicnav"]/a[img[contains(@src, "nav-prev")]]'


 class LeastICouldDo(_BasicScraper):
@@ -97,7 +99,7 @@ class LoFiJinks(_WPNaviIn):


 class LookingForGroup(_ParserScraper):
-    url = 'http://www.lfgcomic.com/'
+    url = 'http://www.lfg.co/'
    stripUrl = url + 'page/%s/'
    firstStripUrl = stripUrl % '1'
    css = True
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@@ -93,6 +93,7 @@ class Optipess(_WordPressScraper):
    firstStripUrl = url + '2008/12/01/jason-friend-of-the-butterflies/'
    prevSearch = '//a[%s]' % xpath_class('navi-prev')
    textSearch = '//div[@id="comic"]//img/@alt'
+    textOptional = True


 class OurHomePlanet(_BasicScraper):
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -117,7 +117,9 @@ class PHDComics(_ParserScraper):
    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        return url in (
-            self.stripUrl % '1669',  # video
+            # video
+            self.stripUrl % '1880',
+            self.stripUrl % '1669',
        )


@@ -125,15 +127,9 @@ class Picklewhistle(_ComicControlScraper):
    url = 'http://www.picklewhistle.com/'


-class PicPakDog(_BasicScraper):
+class PicPakDog(_WordPressScraper):
    url = 'http://www.picpak.net/'
-    rurl = escape(url)
-    stripUrl = url + 'comic/%s/'
-    firstStripUrl = stripUrl % 'dogs-cant-spell'
-    imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+-[^"]+\.png)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
-                               after="nav-prev"))
-    help = 'Index format: stripname'
+    firstStripUrl = url + 'comic/dogs-cant-spell/'


 # Keep, because naming is different to PHDComics...
@@ -199,14 +195,12 @@ class PoorlyDrawnLines(_BasicScraper):
    help = 'Index Format: name'


-class Precocious(_BasicScraper):
+class Precocious(_ParserScraper):
    url = 'http://www.precociouscomic.com/'
-    starter = indirectStarter
    stripUrl = url + 'archive/comic/%s'
-    imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))'))
-    prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
-    latestSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') +
-                           tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
+    firstStripUrl = stripUrl % '2009/03/09'
+    imageSearch = '//img[contains(@src, "/comics/")]'
+    prevSearch = '//a[img[contains(@src, "/back_arrow")]]'
    help = 'Index format: yyyy/mm/dd'


--- a/dosagelib/plugins/t.py
+++ b/dosagelib/plugins/t.py
@@ -145,7 +145,7 @@ class ThreePanelSoul(_ComicControlScraper):


 class ToonHole(_WordPressScraper):
-    url = 'http://www.toonhole.com/'
+    url = 'http://toonhole.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2009/12/toon-hole-coming-soon-2010'
    prevSearch = '//a[@rel="prev"]'
--- a/dosagelib/plugins/w.py
+++ b/dosagelib/plugins/w.py
@@ -7,20 +7,15 @@ from __future__ import absolute_import, division, print_function

 from re import compile, escape, IGNORECASE

-from ..scraper import _BasicScraper
+from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
 from ..helpers import indirectStarter
-from .common import _ComicControlScraper, _WordPressScraper
+from .common import _ComicControlScraper, _WordPressScraper, xpath_class


-class WapsiSquare(_BasicScraper):
+class WapsiSquare(_WordPressScraper):
    url = 'http://wapsisquare.com/'
-    rurl = escape(url)
-    stripUrl = url + 'comic/%s/'
-    firstStripUrl = stripUrl % '09092001'
-    imageSearch = compile(r'<img src="(%scomics/.+?)"' % rurl)
-    prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
-    help = 'Index format: stripname'
+    firstStripUrl = url + 'comic/09092001/'


 class WastedTalent(_BasicScraper):
@@ -33,15 +28,6 @@ class WastedTalent(_BasicScraper):
    help = 'Index format: stripname'


-class WayfarersMoon(_BasicScraper):
-    url = 'http://www.wayfarersmoon.com/'
-    stripUrl = url + 'index.php?page=%s'
-    firstStripUrl = stripUrl % '0'
-    imageSearch = compile(r'<img src="(/admin.+?)"')
-    prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
-    help = 'Index format: nn'
-
-
 class WebDesignerCOTW(_BasicScraper):
    url = 'http://www.webdesignerdepot.com/'
    rurl = escape(url)
@@ -91,23 +77,10 @@ class Weregeek(_BasicScraper):
    help = 'Index format: yyyy/mm/dd'


-class WhiteNinja(_BasicScraper):
-    baseUrl = 'http://www.whiteninjacomics.com/'
-    url = baseUrl + 'comics.shtml'
-    stripUrl = baseUrl + 'comics/%s.shtml'
-    imageSearch = compile(r'<img src=(/images/comics/(?!t-).+?\.gif) border=0')
-    prevSearch = compile(r'(/comics/.+?shtml).+?previous')
-    help = 'Index format: s (comic name)'
-
-
-class WhiteNoise(_BasicScraper):
-    baseUrl = 'http://www.wncomic.com/'
-    url = baseUrl + 'archive.php'
-    stripUrl = baseUrl + 'archive_comments.php?strip_id=%s'
-    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(r'(istrip_files/strips/.+?)"')
-    prevSearch = compile(r'</a><a href="(.+?)"><img src="images/top_back.jpg" ')
-    help = 'Index format: n'
+class WhiteNoise(_WordPressScraper):
+    url = 'http://whitenoisecomic.com/'
+    firstStripUrl = url + 'comic/book-one/'
+    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')


 class Whomp(_ComicControlScraper):
@@ -129,13 +102,13 @@ class WhyTheLongFace(_BasicScraper):
    help = 'Index format: yyyymm'


-class Wigu(_BasicScraper):
-    url = 'http://wigucomics.com/'
-    stripUrl = url + 'oc/index.php?comic=%s'
+class Wigu(_ParserScraper):
+    stripUrl = 'http://www.wigucomics.com/adventures/index.php?comic=%s'
+    url = stripUrl % '-1'
    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src", r'(/oc/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/oc/index\.php\?comic=\d+)',
-                               after="go back"))
+    imageSearch = '//div[@id="comic"]//img[contains(@src, "/comics/")]'
+    prevSearch = '//a[@alt="go back"]'
+    endOfLife = True
    help = 'Index format: n'


@@ -164,7 +137,6 @@ class Wondermark(_BasicScraper):
 class WorldOfMrToast(_BasicScraper):
    baseUrl = 'http://www.theimaginaryworld.com/'
    url = baseUrl + 'mrTcomicA.html'
-    stripUrl = baseUrl + '%s.html'
    imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
    # list the archive links since there is no prev/next navigation
    prevurls = (
@@ -185,9 +157,9 @@ class WorldOfMrToast(_BasicScraper):
    )
    firstStripUrl = prevurls[-1]
    multipleImagesPerStrip = True
-    help = 'Index format: none'
+    endOfLife = True

-    def getPrevUrl(self, url, data, baseUrl):
+    def getPrevUrl(self, url, data):
        idx = self.prevurls.index(url)
        try:
            return self.prevurls[idx + 1]
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@@ -20,23 +20,16 @@ class ZapComic(_ParserScraper):
    prevSearch = 'a.previous-comic-link'


-class Zapiro(_BasicScraper):
-    url = 'http://www.mg.co.za/zapiro/'
+class Zapiro(_ParserScraper):
+    url = 'http://mg.co.za/zapiro/'
    starter = bounceStarter
-    stripUrl = 'http://mg.co.za/cartoon/%s'
-    firstStripUrl = stripUrl % 'zapiro_681'
-    imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
-    prevSearch = compile(tagre("li", "class", r'nav_older') +
-                         tagre("a", "href",
-                               r'(http://mg\.co\.za/cartoon/[^"]+)'))
-    nextSearch = compile(tagre("li", "class", r'nav_older') +
-                         tagre("a", "href",
-                               r'(http://mg\.co\.za/cartoon/[^"]+)'))
-    help = 'Index format: yyyy-mm-dd-stripname'
+    imageSearch = '//div[@id="cartoon_full_size"]//img'
+    prevSearch = '//li[@class="nav_older"]/a'
+    nextSearch = '//li[@class="nav_newer"]/a'

    def namer(self, image_url, page_url):
-        name = image_url.split('/')[-3]
-        return name
+        parts = page_url.rsplit('/', 1)
+        return parts[1]


 class ZenPencils(_WordPressScraper):