Fix some modules.

2017-02-12 02:16:38 +01:00 · 2017-02-12 02:16:38 +01:00 · ebe98bc8ba
commit ebe98bc8ba
parent 20ca5d7fc2
6 changed files with 36 additions and 66 deletions
--- a/dosagelib/plugins/d.py
+++ b/dosagelib/plugins/d.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -219,20 +219,19 @@ class DreamKeepersPrelude(_ParserScraper):
    help = 'Index format: n'


-class DresdenCodak(_BasicScraper):
+class DresdenCodak(_ParserScraper):
    url = 'http://dresdencodak.com/'
-    rurl = escape(url)
-    stripUrl = None
+    startUrl = url + 'cat/comic/'
    firstStripUrl = url + '2007/02/08/pom/'
-    imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) +
-                         tagre("img", "src", r"%sm_prev2?\.png" % rurl,
-                               quote=""))
-    latestSearch = compile(tagre("div", "id", "preview") +
-                           tagre("a", "href",
-                                 r'(%s\d+/\d+/\d+/[^"]+)' % rurl))
+    imageSearch = '//section[%s]//img' % xpath_class('entry-content')
+    prevSearch = '//a[@rel="prev"]'
+    latestSearch = '//a[%s]' % xpath_class('tc-grid-bg-link')
    starter = indirectStarter

+    # Blog and comic are mixed...
+    def shouldSkipUrl(self, url, data):
+        return not data.xpath(self.imageSearch)
+

 class DrFun(_BasicScraper):
    baseUrl = 'http://www.ibiblio.org/Dave/'
@@ -287,10 +286,3 @@ class DumbingOfAge(_BasicScraper):
    prevSearch = compile(tagre("a", "href", r'(%s\d+/[^"]+)' % rurl, after="prev"))
    imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl))
    help = 'Index format: yyyy/comic/book-num/seriesname/stripname'
-
-
-class DungeonsAndDenizens(_WordPressScraper):
-    url = 'http://dungeond.com/'
-    firstStripUrl = url + '2005/08/23/08232005/'
-    endOfLife = True
-    prevSearch = '//a[%s]' % xpath_class('navi-prev')
--- a/dosagelib/plugins/k.py
+++ b/dosagelib/plugins/k.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -27,15 +27,6 @@ class KevinAndKell(_BasicScraper):
        return self.stripUrl % tuple(map(int, index.split('-')))


-class Key(_BasicScraper):
-    baseUrl = 'http://key.shadilyn.com/'
-    url = baseUrl + 'latestpage.html'
-    stripUrl = baseUrl + 'pages/%s.html'
-    imageSearch = compile(r'"((?:images/.+?)|(?:pages/images/.+?))"')
-    prevSearch = compile(r'</a><a href="(.+?html)".+?prev')
-    help = 'Index format: nnn'
-
-
 class KickInTheHead(_WordPressScraper):
    url = 'http://www.kickinthehead.org/'
    firstStripUrl = url + '2003/03/20/ipod-envy/'
--- a/dosagelib/plugins/m.py
+++ b/dosagelib/plugins/m.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -9,7 +9,7 @@ from re import compile, escape, IGNORECASE

 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
-from .common import _ComicControlScraper, _WordPressScraper
+from .common import _ComicControlScraper, _WordPressScraper, xpath_class


 class MacHall(_BasicScraper):
@@ -71,14 +71,12 @@ class Marilith(_BasicScraper):
    help = 'Index format: yyyymmdd'


-class MarriedToTheSea(_BasicScraper):
+class MarriedToTheSea(_ParserScraper):
    url = 'http://www.marriedtothesea.com/'
-    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '022806'
-    imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl,
-                                before="overflow"))
-    prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
+    imageSearch = '//div[%s]//p/img' % xpath_class('jumbotron')
+    prevSearch = '//a[contains(text(), "Yesterday")]'
    help = 'Index format: mmddyy'

    def namer(self, image_url, page_url):
@@ -201,6 +199,7 @@ class MysteriesOfTheArcana(_ParserScraper):
    imageSearch = '//div[@id="comic"]//img'
    prevSearch = '//a[@class="navprevious"]'

+
 class MonsterUnderTheBed(_WordPressScraper):
-	adult = True
-	url = 'http://themonsterunderthebed.net/'
+    adult = True
+    url = 'http://themonsterunderthebed.net/'
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -113,12 +113,12 @@ class Optipess(_WordPressScraper):
    textOptional = True


-class OurHomePlanet(_BasicScraper):
-    url = 'http://gdk.gd-kun.net/'
-    stripUrl = url + '%s.html'
+class OurHomePlanet(_ParserScraper):
+    url = 'http://www.ourhomeplanet.net/'
+    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '01'
-    imageSearch = compile(r'<img src="(pages/comic.+?)"')
-    prevSearch = compile(r'coords="50,18,95,65".+?href="(.+?\.html)".+?alt=')
+    imageSearch = '//a[@rel="next"]/img'
+    prevSearch = '//a[@rel="prev"]'
    help = 'Index format: n (unpadded)'


--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -109,7 +109,7 @@ class PHDComics(_ParserScraper):
    url = baseUrl + 'comics.php'
    stripUrl = baseUrl + 'comics/archive.php?comicid=%s'
    firstStripUrl = stripUrl % '1'
-    imageSearch = '//img[@id="comic"]'
+    imageSearch = '//img[@id="comic2"]'
    prevSearch = '//a[img[contains(@src, "prev_button")]]'
    nextSearch = '//a[img[contains(@src, "next_button")]]'
    help = 'Index format: n (unpadded)'
@@ -138,15 +138,6 @@ class PiledHigherAndDeeper(PHDComics):
    namer = queryNamer('comicid', use_page_url=True)


-class Pimpette(_ParserScraper):
-    url = 'http://pimpette.ca/'
-    stripUrl = url + 'index.php?date=%s'
-    firstStripUrl = stripUrl % '20030905'
-    imageSearch = '//div[@id="strip"]/img'
-    prevSearch = '//a[text()="previous"]'
-    help = 'Index format: yyyymmdd'
-
-
 class Pixel(_BasicScraper):
    url = 'http://pixelcomic.net/'
    rurl = escape(url)
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@@ -65,7 +65,7 @@ class SandraOnTheRocks(_BasicScraper):


 class ScandinaviaAndTheWorld(_ParserScraper):
-    url = 'http://satwcomic.com/'
+    url = 'https://satwcomic.com/'
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
    starter = indirectStarter
@@ -156,7 +156,7 @@ class SequentialArt(_BasicScraper):

 class SexyLosers(_ParserScraper):
    adult = True
-    url = 'http://www.sexylosers.com/'
+    url = 'https://www.sexylosers.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '003'
    imageSearch = '//div[@class="entry-content"]//img'
@@ -240,13 +240,10 @@ class Sithrah(_ParserScraper):
    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')


-class SkinDeep(_BasicScraper):
+class SkinDeep(_ParserScraper):
    url = 'http://www.skindeepcomic.com/'
-    stripUrl = url + 'archive/%s/'
-    imageSearch = compile(r'<span class="webcomic-object[^>]*><img src="([^"]*)"')
-    prevSearch = compile(tagre("a", "href", r'([^"]+)',
-                               after="previous-webcomic-link"))
-    help = 'Index format: custom'
+    imageSearch = '//a[%s]/img' % xpath_class('webcomic-link')
+    prevSearch = '//a[%s]' % xpath_class('previous-webcomic-link')


 class SleeplessDomain(_ComicControlScraper):
@@ -408,9 +405,9 @@ class StarCrossdDestiny(_ParserScraper):

 class StationV3(_ParserScraper):
    url = 'http://www.stationv3.com/'
-    stripUrl = url + 'd2/%s.html'
-    firstStripUrl = stripUrl % '20150628'
-    imageSearch = '//img[contains(@src,"/comics2/")]'
+    stripUrl = url + 'd3/%s.html'
+    firstStripUrl = stripUrl % '20170101'
+    imageSearch = '//img[contains(@src,"/comics3/")]'
    prevSearch = '//a[img[contains(@src,"/previous2")]]'
    help = 'Index format: yyyymmdd'