Fix some modules (a-c)

2021-01-18 01:25:03 +01:00 · 2021-01-18 01:25:03 +01:00 · 3d05e59c36
commit 3d05e59c36
parent 2b1dca8305
4 changed files with 43 additions and 38 deletions
--- a/dosagelib/plugins/a.py
+++ b/dosagelib/plugins/a.py
@@ -8,10 +8,10 @@ from re import compile, escape, MULTILINE
 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
 from ..helpers import regexNamer, bounceStarter, indirectStarter
-from .common import _WordPressScraper, _WPNavi, _WPNaviIn, _WPWebcomic
+from .common import _WordPressScraper, _WordPressSpliced, _WPNavi, _WPNaviIn, _WPWebcomic


-class AbbysAgency(_WordPressScraper):
+class AbbysAgency(_WordPressSpliced):
    url = 'https://abbysagency.us/'
    stripUrl = url + 'blog/comic/%s/'
    firstStripUrl = stripUrl % 'a'
@@ -169,7 +169,7 @@ class Alice(_WordPressScraper):
    starter = indirectStarter


-class AlienDice(_WordPressScraper):
+class AlienDice(_WordPressSpliced):
    url = 'https://aliendice.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % '05162001'
@@ -185,7 +185,7 @@ class AlienDice(_WordPressScraper):
        return imageUrl.rsplit('/', 1)[-1].replace('20010831', '2001-08-31')


-class AlienDiceLegacy(_WordPressScraper):
+class AlienDiceLegacy(_WordPressSpliced):
    name = 'AlienDice/Legacy'
    stripUrl = 'https://aliendice.com/comic/%s/'
    url = stripUrl % 'legacy-2-15'
@@ -304,14 +304,21 @@ class Anaria(_ParserScraper):
        return filename.replace('00.jpg', 'new00.jpg').replace('new', '1')


-class Angband(_BasicScraper):
+class Angband(_ParserScraper):
    url = 'http://angband.calamarain.net/'
-    stripUrl = url + 'view.php?date=%s'
-    firstStripUrl = stripUrl % '2005-12-30'
-    imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') +
-                         "Previous")
-    help = 'Index format: yyyy-mm-dd'
+    stripUrl = url + '%s'
+    imageSearch = '//img'
+    multipleImagesPerStrip = True
+    endOfLife = True
+
+    def starter(self):
+        page = self.getPage(self.url)
+        self.pages = page.xpath('//p/a[not(contains(@href, "cast"))]/@href')
+        self.firstStripUrl = self.pages[0]
+        return self.pages[-1]
+
+    def getPrevUrl(self, url, data):
+        return self.pages[self.pages.index(url) - 1]


 class Angels2200(_BasicScraper):
--- a/dosagelib/plugins/b.py
+++ b/dosagelib/plugins/b.py
@@ -42,7 +42,7 @@ class BalderDash(_ComicControlScraper):


 class BallerinaMafia(_ParserScraper):
-    url = 'http://www.ballerinamafia.net/'
+    url = 'https://web.archive.org/web/20200115230012/http://ballerinamafia.net/'
    stripUrl = url + 'index.php?pid=%s'
    firstStripUrl = stripUrl % '20100906'
    imageSearch = ('//img[contains(@alt, "Comic")]',
@@ -135,7 +135,7 @@ class BetweenFailures(_WPWebcomic):


 class BeyondTheVeil(_WordPressScraper):
-    url = 'http://beyondtheveilcomic.com/'
+    url = 'https://web.archive.org/web/20201009235642/http://beyondtheveilcomic.com/'
    stripUrl = url + '?comic=%s'
    firstStripUrl = stripUrl % '01252010'
    endOfLife = True
@@ -178,9 +178,9 @@ class BirdBoy(_WordPressScraper):
        try:
            pageNr = int(strip)
        except ValueError:
-            pageNr = None # Use the string to fetch a cover page
+            pageNr = None  # Use the string to fetch a cover page
        if volume == 'synopsis':
-            strip = '{0}{1}'.format(pageNr, '-02' if strip in [1,3] else '')
+            strip = '{0}{1}'.format(pageNr, '-02' if strip in [1, 3] else '')
        else:
            volume = 'volume-' + volume
            if pageNr is not None:
--- a/dosagelib/plugins/c.py
+++ b/dosagelib/plugins/c.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 from re import compile, escape

@@ -11,13 +11,12 @@ from ..util import tagre
 from .common import _WordPressScraper, _WPNavi, _WPWebcomic


-class CampComic(_BasicScraper):
+class CampComic(_ParserScraper):
    url = 'http://campcomic.com/comic/'
-    rurl = escape(url)
    stripUrl = url + '%s'
    firstStripUrl = stripUrl % '6'
-    imageSearch = compile(tagre("img", "src", r'(http://hw1\.pa-cdn\.com/camp/assets/img/katie/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev"))
+    imageSearch = '//div[@id="comic"]/img'
+    prevSearch = '//a[d:class("btnPrev")]'
    help = 'Index Format: number'


@@ -112,9 +111,9 @@ class Catalyst(_BasicScraper):


 class CatAndGirl(_ParserScraper):
-    url = 'http://catandgirl.com/'
+    url = 'https://catandgirl.com/'
    imageSearch = '//div[@id="comic"]//img'
-    prevSearch = '//a[@rel="prev"]'
+    prevSearch = '//a[d:class("pager--prev")]'


 class CatenaCafe(_WordPressScraper):
@@ -287,7 +286,7 @@ class CollegeCatastrophe(_ParserScraper):
    stripUrl = url + '/%s'
    firstStripUrl = stripUrl % '2000-11-10'
    imageSearch = '//img[@class="comic-image"]'
-    prevSearch = '//a[@class="prev"]'
+    prevSearch = '//a[span[contains(text(),"Previous")]]'
    endOfLife = True
    multipleImagesPerStrip = True

@@ -365,13 +364,15 @@ class CorydonCafe(_ParserScraper):


 class CourtingDisaster(_WordPressScraper):
-    url = 'http://www.courting-disaster.com/'
+    url = 'https://web.archive.org/web/20201127150157/http://www.courting-disaster.com/'
    firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/'
+    endOfLife = True


 class CraftedFables(_WordPressScraper):
-    url = 'http://www.caf-fiends.net/comicpress/'
+    url = 'https://web.archive.org/web/20191126025641/http://www.caf-fiends.net/comicpress/'
    prevSearch = '//a[@rel="prev"]'
+    endOfLife = True


 class CrimsonDark(_BasicScraper):
@@ -407,20 +408,12 @@ class CrossTimeCafe(_ParserScraper):
    endOfLife = True


-class CucumberQuest(_BasicScraper):
-    url = 'http://cucumber.gigidigi.com/'
-    rurl = escape(url)
-    stripUrl = url + 'cq/%s/'
+class CucumberQuest(_WPWebcomic):
+    baseUrl = 'http://cucumber.gigidigi.com/'
+    stripUrl = baseUrl + 'cq/%s/'
    firstStripUrl = stripUrl % 'page-1'
-    startUrl = url + 'recent.html'
+    url = firstStripUrl
    starter = indirectStarter
-    imageSearch = (
-        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+[^"]+)' % rurl)),
-        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/ch\d+[^"]+)' % rurl)),
-        compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/bonus[^"]+)' % rurl)),
-    )
-    prevSearch = compile(tagre("a", "href", r'(%scq/[^"]+/)' % rurl, after="previous"))
-    latestSearch = compile(r'window\.location="(/cq/[^"]+/)"')
    help = 'Index format: stripname'


--- a/dosagelib/plugins/common.py
+++ b/dosagelib/plugins/common.py
@@ -18,6 +18,11 @@ class _WordPressScraper(_ParserScraper):
    latestSearch = '//a[d:class("comic-nav-last")]'


+class _WordPressSpliced(_ParserScraper):
+    imageSearch = '//div[@id="spliced-comic"]//img'
+    prevSearch = '//a[d:class("previous-comic")]'
+
+
 class _WPNavi(_WordPressScraper):
    prevSearch = '//a[d:class("navi-prev")]'

@@ -26,7 +31,7 @@ class _WPNaviIn(_WordPressScraper):
    prevSearch = '//a[d:class("navi-prev-in")]'


-class _WPWebcomic(_WordPressScraper):
+class _WPWebcomic(_ParserScraper):
    imageSearch = '//div[d:class("webcomic-image")]//img'
    prevSearch = '//a[d:class("previous-webcomic-link")]'
    nextSearch = '///a[d:class("next-webcomic-link")]'