Fix a bunch of comics (fixes #277)

2023-08-27 02:23:16 +02:00 · 2023-08-27 02:23:16 +02:00 · 9e05fae304
commit 9e05fae304
parent 6d20de8b2a
4 changed files with 43 additions and 65 deletions
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile, escape
 from ..scraper import _BasicScraper, _ParserScraper
@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper):
    help = 'Index format: yyyymmdd'
-class GirlsWithSlingshots(_BasicScraper):
+class GirlsWithSlingshots(ComicControlScraper):
    url = 'https://girlswithslingshots.com/'
-    rurl = escape(url)
+    firstStripUrl = url + 'comic/gws1'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'gws1'
    imageSearch = (
        compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
        compile(tagre("img", "src",
                      r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
    )
    prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
                               before='rel="prev"'))
    help = 'Index format: stripname'
 class GleefulNihilism(WordPressScraper):
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@ -3,11 +3,8 @@
 # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
 # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
 # SPDX-FileCopyrightText: © 2019 Daniel Ring
-from re import compile
+from ..scraper import ParserScraper, _ParserScraper
 from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
 from ..helpers import bounceStarter, indirectStarter
 from ..util import tagre
 from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper):
        return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
-class LittleGamers(_BasicScraper):
+class LittleGamers(ParserScraper):
-    url = 'http://www.little-gamers.com/'
+    url = 'https://www.little-gamers.com/'
-    stripUrl = url + '%s/'
+    firstStripUrl = url + '2000/12/01/99'
-    firstStripUrl = stripUrl % '2000/12/01/99'
+    imageSearch = '//div[d:class("comic")]//img'
-    imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
+    prevSearch = ('//a[@id="previous"]',
-    prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
+        '//div[d:class("comic-navigation")]//a[text()="previous"]')
    help = 'Index format: yyyy/mm/dd/name'
 class LittleTales(_ParserScraper):
@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper):
    starter = indirectStarter
-class LoFiJinks(WordPressNaviIn):
+class LoFiJinks(WordPressScraper):
    baseUrl = 'https://hijinksensue.com/comic/'
    url = baseUrl + 'learning-to-love-again/'
    firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from ..scraper import Scraper
@ -1695,4 +1695,5 @@ class Renamed(Scraper):
            # Renamed in 3.1
            cls('Exiern', 'ComicFury/Exiern'),
            cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
        )
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -9,7 +9,8 @@ from os.path import splitext
 from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
 from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
 from ..util import tagre
-from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
+from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced,
    WordPressNavi, WordPressWebcomic)
 class SabrinaOnline(_BasicScraper):
@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper):
        return archivepages[-1]
 class SafelyEndangered(WordPressNavi):
    url = 'http://www.safelyendangered.com/'
    firstStripUrl = url + 'comic/ignored/'
 class SaffronAndSage(WordPressScraper):
    url = 'https://www.saffroncomic.com/'
    firstStripUrl = url + 'comic/p0001/'
@ -74,19 +70,18 @@ class Savestate(WordPressNavi):
 class ScandinaviaAndTheWorld(_ParserScraper):
-    url = 'https://satwcomic.com/'
+    url = 'https://satwcomic.com/sweden-denmark-and-norway'
-    stripUrl = url + '%s'
+    firstStripUrl = url
    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
    starter = indirectStarter
    imageSearch = '//img[@itemprop="image"]'
    prevSearch = '//a[@accesskey="p"]'
-    latestSearch = '//a[text()="View latest comic"]'
+    latestSearch = '//a[contains(@title, "Latest")]'
    textSearch = '//span[@itemprop="articleBody"]'
    help = 'Index format: stripname'
 class ScaryGoRound(_ParserScraper):
-    url = 'http://www.scarygoround.com/sgr/ar.php'
+    url = ('https://web.archive.org/web/20190327203330/'
        'https://www.scarygoround.com/sgr/ar.php')
    stripUrl = url + '?date=%s'
    firstStripUrl = stripUrl % '20020604'
    imageSearch = '//img[contains(@src, "/strips/")]'
@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper):
    allow_errors = (403,)
-class Sheldon(_BasicScraper):
+class Sheldon(ParserScraper):
-    url = 'http://www.sheldoncomics.com/'
+    url = 'https://www.sheldoncomics.com/'
-    rurl = escape(url)
+    firstStripUrl = url + 'comic/well-who-is-this/'
-    stripUrl = url + 'archive/%s.html'
+    imageSearch = '//div[@id="comic"]//img'
-    firstStripUrl = stripUrl % '011130'
+    prevSearch = '//a[img[d:class("left")]]'
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
                               after="sidenav-prev"))
    help = 'Index format: yymmdd'
 class Shifters(ParserScraper):
@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper):
    help = 'Index format: number'
-class Shortpacked(_ParserScraper):
+class Shortpacked(ComicControlScraper):
-    url = 'http://www.shortpacked.com/index.php'
+    url = 'https://www.shortpacked.com/comic/'
-    stripUrl = url + '?id=%s'
+    firstStripUrl = url + 'just-a-toy-store'
    css = True
    imageSearch = 'img#comic'
    prevSearch = 'a.prev'
    help = 'Index format: nnn'
-class ShotgunShuffle(WordPressScraper):
+class ShotgunShuffle(WordPressSpliced):
-    url = 'http://shotgunshuffle.com/'
+    # Currently down, use archive.org in the meantime (08-2023)
    url = ('https://web.archive.org/web/20230131163842/'
        'https://shotgunshuffle.com/')
    firstStripUrl = url + 'comic/pilot/'
@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper):
        return imageurls
-class SpaceJunkArlia(_ParserScraper):
+class SpaceJunkArlia(ParserScraper):
-    url = 'http://spacejunkarlia.com/'
+    url = ('https://web.archive.org/web/20220121133701/'
        'http://spacejunkarlia.com/')
    stripUrl = url + '?strip_id=%s'
    firstStripUrl = stripUrl % '0'
    imageSearch = '//div[d:class("content")]/img'
    prevSearch = '//a[text()="<"]'
    endOfLife = True
    help = 'Index format: number'
@ -717,4 +708,4 @@ class SwordsComic(ParserScraper):
    firstStripUrl = stripUrl % 'cover'
    imageSearch = '//div[@class="page-image-wrapper"]//img'
    prevSearch = '//a[@class="navigation-button navigation-previous"]'
-    help = 'Index format: Swordsnnn (unpadded)'
+    help = 'Index format: Swordsnnn (unpadded)'