Fix a bunch of comics (fixes #277)

2023-08-27 02:23:16 +02:00 · 2023-08-27 02:23:16 +02:00 · 9e05fae304
commit 9e05fae304
parent 6d20de8b2a
4 changed files with 43 additions and 65 deletions
--- a/dosagelib/plugins/g.py
+++ b/dosagelib/plugins/g.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
@@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper):
     help = 'Index format: yyyymmdd'


-class GirlsWithSlingshots(_BasicScraper):
+class GirlsWithSlingshots(ComicControlScraper):
     url = 'https://girlswithslingshots.com/'
-    rurl = escape(url)
-    stripUrl = url + 'comic/%s'
-    firstStripUrl = stripUrl % 'gws1'
-    imageSearch = (
-        compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
-        compile(tagre("img", "src",
-                      r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
-    )
-    prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
-                               before='rel="prev"'))
-    help = 'Index format: stripname'
+    firstStripUrl = url + 'comic/gws1'


 class GleefulNihilism(WordPressScraper):
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@@ -3,11 +3,8 @@
 # SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
 # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
 # SPDX-FileCopyrightText: © 2019 Daniel Ring
-from re import compile
-
-from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
+from ..scraper import ParserScraper, _ParserScraper
 from ..helpers import bounceStarter, indirectStarter
-from ..util import tagre
 from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn


@@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper):
         return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]


-class LittleGamers(_BasicScraper):
-    url = 'http://www.little-gamers.com/'
-    stripUrl = url + '%s/'
-    firstStripUrl = stripUrl % '2000/12/01/99'
-    imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
-    help = 'Index format: yyyy/mm/dd/name'
+class LittleGamers(ParserScraper):
+    url = 'https://www.little-gamers.com/'
+    firstStripUrl = url + '2000/12/01/99'
+    imageSearch = '//div[d:class("comic")]//img'
+    prevSearch = ('//a[@id="previous"]',
+        '//div[d:class("comic-navigation")]//a[text()="previous"]')


 class LittleTales(_ParserScraper):
@@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper):
     starter = indirectStarter


-class LoFiJinks(WordPressNaviIn):
+class LoFiJinks(WordPressScraper):
     baseUrl = 'https://hijinksensue.com/comic/'
     url = baseUrl + 'learning-to-love-again/'
     firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
--- a/dosagelib/plugins/old.py
+++ b/dosagelib/plugins/old.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from ..scraper import Scraper


@@ -1695,4 +1695,5 @@ class Renamed(Scraper):

             # Renamed in 3.1
             cls('Exiern', 'ComicFury/Exiern'),
+            cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
         )
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -9,7 +9,8 @@ from os.path import splitext
 from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
 from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
 from ..util import tagre
-from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
+from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced,
+    WordPressNavi, WordPressWebcomic)


 class SabrinaOnline(_BasicScraper):
@@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper):
         return archivepages[-1]


-class SafelyEndangered(WordPressNavi):
-    url = 'http://www.safelyendangered.com/'
-    firstStripUrl = url + 'comic/ignored/'
-
-
 class SaffronAndSage(WordPressScraper):
     url = 'https://www.saffroncomic.com/'
     firstStripUrl = url + 'comic/p0001/'
@@ -74,19 +70,18 @@ class Savestate(WordPressNavi):


 class ScandinaviaAndTheWorld(_ParserScraper):
-    url = 'https://satwcomic.com/'
-    stripUrl = url + '%s'
-    firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
+    url = 'https://satwcomic.com/sweden-denmark-and-norway'
+    firstStripUrl = url
     starter = indirectStarter
     imageSearch = '//img[@itemprop="image"]'
     prevSearch = '//a[@accesskey="p"]'
-    latestSearch = '//a[text()="View latest comic"]'
+    latestSearch = '//a[contains(@title, "Latest")]'
     textSearch = '//span[@itemprop="articleBody"]'
-    help = 'Index format: stripname'


 class ScaryGoRound(_ParserScraper):
-    url = 'http://www.scarygoround.com/sgr/ar.php'
+    url = ('https://web.archive.org/web/20190327203330/'
+        'https://www.scarygoround.com/sgr/ar.php')
     stripUrl = url + '?date=%s'
     firstStripUrl = stripUrl % '20020604'
     imageSearch = '//img[contains(@src, "/strips/")]'
@@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper):
     allow_errors = (403,)


-class Sheldon(_BasicScraper):
-    url = 'http://www.sheldoncomics.com/'
-    rurl = escape(url)
-    stripUrl = url + 'archive/%s.html'
-    firstStripUrl = stripUrl % '011130'
-    imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
-                               after="sidenav-prev"))
-    help = 'Index format: yymmdd'
+class Sheldon(ParserScraper):
+    url = 'https://www.sheldoncomics.com/'
+    firstStripUrl = url + 'comic/well-who-is-this/'
+    imageSearch = '//div[@id="comic"]//img'
+    prevSearch = '//a[img[d:class("left")]]'


 class Shifters(ParserScraper):
@@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper):
     help = 'Index format: number'


-class Shortpacked(_ParserScraper):
-    url = 'http://www.shortpacked.com/index.php'
-    stripUrl = url + '?id=%s'
-    css = True
-    imageSearch = 'img#comic'
-    prevSearch = 'a.prev'
-    help = 'Index format: nnn'
+class Shortpacked(ComicControlScraper):
+    url = 'https://www.shortpacked.com/comic/'
+    firstStripUrl = url + 'just-a-toy-store'


-class ShotgunShuffle(WordPressScraper):
-    url = 'http://shotgunshuffle.com/'
+class ShotgunShuffle(WordPressSpliced):
+    # Currently down, use archive.org in the meantime (08-2023)
+    url = ('https://web.archive.org/web/20230131163842/'
+        'https://shotgunshuffle.com/')
     firstStripUrl = url + 'comic/pilot/'


@@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper):
         return imageurls


-class SpaceJunkArlia(_ParserScraper):
-    url = 'http://spacejunkarlia.com/'
+class SpaceJunkArlia(ParserScraper):
+    url = ('https://web.archive.org/web/20220121133701/'
+        'http://spacejunkarlia.com/')
     stripUrl = url + '?strip_id=%s'
     firstStripUrl = stripUrl % '0'
     imageSearch = '//div[d:class("content")]/img'
     prevSearch = '//a[text()="<"]'
+    endOfLife = True
     help = 'Index format: number'