Fix a bunch of comics (fixes #277)
This commit is contained in:
parent 6d20de8b2a
commit 9e05fae304
4 changed files with 43 additions and 65 deletions
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile, escape
|
from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
|
@ -73,19 +73,9 @@ class GirlGenius(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class GirlsWithSlingshots(_BasicScraper):
|
class GirlsWithSlingshots(ComicControlScraper):
|
||||||
url = 'https://girlswithslingshots.com/'
|
url = 'https://girlswithslingshots.com/'
|
||||||
rurl = escape(url)
|
firstStripUrl = url + 'comic/gws1'
|
||||||
stripUrl = url + 'comic/%s'
|
|
||||||
firstStripUrl = stripUrl % 'gws1'
|
|
||||||
imageSearch = (
|
|
||||||
compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)),
|
|
||||||
compile(tagre("img", "src",
|
|
||||||
r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)')),
|
|
||||||
)
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%scomic/[^"]+)' % rurl,
|
|
||||||
before='rel="prev"'))
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class GleefulNihilism(WordPressScraper):
|
class GleefulNihilism(WordPressScraper):
|
||||||
|
|
|
@ -3,11 +3,8 @@
|
||||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from re import compile
|
from ..scraper import ParserScraper, _ParserScraper
|
||||||
|
|
||||||
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
|
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
from ..util import tagre
|
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
|
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn
|
||||||
|
|
||||||
|
|
||||||
|
@ -152,13 +149,12 @@ class LilithsWord(ComicControlScraper):
|
||||||
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
|
return imageUrl.rsplit('/', 1)[-1].split('-', 1)[1]
|
||||||
|
|
||||||
|
|
||||||
class LittleGamers(_BasicScraper):
|
class LittleGamers(ParserScraper):
|
||||||
url = 'http://www.little-gamers.com/'
|
url = 'https://www.little-gamers.com/'
|
||||||
stripUrl = url + '%s/'
|
firstStripUrl = url + '2000/12/01/99'
|
||||||
firstStripUrl = stripUrl % '2000/12/01/99'
|
imageSearch = '//div[d:class("comic")]//img'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
|
prevSearch = ('//a[@id="previous"]',
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
|
'//div[d:class("comic-navigation")]//a[text()="previous"]')
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
|
||||||
|
|
||||||
|
|
||||||
class LittleTales(_ParserScraper):
|
class LittleTales(_ParserScraper):
|
||||||
|
@ -197,7 +193,7 @@ class LoadingArtist(_ParserScraper):
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class LoFiJinks(WordPressNaviIn):
|
class LoFiJinks(WordPressScraper):
|
||||||
baseUrl = 'https://hijinksensue.com/comic/'
|
baseUrl = 'https://hijinksensue.com/comic/'
|
||||||
url = baseUrl + 'learning-to-love-again/'
|
url = baseUrl + 'learning-to-love-again/'
|
||||||
firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
|
firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||||
# Copyright (C) 2019-2020 Daniel Ring
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||||
from ..scraper import Scraper
|
from ..scraper import Scraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -1695,4 +1695,5 @@ class Renamed(Scraper):
|
||||||
|
|
||||||
# Renamed in 3.1
|
# Renamed in 3.1
|
||||||
cls('Exiern', 'ComicFury/Exiern'),
|
cls('Exiern', 'ComicFury/Exiern'),
|
||||||
|
cls('SafelyEndangered', 'WebToons/SafelyEndangered'),
|
||||||
)
|
)
|
||||||
|
|
|
@ -9,7 +9,8 @@ from os.path import splitext
|
||||||
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper, ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
|
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .common import ComicControlScraper, WordPressScraper, WordPressNavi, WordPressWebcomic
|
from .common import (ComicControlScraper, WordPressScraper, WordPressSpliced,
|
||||||
|
WordPressNavi, WordPressWebcomic)
|
||||||
|
|
||||||
|
|
||||||
class SabrinaOnline(_BasicScraper):
|
class SabrinaOnline(_BasicScraper):
|
||||||
|
@ -33,11 +34,6 @@ class SabrinaOnline(_BasicScraper):
|
||||||
return archivepages[-1]
|
return archivepages[-1]
|
||||||
|
|
||||||
|
|
||||||
class SafelyEndangered(WordPressNavi):
|
|
||||||
url = 'http://www.safelyendangered.com/'
|
|
||||||
firstStripUrl = url + 'comic/ignored/'
|
|
||||||
|
|
||||||
|
|
||||||
class SaffronAndSage(WordPressScraper):
|
class SaffronAndSage(WordPressScraper):
|
||||||
url = 'https://www.saffroncomic.com/'
|
url = 'https://www.saffroncomic.com/'
|
||||||
firstStripUrl = url + 'comic/p0001/'
|
firstStripUrl = url + 'comic/p0001/'
|
||||||
|
@ -74,19 +70,18 @@ class Savestate(WordPressNavi):
|
||||||
|
|
||||||
|
|
||||||
class ScandinaviaAndTheWorld(_ParserScraper):
|
class ScandinaviaAndTheWorld(_ParserScraper):
|
||||||
url = 'https://satwcomic.com/'
|
url = 'https://satwcomic.com/sweden-denmark-and-norway'
|
||||||
stripUrl = url + '%s'
|
firstStripUrl = url
|
||||||
firstStripUrl = stripUrl % 'sweden-denmark-and-norway'
|
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
imageSearch = '//img[@itemprop="image"]'
|
imageSearch = '//img[@itemprop="image"]'
|
||||||
prevSearch = '//a[@accesskey="p"]'
|
prevSearch = '//a[@accesskey="p"]'
|
||||||
latestSearch = '//a[text()="View latest comic"]'
|
latestSearch = '//a[contains(@title, "Latest")]'
|
||||||
textSearch = '//span[@itemprop="articleBody"]'
|
textSearch = '//span[@itemprop="articleBody"]'
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
||||||
|
|
||||||
class ScaryGoRound(_ParserScraper):
|
class ScaryGoRound(_ParserScraper):
|
||||||
url = 'http://www.scarygoround.com/sgr/ar.php'
|
url = ('https://web.archive.org/web/20190327203330/'
|
||||||
|
'https://www.scarygoround.com/sgr/ar.php')
|
||||||
stripUrl = url + '?date=%s'
|
stripUrl = url + '?date=%s'
|
||||||
firstStripUrl = stripUrl % '20020604'
|
firstStripUrl = stripUrl % '20020604'
|
||||||
imageSearch = '//img[contains(@src, "/strips/")]'
|
imageSearch = '//img[contains(@src, "/strips/")]'
|
||||||
|
@ -198,15 +193,11 @@ class Sharksplode(WordPressScraper):
|
||||||
allow_errors = (403,)
|
allow_errors = (403,)
|
||||||
|
|
||||||
|
|
||||||
class Sheldon(_BasicScraper):
|
class Sheldon(ParserScraper):
|
||||||
url = 'http://www.sheldoncomics.com/'
|
url = 'https://www.sheldoncomics.com/'
|
||||||
rurl = escape(url)
|
firstStripUrl = url + 'comic/well-who-is-this/'
|
||||||
stripUrl = url + 'archive/%s.html'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
firstStripUrl = stripUrl % '011130'
|
prevSearch = '//a[img[d:class("left")]]'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl,
|
|
||||||
after="sidenav-prev"))
|
|
||||||
help = 'Index format: yymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class Shifters(ParserScraper):
|
class Shifters(ParserScraper):
|
||||||
|
@ -278,17 +269,15 @@ class ShipInABottle(WordPressScraper):
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
class Shortpacked(_ParserScraper):
|
class Shortpacked(ComicControlScraper):
|
||||||
url = 'http://www.shortpacked.com/index.php'
|
url = 'https://www.shortpacked.com/comic/'
|
||||||
stripUrl = url + '?id=%s'
|
firstStripUrl = url + 'just-a-toy-store'
|
||||||
css = True
|
|
||||||
imageSearch = 'img#comic'
|
|
||||||
prevSearch = 'a.prev'
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class ShotgunShuffle(WordPressScraper):
|
class ShotgunShuffle(WordPressSpliced):
|
||||||
url = 'http://shotgunshuffle.com/'
|
# Currently down, use archive.org in the meantime (08-2023)
|
||||||
|
url = ('https://web.archive.org/web/20230131163842/'
|
||||||
|
'https://shotgunshuffle.com/')
|
||||||
firstStripUrl = url + 'comic/pilot/'
|
firstStripUrl = url + 'comic/pilot/'
|
||||||
|
|
||||||
|
|
||||||
|
@ -452,12 +441,14 @@ class SpaceFurries(ParserScraper):
|
||||||
return imageurls
|
return imageurls
|
||||||
|
|
||||||
|
|
||||||
class SpaceJunkArlia(_ParserScraper):
|
class SpaceJunkArlia(ParserScraper):
|
||||||
url = 'http://spacejunkarlia.com/'
|
url = ('https://web.archive.org/web/20220121133701/'
|
||||||
|
'http://spacejunkarlia.com/')
|
||||||
stripUrl = url + '?strip_id=%s'
|
stripUrl = url + '?strip_id=%s'
|
||||||
firstStripUrl = stripUrl % '0'
|
firstStripUrl = stripUrl % '0'
|
||||||
imageSearch = '//div[d:class("content")]/img'
|
imageSearch = '//div[d:class("content")]/img'
|
||||||
prevSearch = '//a[text()="<"]'
|
prevSearch = '//a[text()="<"]'
|
||||||
|
endOfLife = True
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue