dosage/dosagelib/plugins/w.py

240 lines
7.8 KiB
Python
Raw Permalink Normal View History

# SPDX-License-Identifier: MIT
2024-03-17 20:44:46 +00:00
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from re import compile, escape, IGNORECASE
2012-06-20 19:58:13 +00:00
2023-06-07 04:03:36 +00:00
from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
2012-11-26 06:13:32 +00:00
from ..util import tagre
2020-09-30 19:13:11 +00:00
from ..helpers import bounceStarter
2022-06-06 13:00:56 +00:00
from .common import ComicControlScraper, WordPressScraper, WordPressNaviIn, WordPressWebcomic
2012-06-20 19:58:13 +00:00
class WapsiSquare(WordPressNaviIn):
    """Scraper for Wapsi Square (wapsisquare.com)."""

    url = 'http://wapsisquare.com/'
    firstStripUrl = url + 'comic/09092001/'

    def shouldSkipUrl(self, url, data):
        """Skip pages without images."""
        # Pages embedding an iframe hold a video rather than a comic image.
        iframes = self.match(data, '//iframe')
        return iframes
2020-09-30 19:17:46 +00:00
2012-06-20 19:58:13 +00:00
2020-09-30 19:24:01 +00:00
class WastedTalent(_ParserScraper):
    """Scraper for Wasted Talent (www.wastedtalent.ca)."""

    # Strip pages live under comic/<name>; the first one is 'anime-crack'.
    url = 'http://www.wastedtalent.ca/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'anime-crack'

    # A single page may carry more than one image.
    multipleImagesPerStrip = True
    imageSearch = '//div[d:class("comic_content")]/img'
    prevSearch = '//li[d:class("previous")]/a'
2013-02-06 21:08:36 +00:00
2020-04-19 21:42:27 +00:00
class WebcomicName(_ParserScraper):
    """Scraper for Webcomic Name (webcomicname.com)."""

    url = 'https://webcomicname.com/'
    multipleImagesPerStrip = True
    imageSearch = '//figure[d:class("tmblr-full")]//img'
    # NOTE(review): the "previous" search follows the link with class "next";
    # presumably the site's "next" link leads to older posts -- confirm.
    prevSearch = '//a[d:class("next")]'
2023-06-07 04:03:36 +00:00
class Weregeek(ParserScraper):
    """Scraper for Weregeek (www.weregeek.com)."""

    # Strip pages live under comic/<name>/; the first one is 'comic-1'.
    url = 'http://www.weregeek.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'comic-1'

    # XPath expressions locating the comic image and the previous-strip link.
    imageSearch = '//div[d:class("webcomic-media")]//img'
    prevSearch = '//a[d:class("previous-webcomic-link")]'
2019-07-13 07:49:31 +00:00
class WereIWolf(_ParserScraper):
    """Scraper for Were I wolf (wolfwares.ca), a comic split into chapters.

    Strip URLs take two parameters: the chapter name and a start position
    within that chapter ('0' for the chapter's first page, 'latest' for its
    newest one).
    """

    stripUrl = 'https://wolfwares.ca/comics/Were I wolf/strip2.php?name=%s&start=%s'
    url = stripUrl % ('4 Black and White - part 3', 'latest')
    firstStripUrl = stripUrl % ('1 Sirens', '0')
    imageSearch = '//img[contains(@src, "ROW")]'
    prevSearch = '//a[./img[contains(@src, "previous")]]'
    multipleImagesPerStrip = True
    endOfLife = True
    # Chapter names in reading order; the first character is the chapter number.
    chapters = ('1 Sirens',
                '2 Black and White',
                '3 Black and White - Princess and Knight',
                '4 Black and White - part 3')

    def namer(self, imageUrl, pageUrl):
        """Return the image filename prefixed with its chapter number.

        The chapter is recognised by looking for a chapter name inside
        pageUrl. When no chapter name is found, the bare filename is
        returned instead of failing on an unbound chapter number.
        """
        filename = imageUrl.rsplit('/', 1)[-1]
        chapterNum = None
        for chapter in self.chapters:
            if chapter in pageUrl:
                chapterNum = chapter[0]
        if chapterNum is None:
            return filename
        # Prepend chapter number to image filename
        return chapterNum + '_' + filename

    def getPrevUrl(self, url, data):
        """Return the previous page URL, bridging the gaps between chapters."""
        # Fix missing navigation links between chapters: the first page of a
        # chapter is preceded by the latest page of the chapter before it.
        for older, newer in zip(self.chapters, self.chapters[1:]):
            if url == self.stripUrl % (newer, '0'):
                return self.stripUrl % (older, 'latest')
        return super(WereIWolf, self).getPrevUrl(url, data)

    def getIndexStripUrl(self, index):
        """Build a strip URL from an index of the form '<chapter>-<start>'.

        Chapter names may themselves contain '-', so only the last '-'
        separates the chapter name from the start position.
        """
        index = index.rsplit('-', 1)
        return self.stripUrl % (index[0], index[1])
class WhiteNoise(WordPressWebcomic):
    """Scraper for White Noise (whitenoisecomic.com)."""

    # Strip pages live under comic/<name>/; the first one is 'book-one'.
    url = 'http://whitenoisecomic.com/'
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'book-one'

    # XPath locating the comic images inside the element with id "comic".
    imageSearch = '//div[@id="comic"]//img'
2013-04-10 16:36:33 +00:00
class WhiteNoiseLee(ComicControlScraper):
    """Scraper for the White Noise comic at www.white-noise-comic.com."""

    url = 'http://www.white-noise-comic.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '1-0'
    starter = bounceStarter

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page's last path segment, keeping the
        image's own file extension."""
        pageName = pageUrl.rpartition('/')[2]
        extension = imageUrl.rpartition('.')[2]
        return '%s.%s' % (pageName, extension)
class Whomp(ComicControlScraper):
    """Scraper for Whomp! (www.whompcomic.com)."""

    url = 'http://www.whompcomic.com/'
    firstStripUrl = url + 'comic/06152010'
    # The strip text is taken from the title attribute of the comic image.
    textSearch = '//img[@id="cc-comic"]/@title'
2014-06-24 18:48:49 +00:00
2012-06-20 19:58:13 +00:00
class WhyTheLongFace(_BasicScraper):
    """Scraper for Why The Long Face (www.absurdnotions.org)."""

    baseUrl = 'http://www.absurdnotions.org/'
    # Regex-escaped form of the base URL, for embedding in patterns.
    rurl = escape(baseUrl)
    url = baseUrl + 'wtlf200709.html'
    stripUrl = baseUrl + 'wtlf%s.html'
    firstStripUrl = stripUrl % '200306'
    help = 'Index format: yyyymm'

    multipleImagesPerStrip = True
    # Matches either an absolute image URL below the base URL or a relative
    # "lf<digits>" image name; matching is case-insensitive.
    # NOTE(review): the '.' before \w{1,4} is unescaped and so matches any
    # character -- confirm whether a literal dot was intended.
    imageSearch = compile(
        r'<img src="(%swtlf.+?|lf\d+.\w{1,4})"' % rurl,
        IGNORECASE,
    )
    prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
2022-06-06 14:48:39 +00:00
class Widdershins(ComicControlScraper):
    """Scraper for Widdershins (widdershinscomic.com)."""

    url = 'https://widdershinscomic.com/'
    stripUrl = url + 'wdshn/%s'
    firstStripUrl = stripUrl % 'sleight-of-hand-cover'
    starter = bounceStarter

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page's last path segment, keeping the
        image's own file extension."""
        pageName = pageUrl.rpartition('/')[2]
        extension = imageUrl.rpartition('.')[2]
        return '%s.%s' % (pageName, extension)
2016-05-16 21:16:29 +00:00
class Wigu(_ParserScraper):
    """Scraper for Wigu Adventures (www.wigucomics.com); marked end-of-life."""

    # Strips are addressed by number; '-1' is used for the start page.
    stripUrl = 'http://www.wigucomics.com/adventures/index.php?comic=%s'
    url = stripUrl % '-1'
    firstStripUrl = stripUrl % '1'
    help = 'Index format: n'
    endOfLife = True

    # Only images whose source lies under /comics/ count as strips.
    imageSearch = '//div[@id="comic"]//img[contains(@src, "/comics/")]'
    prevSearch = '//a[@alt="go back"]'
2012-06-20 19:58:13 +00:00
class WildeLife(ComicControlScraper):
    """Scraper for Wilde Life (www.wildelifecomic.com)."""

    # Strip pages live under comic/<index>; the first one is '1'.
    url = 'http://www.wildelifecomic.com/'
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '1'
2022-06-06 13:00:56 +00:00
class Wolfpac(WordPressScraper):
    """Scraper for Wolfpac (wolfpac.ca)."""

    url = 'https://wolfpac.ca/'
    # The first strip is the title page in the comic archive.
    firstStripUrl = url + 'archives/comic/wolfpac-title'
2013-02-06 21:08:36 +00:00
class Wonderella(_BasicScraper):
    """Scraper for The Non-Adventures of Wonderella (nonadventures.com)."""

    url = 'http://nonadventures.com/'
    # Regex-escaped form of the site URL, for embedding in patterns.
    rurl = escape(url)
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2006/09/09/the-torment-of-a-thousand-yesterdays'
    help = 'Index format: yyyy/mm/dd/name'

    # The comic image is the <img> right after the <div id="comic"> tag and
    # must point below <site>/comics/.
    imageSearch = compile(
        tagre("div", "id", r"comic", quote=r'["\']')
        + r"\s*"
        + tagre("img", "src", r'(%scomics/[^"]+)' % rurl)
    )
    # The previous link is a dated permalink (digits/digits/digits/name).
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")
    )
2022-06-06 14:22:31 +00:00
class Wondermark(WordPressScraper):
    """Scraper for Wondermark (wondermark.com)."""

    url = 'http://wondermark.com/'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '001'
    help = 'Index format: nnn (001-999), 1knn (1000-1099), cnnnn (1100-)'

    # The previous strip is the anchor marked rel="prev".
    prevSearch = '//a[@rel="prev"]'
2013-04-10 16:36:33 +00:00
class WorldOfMrToast(_BasicScraper):
    """Scraper for The World of Mr. Toast (www.theimaginaryworld.com).

    The site has no prev/next navigation, so the archive pages are listed
    explicitly and walked in order.
    """

    baseUrl = 'http://www.theimaginaryworld.com/'
    url = baseUrl + 'mrTcomicA.html'
    imageSearch = compile(tagre("img", "src", r'(comic[^"]+)'))
    # list the archive links since there is no prev/next navigation
    prevurls = (
        url,
        baseUrl + 'mrTcomicW02.html',
        baseUrl + 'mrTcomicW01.html',
        baseUrl + 'mrGcomic03.html',
        baseUrl + 'mrGcomic02.html',
        baseUrl + 'mrGcomic01.html',
        baseUrl + 'mrTcomicT05.html',
        baseUrl + 'mrTcomicT04.html',
        baseUrl + 'mrTcomicT03.html',
        baseUrl + 'mrTcomicT02.html',
        baseUrl + 'mrTcomicT01.html',
        baseUrl + 'mrTcomicIW3.html',
        baseUrl + 'mrTcomicIW2.html',
        baseUrl + 'mrTcomicIW1.html',
    )
    # The last archive page in the list is the first strip.
    firstStripUrl = prevurls[-1]
    multipleImagesPerStrip = True
    endOfLife = True

    def getPrevUrl(self, url, data):
        """Return the archive page listed after url, or None at the end.

        Raises ValueError when url is not one of the listed archive pages.
        """
        following = self.prevurls.index(url) + 1
        if following < len(self.prevurls):
            return self.prevurls[following]
        return None
2013-03-21 17:33:16 +00:00
class WormWorldSaga(_BasicScraper):
    """Scraper for The Wormworld Saga (www.wormworldsaga.com).

    Each chapter is a single page at chapters/chapterNN/<LANG>/index.php.
    Navigation runs from the latest chapter down to chapter01.
    """

    url = 'http://www.wormworldsaga.com/'
    stripUrl = url + 'chapters/%s/index.php'
    firstStripUrl = stripUrl % 'chapter01/EN'
    # A chapter page holds both "images/CH.." and "panels/CH.." pictures.
    imageSearch = (
        compile(tagre("img", "src", r'(images/CH\d+_\d+\.[^"]+)')),
        compile(tagre("img", "src", r'(panels/CH\d+_[^"]+)')),
    )
    # Number of the newest chapter; crawling starts there.
    latestChapter = 5
    multipleImagesPerStrip = True

    def starter(self):
        """Return the URL of the latest chapter in this scraper's language."""
        # NOTE(review): self.lang is not defined in this class; presumably it
        # is supplied by the base class or a subclass -- confirm.
        return '%schapters/chapter%02d/%s/index.php' % (
            self.url, self.latestChapter, self.lang.upper())

    def getPrevUrl(self, url, data):
        """Find previous URL.

        Replaces the chapter number in url with the one before it. Every
        chapter from latestChapter down to chapter02 is handled, so the
        crawl does not stop right after the starting chapter. Returns None
        for chapter01 or when url names no known chapter.
        """
        for number in range(self.latestChapter, 1, -1):
            current = 'chapter%02d' % number
            if current in url:
                return url.replace(current, 'chapter%02d' % (number - 1))
        return None
class WormWorldSagaFrench(WormWorldSaga):
    """French-language variant of the Wormworld Saga scraper."""

    lang = 'fr'
2013-03-21 17:33:16 +00:00
class WormWorldSagaGerman(WormWorldSaga):
    """German-language variant of the Wormworld Saga scraper."""

    lang = 'de'
2013-03-21 17:33:16 +00:00
class WormWorldSagaSpanish(WormWorldSaga):
    """Spanish-language variant of the Wormworld Saga scraper."""

    lang = 'es'