2019-08-22 05:36:09 +00:00
|
|
|
# SPDX-License-Identifier: MIT
|
2023-06-10 13:05:57 +00:00
|
|
|
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
|
|
|
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
2022-06-06 00:27:22 +00:00
|
|
|
from ..output import out
|
2022-06-06 10:08:32 +00:00
|
|
|
from ..scraper import ParserScraper
|
2019-08-22 05:36:09 +00:00
|
|
|
|
|
|
|
|
2022-06-06 10:08:32 +00:00
|
|
|
class Tapas(ParserScraper):
    """Scraper for comic series hosted on tapas.io.

    Each instance targets one series. The newest episode is discovered via
    the series' episode listing endpoint, and older episodes are reached by
    following the ``prev_ep_id`` reported by each episode's ``/info``
    endpoint.
    """

    baseUrl = 'https://tapas.io/'
    # XPath expressions evaluated against episode pages.
    # NOTE(review): how the base class consumes these is not visible here.
    imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
    prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
    latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
    # An episode page may yield several images; namer() numbers them.
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Set up the scraper for one series.

        :param name: comic name; registered as ``'Tapas/' + name``.
        :param url: series slug, inserted into ``series/<slug>/info``.
        """
        super().__init__('Tapas/' + name)
        self.url = self.baseUrl + 'series/' + url + '/info'
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Return the URL of the newest episode of the series.

        Reads the series id from the info page, requests the episode list
        sorted newest-first and takes the first ``data-id`` found in the
        returned body.
        """
        # Retrieve comic metadata from info page
        info = self.getPage(self.url)
        series = self.match(info, '//@data-series-id')[0]
        # Retrieve comic metadata from API
        data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
        data.raise_for_status()
        # 'body' is treated as a string of markup; the first data-id="..."
        # attribute in it belongs to the newest episode (sort=NEWEST).
        episodes = data.json()['data']['body']
        return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]

    def getPrevUrl(self, url, data):
        """Return the URL of the episode preceding ``url``.

        The ``data`` argument passed by the caller is not used; the episode's
        ``/info`` endpoint is queried instead.
        """
        # Retrieve comic metadata from API
        data = self.session.get(url + '/info')
        data.raise_for_status()
        apiData = data.json()['data']
        # NOTE(review): presumably scene == 2 means the previous episode is
        # the first one of the series - confirm against the Tapas API.
        if apiData['scene'] == 2:
            self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']
        return self.stripUrl % apiData['prev_ep_id']

    def extract_image_urls(self, url, data):
        """Return the image URLs of a page, remembering them for namer()."""
        # Save link order for position-based filenames
        self._cached_image_urls = super().extract_image_urls(url, data)
        return self._cached_image_urls

    def shouldSkipUrl(self, url, data):
        """Return True (and warn) when the page shows a sign-in button."""
        # NOTE(review): d:class() is a custom XPath function, presumably
        # registered by the scraper base - not visible in this file.
        if self.match(data, '//button[d:class("js-have-to-sign")]'):
            out.warn(f'Nothing to download on "{url}", because a login is required.')
            return True
        return False

    def namer(self, imageUrl, pageUrl):
        """Build a filename from the episode id and the image's page position.

        :param imageUrl: URL of the image; must be one of the URLs returned
            by the most recent extract_image_urls() call.
        :param pageUrl: URL of the episode page (``.../episode/<id>``).
        :returns: ``<id>-<index>.<ext>`` when the page has several images,
            ``<id>.<ext>`` otherwise. The ``.<ext>`` part is omitted when the
            image URL carries no extension.
        :raises ValueError: if ``imageUrl`` is not among the cached URLs.
        """
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self._cached_image_urls.index(imageUrl)
        # The extension has to come from the image URL: the page URL is
        # 'https://tapas.io/episode/<id>', so splitting it at the last '.'
        # would yield 'io/episode/<id>' instead of a file extension.
        # Drop any fragment/query string and look at the last path segment
        # only, so dots in the host name are never mistaken for one.
        imageName = imageUrl.split('#', 1)[0].split('?', 1)[0].rsplit('/', 1)[-1]
        imageExt = imageName.rpartition('.')[2] if '.' in imageName else ''
        if len(self._cached_image_urls) > 1:
            filename = "%s-%d" % (episodeNum, imageNum)
        else:
            filename = episodeNum
        if imageExt:
            filename += '.' + imageExt
        return filename

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported Tapas series."""
        return (
            # Manually-added comics
            cls('AmpleTime', 'Ample-Time'),
            cls('FANGS', 'fangscomic'),
            cls('FishNuggets', 'Fish-Nuggets'),
            cls('Ginpu', 'Ginpu-Studios-Comics'),
            cls('HoneyAndTheMoon', 'Honey-and-the-Moon'),
            cls('InsignificantOtters', 'IOtters'),
            cls('MagicalBoy', 'magicalboy'),
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RadioactivePanda', 'Radioactive-Panda'),
            cls('RavenWolf', 'RavenWolf'),
            cls('SyntheticInstinct', 'Synthetic-Instinct'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('TheInkApprentice', 'The-Ink-Apprentice'),
            cls('TheSeaInYou', 'theseainyou'),
            cls('TheSelkiesSkin', 'theselkiesskincomic'),
            cls('TheWitchsThrone', 'thewitchsthrone'),
            cls('VenturaCityDrifters', 'Ventura-City-Drifters'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )
|