dosage/dosagelib/plugins/tapas.py

78 lines
2.9 KiB
Python
Raw Normal View History

2019-08-22 05:36:09 +00:00
# SPDX-License-Identifier: MIT
2022-05-28 15:52:42 +00:00
# Copyright (C) 2019-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2022 Daniel Ring
from ..output import out
2019-08-22 05:36:09 +00:00
from ..scraper import _ParserScraper
from ..xml import NS
2019-08-22 05:36:09 +00:00
class Tapas(_ParserScraper):
    """Scraper for comic series hosted on tapas.io.

    Each instance is built from a short comic name and the series slug
    used in the site's URLs. Episodes can carry several images, so the
    image order of the current page is remembered (see ``fetchUrls``) and
    used by ``namer`` to build position-based filenames.
    """

    baseUrl = 'https://tapas.io/'
    imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
    prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
    latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Set up the scraper for one series.

        name -- comic name, registered as ``Tapas/<name>``.
        url -- series slug inserted into ``series/<url>/info``.
        """
        super().__init__('Tapas/' + name)
        self.url = self.baseUrl + 'series/' + url + '/info'
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Return the URL of the newest episode of the series.

        The series id is read from the info page, then the episode list
        (sorted newest first) is requested; the first ``data-id`` found in
        the returned markup identifies the newest episode.

        Raises an HTTP error (via ``raise_for_status``) when the episode
        list request fails, and IndexError when the info page carries no
        ``data-series-id`` attribute or the list has no ``data-id``.
        """
        # Retrieve comic metadata from info page
        info = self.getPage(self.url)
        series = info.xpath('//@data-series-id')[0]
        # Retrieve comic metadata from API
        response = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
        response.raise_for_status()
        episodes = response.json()['data']['body']
        # 'body' is a markup string; pick the value of its first data-id="..."
        return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]

    def getPrevUrl(self, url, data):
        """Return the URL of the episode preceding ``url``.

        The parsed page passed as ``data`` is not used; the previous
        episode id is taken from the ``<url>/info`` JSON endpoint instead.
        """
        # Retrieve comic metadata from API
        response = self.session.get(url + '/info')
        response.raise_for_status()
        apiData = response.json()['data']
        prevUrl = self.stripUrl % apiData['prev_ep_id']
        # NOTE(review): scene == 2 presumably marks the second episode, so
        # its predecessor is recorded as the first strip -- confirm.
        if apiData['scene'] == 2:
            self.firstStripUrl = prevUrl
        return prevUrl

    def fetchUrls(self, url, data, urlSearch):
        """Fetch URLs as the base class does, remembering their order."""
        # Save link order for position-based filenames
        self.imageUrls = super().fetchUrls(url, data, urlSearch)
        return self.imageUrls

    def shouldSkipUrl(self, url, data):
        """Return True (after warning) when the page shows a sign-in button."""
        if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):
            out.warn(f'Nothing to download on "{url}", because a login is required.')
            return True
        return False

    def namer(self, imageUrl, pageUrl):
        """Build a filename from the episode number and image position.

        imageUrl -- URL of the image; must be one of the URLs returned by
                    the latest ``fetchUrls`` call.
        pageUrl -- URL of the episode page; its last path segment is the
                   episode number.

        Returns ``<episode>-<index>.<ext>`` when the page has several
        images, otherwise ``<episode>.<ext>``.
        """
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self.imageUrls.index(imageUrl)
        # The extension belongs to the image, not the episode page (page
        # URLs have no file extension); drop any query string first.
        imageExt = imageUrl.split('?', 1)[0].rsplit('.', 1)[-1]
        if len(self.imageUrls) > 1:
            return "%s-%d.%s" % (episodeNum, imageNum, imageExt)
        return "%s.%s" % (episodeNum, imageExt)

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported Tapas comic."""
        return (
            # Manually-added comics
            cls('AmpleTime', 'Ample-Time'),
            cls('InsignificantOtters', 'IOtters'),
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RavenWolf', 'RavenWolf'),
            cls('SyntheticInstinct', 'Synthetic-Instinct'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('VenturaCityDrifters', 'Ventura-City-Drifters'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )