# dosage/dosagelib/plugins/tapas.py

# SPDX-License-Identifier: MIT
# Copyright (C) 2019-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2022 Daniel Ring
from ..output import out
from ..scraper import _ParserScraper
from ..xml import NS


class Tapas(_ParserScraper):
    """Scraper for comics hosted on tapas.io.

    Each instance handles one series. Episode pages are addressed by their
    numeric episode id (see ``stripUrl``); the newest episode and the
    previous-episode links are looked up through JSON endpoints of the site
    rather than through the XPath expressions alone.
    """

    baseUrl = 'https://tapas.io/'
    # XPath expressions; presumably consumed by the _ParserScraper base class.
    imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'
    prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'
    latestSearch = '//ul[contains(@class, "js-episode-list")]//a'
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Set up the scraper for one series.

        :param name: comic name; registered as ``'Tapas/' + name``.
        :param url: series slug, inserted into the ``series/<slug>/info`` URL.
        """
        super().__init__('Tapas/' + name)
        self.url = self.baseUrl + 'series/' + url + '/info'
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Return the URL of the newest episode of the series.

        Raises ``IndexError`` when the info page carries no
        ``data-series-id`` attribute or the episode list contains no
        ``data-id`` attribute, and whatever ``raise_for_status()`` raises on
        an HTTP error.
        """
        # Retrieve comic metadata from info page
        info = self.getPage(self.url)
        series = info.xpath('//@data-series-id')[0]
        # Retrieve comic metadata from API
        response = self.session.get(
            self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')
        response.raise_for_status()
        # 'body' is a markup string; the first data-id attribute in it is
        # used as the id of the newest episode (the list is requested with
        # sort=NEWEST).
        episodes = response.json()['data']['body']
        return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]

    def getPrevUrl(self, url, data):
        """Return the URL of the episode preceding ``url``.

        The already parsed page (``data``) is not used; the previous episode
        id is read from the JSON document served at ``url + '/info'``.
        """
        # Retrieve comic metadata from API
        response = self.session.get(url + '/info')
        response.raise_for_status()
        apiData = response.json()['data']
        prevUrl = self.stripUrl % apiData['prev_ep_id']
        # NOTE(review): scene == 2 presumably identifies the second episode,
        # which would make its predecessor the first strip - confirm.
        if apiData['scene'] == 2:
            self.firstStripUrl = prevUrl
        return prevUrl

    def fetchUrls(self, url, data, urlSearch):
        """Delegate to the base class and remember the returned URL list."""
        # Save link order for position-based filenames
        self.imageUrls = super().fetchUrls(url, data, urlSearch)
        return self.imageUrls

    def shouldSkipUrl(self, url, data):
        """Return True (after warning) when the page shows a sign-in button."""
        if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):
            out.warn(f'Nothing to download on "{url}", because a login is required.')
            return True
        return False

    def namer(self, imageUrl, pageUrl):
        """Build a filename from the episode id and the image's page position.

        :param imageUrl: URL of the image; must be present in the list saved
            by the most recent ``fetchUrls`` call (``ValueError`` otherwise).
        :param pageUrl: URL of the episode page; its last path segment is
            used as the episode id.
        :returns: ``<episode>-<index>.<ext>`` when the page has several
            images, ``<episode>.<ext>`` when it has exactly one.
        """
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self.imageUrls.index(imageUrl)
        # The extension belongs to the image, not to the episode page: the
        # page URL has no file extension, so splitting it on '.' would yield
        # part of the host name and path. Any query string is dropped first
        # so that it cannot end up inside the extension.
        imageExt = imageUrl.split('?', 1)[0].rsplit('.', 1)[-1]
        if len(self.imageUrls) > 1:
            return "%s-%d.%s" % (episodeNum, imageNum, imageExt)
        return "%s.%s" % (episodeNum, imageExt)

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per supported Tapas series."""
        return (
            # Manually-added comics
            cls('AmpleTime', 'Ample-Time'),
            cls('InsignificantOtters', 'IOtters'),
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RavenWolf', 'RavenWolf'),
            cls('SyntheticInstinct', 'Synthetic-Instinct'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('VenturaCityDrifters', 'Ventura-City-Drifters'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`# SPDX-License-Identifier: MIT`
Clean up some minor warnings 2022-05-28 15:52:42 +00:00			`# Copyright (C) 2019-2022 Tobias Gruetzmacher`
Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`# Copyright (C) 2019-2022 Daniel Ring`
			`from ..output import out`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`from ..scraper import _ParserScraper`
Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`from ..xml import NS`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00

Rename Tapastic to Tapas The site has been known just as "Tapas" for longer than Dosage has had support for it. Since the module was merged just recently, this rename shouldn't affect many users... 2022-06-05 23:54:22 +00:00			`class Tapas(_ParserScraper):`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`baseUrl = 'https://tapas.io/'`
Fix Tapastic scraper 2020-04-07 08:18:20 +00:00			`imageSearch = '//article[contains(@class, "js-episode-article")]//img/@data-src'`
			`prevSearch = '//a[contains(@class, "js-prev-ep-btn")]'`
			`latestSearch = '//ul[contains(@class, "js-episode-list")]//a'`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`multipleImagesPerStrip = True`

			`def __init__(self, name, url):`
Rename Tapastic to Tapas The site has been known just as "Tapas" for longer than Dosage has had support for it. Since the module was merged just recently, this rename shouldn't affect many users... 2022-06-05 23:54:22 +00:00			`super().__init__('Tapas/' + name)`
Fix Tapastic scraper 2020-06-20 06:12:53 +00:00			`self.url = self.baseUrl + 'series/' + url + '/info'`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`self.stripUrl = self.baseUrl + 'episode/%s'`

Fix Tapastic scraper for comics with episode list in ascending order 2021-01-21 09:41:26 +00:00			`def starter(self):`
			`# Retrieve comic metadata from info page`
			`info = self.getPage(self.url)`
			`series = info.xpath('//@data-series-id')[0]`
			`# Retrieve comic metadata from API`
			`data = self.session.get(self.baseUrl + 'series/' + series + '/episodes?sort=NEWEST')`
			`data.raise_for_status()`
			`episodes = data.json()['data']['body']`
			`return self.stripUrl % episodes.split('data-id="')[1].split('"')[0]`

Fix Tapastic scraper 2020-06-20 06:12:53 +00:00			`def getPrevUrl(self, url, data):`
			`# Retrieve comic metadata from API`
			`data = self.session.get(url + '/info')`
			`data.raise_for_status()`
			`apiData = data.json()['data']`
			`if apiData['scene'] == 2:`
			`self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']`
			`return self.stripUrl % apiData['prev_ep_id']`

Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`def fetchUrls(self, url, data, urlSearch):`
			`# Save link order for position-based filenames`
			`self.imageUrls = super().fetchUrls(url, data, urlSearch)`
			`return self.imageUrls`

Print a warning if a Tapas page needs a login 2022-06-06 00:27:22 +00:00			`def shouldSkipUrl(self, url, data):`
			`if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):`
			`out.warn(f'Nothing to download on "{url}", because a login is required.')`
			`return True`
			`return False`

Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`def namer(self, imageUrl, pageUrl):`
			`# Construct filename from episode number and image position on page`
			`episodeNum = pageUrl.rsplit('/', 1)[-1]`
			`imageNum = self.imageUrls.index(imageUrl)`
			`imageExt = pageUrl.rsplit('.', 1)[-1]`
			`if len(self.imageUrls) > 1:`
			`filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)`
			`else:`
			`filename = "%s.%s" % (episodeNum, imageExt)`
			`return filename`

			`@classmethod`
			`def getmodules(cls):`
			`return (`
			`# Manually-added comics`
Add Tapastic/AmpleTime 2020-04-25 02:48:54 +00:00			`cls('AmpleTime', 'Ample-Time'),`
Fix InsignificantOtters 2021-01-21 09:43:43 +00:00			`cls('InsignificantOtters', 'IOtters'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`cls('NoFuture', 'NoFuture'),`
			`cls('OrensForge', 'OrensForge'),`
			`cls('RavenWolf', 'RavenWolf'),`
Add Tapastic/SyntheticInstinct 2021-03-16 07:55:44 +00:00			`cls('SyntheticInstinct', 'Synthetic-Instinct'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00			`cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),`
Add Tapastic/VenturaCityDrifters 2021-11-28 06:01:50 +00:00			`cls('VenturaCityDrifters', 'Ventura-City-Drifters'),`
Add Tapastic site engine 2019-08-22 05:36:09 +00:00
			`# START AUTOUPDATE`
			`# END AUTOUPDATE`
			`)`