Add Tapastic site engine
This commit is contained in:
parent
3152b0ba28
commit
e30f63ec4d
4 changed files with 107 additions and 2 deletions
69
dosagelib/plugins/tapastic.py
Normal file
69
dosagelib/plugins/tapastic.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ..scraper import _ParserScraper
|
||||||
|
|
||||||
|
|
||||||
|
class Tapastic(_ParserScraper):
    """Scraper for comics hosted on tapas.io.

    The series page embeds a script with a ``_data`` object; the episode
    list found there drives navigation between strips instead of on-page
    "previous" links.
    """

    baseUrl = 'https://tapas.io/'
    imageSearch = '//article[@class="ep-contents"]//img'
    # Patterns applied to the text of the embedded "var _data" script
    episodeIdSearch = re.compile(r'episodeId : (\d+),')
    episodeListSearch = re.compile(r'episodeList : (.*),')
    multipleImagesPerStrip = True

    def __init__(self, name, url):
        """Set up the scraper.

        name -- comic name, registered as 'Tapastic/<name>'
        url -- series identifier appended to '<baseUrl>series/'
        """
        super().__init__('Tapastic/' + name)
        self.url = self.baseUrl + 'series/' + url
        self.stripUrl = self.baseUrl + 'episode/%s'

    def starter(self):
        """Load the series page, remember its episode list and return the
        URL of the episode the page reports as current."""
        # Retrieve series data object
        seriesPage = self.getPage(self.url)
        dataScript = seriesPage.xpath('//script[contains(text(), "var _data")]')[0].text
        # Extract episode list
        currentEpisode = self.episodeIdSearch.findall(dataScript)[0]
        self.episodeList = json.loads(self.episodeListSearch.findall(dataScript)[0])
        return self.stripUrl % currentEpisode

    def fetchUrls(self, url, data, urlSearch):
        """Return the image URLs of a page, keeping them for namer()."""
        # Save link order for position-based filenames
        self.imageUrls = super().fetchUrls(url, data, urlSearch)
        # Record the first strip URL once the first listed episode is reached
        if int(url.rsplit('/', 1)[-1]) == self.episodeList[0]['id']:
            self.firstStripUrl = url
        return self.imageUrls

    def getPrevUrl(self, url, data):
        """Return the URL of the episode listed before the one at url, or
        None when url is the first entry of the episode list."""
        episodeId = int(url.rsplit('/', 1)[-1])
        index = [i for i, ep in enumerate(self.episodeList) if ep['id'] == episodeId][0]
        if index == 0:
            return None
        return self.stripUrl % str(self.episodeList[index - 1]['id'])

    def namer(self, imageUrl, pageUrl):
        """Build '<episode>.<ext>', or '<episode>-<position>.<ext>' when the
        page carries more than one image."""
        # Construct filename from episode number and image position on page
        episodeNum = pageUrl.rsplit('/', 1)[-1]
        imageNum = self.imageUrls.index(imageUrl)
        # The extension belongs to the image, not the episode page; drop any
        # query string so it does not end up in the extension.
        imageExt = imageUrl.split('?', 1)[0].rsplit('.', 1)[-1]
        if len(self.imageUrls) > 1:
            filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)
        else:
            filename = "%s.%s" % (episodeNum, imageExt)
        return filename

    @classmethod
    def getmodules(cls):
        """Return one scraper instance per known comic."""
        return (
            # Manually-added comics
            cls('NoFuture', 'NoFuture'),
            cls('OrensForge', 'OrensForge'),
            cls('RavenWolf', 'RavenWolf'),
            cls('TheCatTheVineAndTheVictory', 'The-Cat-The-Vine-and-The-Victory'),
            cls('TheGodsPack', 'The-Gods-Pack'),

            # START AUTOUPDATE
            # END AUTOUPDATE
        )
|
|
@ -9,7 +9,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
|
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
echo "Executing ${script}.py"
|
echo "Executing ${script}.py"
|
||||||
|
|
36
scripts/tapastic.py
Normal file
36
scripts/tapastic.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
# Copyright (C) 2019-2020 Tobias Gruetzmacher
|
||||||
|
# Copyright (C) 2019-2020 Daniel Ring
|
||||||
|
"""
|
||||||
|
Script to get a list of Tapastic comics and save the info in a
|
||||||
|
JSON file for further processing.
|
||||||
|
"""
|
||||||
|
from urllib.parse import urlsplit, parse_qs
|
||||||
|
|
||||||
|
from scriptutil import ComicListUpdater
|
||||||
|
from dosagelib.util import check_robotstxt
|
||||||
|
|
||||||
|
|
||||||
|
class TapasticUpdater(ComicListUpdater):
    """Collects comics from the Tapastic browse pages and formats them as
    entries for the Tapastic plugin module."""

    def collect_results(self):
        """Fetch the browse pages and register every comic link on them."""
        # Retrieve the first 10 top comics list pages
        url = 'https://tapas.io/comics?browse=ALL&sort_type=LIKE&pageNumber='
        count = 10

        data = [self.get_url(url + str(i), robot=False) for i in range(0, count)]
        for page in data:
            for comiclink in page.xpath('//a[@class="preferred title"]'):
                comicurl = comiclink.attrib['href']
                name = comiclink.text
                self.add_comic(name, comicurl)

    def get_entry(self, name, url):
        """Return the source line that instantiates the plugin for one comic.

        The line has the two-argument form the Tapastic plugin constructor
        accepts: a short name and the series identifier taken from the link.
        Raises ValueError when the link has no path to take it from.
        """
        shortName = name.replace(' ', '').replace('\'', '')
        # Use the path segment following 'series' (this also covers links
        # with a trailing sub-page); otherwise fall back to the last segment.
        segments = [part for part in urlsplit(url).path.split('/') if part]
        if not segments:
            raise ValueError('no series identifier in URL: %r' % (url,))
        if 'series' in segments[:-1]:
            seriesId = segments[segments.index('series') + 1]
        else:
            seriesId = segments[-1]
        return u"cls('%s', '%s')," % (shortName, seriesId)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the updater when this file is executed as a script
    updater = TapasticUpdater(__file__)
    updater.run()
|
|
@ -11,7 +11,7 @@ d=$(dirname $0)
|
||||||
if [ $# -ge 1 ]; then
|
if [ $# -ge 1 ]; then
|
||||||
list="$*"
|
list="$*"
|
||||||
else
|
else
|
||||||
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot webcomicfactory webtoons"
|
list="arcamax comicfury comicgenesis comicskingdom creators gocomics keenspot tapastic webcomicfactory webtoons"
|
||||||
fi
|
fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
target="${d}/../dosagelib/plugins/${script}.py"
|
target="${d}/../dosagelib/plugins/${script}.py"
|
||||||
|
|
Loading…
Reference in a new issue