Fix ComicFury update script

This commit is contained in:
Tobias Gruetzmacher 2022-11-26 17:46:31 +01:00
parent bdae76d12d
commit a94cc2b53b
No known key found for this signature in database

View file

@@ -2,7 +2,7 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2019 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
""" """
Script to get ComicFury comics and save the info in a JSON file for further Script to get ComicFury comics and save the info in a JSON file for further
processing. processing.
@@ -138,47 +138,36 @@ class ComicFuryUpdater(ComicListUpdater):
"""Parse one search result page.""" """Parse one search result page."""
data = self.get_url(url) data = self.get_url(url)
count = 999 for comicdiv in data.cssselect('div.webcomic-result'):
for comicdiv in data.cssselect('div.searchresult'): comiclink = comicdiv.cssselect('div.webcomic-result-title a')[0]
comiclink = comicdiv.cssselect('h3 a')[0]
comicurl = comiclink.attrib['href'] comicurl = comiclink.attrib['href']
name = comiclink.text name = comiclink.text
info = comicdiv.cssselect('span.comicinfo') info = comicdiv.cssselect('span.stat-value')
# find out how many images this comic has # find out how many images this comic has
count = int(info[1].text.strip()) count = int(info[0].text.strip())
# find activity self.add_comic(name, comicurl, count)
active = info[6].text.strip().lower() == "active"
lang = info[7].text.strip().lower()
self.add_comic(name, (comicurl, active, lang), count)
return count nextlink = data.cssselect('div.search-next-page a')
if nextlink:
return nextlink[0].attrib['href']
else:
return None
def collect_results(self): def collect_results(self):
"""Parse all search result pages.""" """Parse all search result pages."""
# Sort by page count, so we can abort when we get under some threshold. # Sort by page count, so we can abort when we get under some threshold.
baseUrl = ('https://comicfury.com/search.php?search=1&webcomics=1&' + url = ('https://comicfury.com/search.php?query=&lastupdate=0&' +
'query=&worder=1&asc=0&incvi=2&incnu=2&incla=2&incse=2&' + 'completed=1&fn=2&fv=2&fs=2&fl=2&sort=0')
'all_ge=1&all_st=1&all_la=1&page=%d')
last_count = 999
page = 1
print("Parsing search result pages...", file=sys.stderr) print("Parsing search result pages...", file=sys.stderr)
while last_count >= self.MIN_COMICS: while url:
last_count = self.handle_url(baseUrl % page) url = self.handle_url(url)
page += 1
print(last_count, file=sys.stderr, end=" ")
def get_entry(self, name, entry): def get_entry(self, name, entry):
url, active, lang = entry url = entry
langopt = ''
if lang != "english":
if lang in self.langmap:
langopt = ", '%s'" % self.langmap[lang]
else:
print("WARNING:", "Unknown language:", lang)
sub = urlsplit(url).hostname.split('.', 1)[0] sub = urlsplit(url).hostname.split('.', 1)[0]
return u"cls('%s', '%s'%s)," % (name, sub, langopt) return f"cls('{name}', '{sub}'),"
if __name__ == '__main__': if __name__ == '__main__':