27d28b8eef
The default encoding for source files is UTF-8 since Python 3, so we can drop all encoding headers. While we are at it, just replace them with SPDX headers.
45 lines
1.4 KiB
Python
Executable file
45 lines
1.4 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# SPDX-License-Identifier: MIT
|
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
|
"""
|
|
Script to get WebComicFactory comics and save the info in a JSON file for
|
|
further processing.
|
|
"""
|
|
from scriptutil import ComicListUpdater
|
|
|
|
|
|
class WebComicFactoryUpdater(ComicListUpdater):
    """Comic list updater for the comics hosted on The Webcomic Factory.

    The fetching (``get_url``), bookkeeping (``add_comic``) and the overall
    ``run`` workflow are inherited from ``ComicListUpdater``; this class only
    supplies the site-specific parsing.
    """

    def find_first(self, url):
        """Return the URL of the first strip of the comic found at *url*.

        The page is searched for ``a.comic-nav-first`` links. When one is
        present, the ``href`` of the first match is returned. Otherwise an
        informational message is printed and *url* itself is returned.
        """
        # presumably an lxml tree, since it offers cssselect() -- the exact
        # type is decided by ComicListUpdater.get_url
        page = self.get_url(url)
        nav_links = page.cssselect('a.comic-nav-first')

        if nav_links:
            return nav_links[0].attrib['href']

        # No "first" navigation link at all; fall back to the given page.
        print("INFO:", "No first link on »%s«, already first page?" % url)
        return url

    def collect_results(self):
        """Parse start page for supported comics.

        Every ``div.ceo_thumbnail_widget`` on the start page is treated as
        one comic: its first ``h2`` provides the name and its first ``a``
        the link. Entries whose link contains ``comic-color-key`` are
        skipped; all others are registered through ``add_comic`` together
        with the result of ``find_first`` for their link.
        """
        start_page = self.get_url('http://www.thewebcomicfactory.com/')

        for widget in start_page.cssselect('div.ceo_thumbnail_widget'):
            # Both lookups take the first match and raise IndexError when
            # the widget lacks the element.
            heading = widget.cssselect('h2')[0]
            anchor = widget.cssselect('a')[0]
            target = anchor.attrib['href']
            title = heading.text

            # Links containing this marker are not registered as comics.
            if 'comic-color-key' in target:
                continue

            self.add_comic(title, self.find_first(target))

    def get_entry(self, name, url):
        """Return the ``cls(...)`` source snippet for one comic.

        *name* and *url* are inserted verbatim, each wrapped in single
        quotes, on two consecutive lines.
        """
        template = u"cls('%s',\n '%s'),"
        return template % (name, url)
|
|
|
|
|
|
if __name__ == '__main__':
    # Only run the updater when this file is executed as a script; the
    # updater is handed this script's own path.
    updater = WebComicFactoryUpdater(__file__)
    updater.run()
|