#!/usr/bin/env python
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
"""
Script to get WebComicFactory comics and save the info in a JSON file for
further processing.
"""
|
2016-04-14 20:22:37 +00:00
|
|
|
from scriptutil import ComicListUpdater
|
2016-04-03 19:31:56 +00:00
|
|
|
|
|
|
|
|
2016-04-14 20:22:37 +00:00
|
|
|
class WebComicFactoryUpdater(ComicListUpdater):
    """Updater that gathers the comics shown on the WebComicFactory start page."""

    def find_first(self, url):
        """Return the target of the "first" navigation link on a comic page.

        The page at ``url`` is fetched and searched for
        ``a.comic-nav-first`` anchors. The ``href`` of the first match is
        returned; when there is no match, a notice is printed and ``url``
        itself is returned unchanged.
        """
        page = self.get_url(url)
        anchors = page.cssselect('a.comic-nav-first')
        if anchors:
            return anchors[0].attrib['href']
        # No "first" navigation anchor was found on the fetched page.
        print("INFO:", "No first link on »%s«, already first page?" % (url))
        return url

    def collect_results(self):
        """Walk the thumbnail widgets of the start page and add each comic."""
        start_page = self.get_url('http://www.thewebcomicfactory.com/')
        for widget in start_page.cssselect('div.ceo_thumbnail_widget'):
            # Both lookups take the first match and raise IndexError when
            # the widget has no such element.
            heading = widget.cssselect('h2')[0]
            anchor = widget.cssselect('a')[0]
            target = anchor.attrib['href']
            title = heading.text
            # Widgets whose link contains 'comic-color-key' are not added.
            if 'comic-color-key' not in target:
                first_url = self.find_first(target)
                self.add_comic(title, first_url)

    def get_entry(self, name, url):
        """Format ``name`` and ``url`` as a two-line ``cls('...', '...'),`` entry."""
        template = u"cls('%s',\n '%s'),"
        return template % (name, url)
|
2016-04-03 19:31:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the updater when this file is executed as a script.
    updater = WebComicFactoryUpdater(__file__)
    updater.run()
|