dosage/scripts/arcamax.py

44 lines
1.3 KiB
Python
Raw Normal View History

2013-01-23 18:34:11 +00:00
#!/usr/bin/env python
2016-04-12 22:52:16 +00:00
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
2013-01-23 18:34:11 +00:00
"""
Script to get arcamax comics and save the info in a JSON file for further
processing.
"""
2016-04-12 22:52:16 +00:00
from __future__ import absolute_import, division, print_function
from scriptutil import ComicListUpdater
2016-04-12 22:52:16 +00:00
class ArcamaxUpdater(ComicListUpdater):
    """Updater that collects the comics listed on arcamax.com.

    Fetching pages (``get_url``), recording results (``add_comic``) and the
    overall run loop are inherited from ``ComicListUpdater``; this class only
    supplies the site-specific scraping and the class-definition template.
    """

    # Plugin-name templates of other comic sources.
    # NOTE(review): presumably consumed by the base class to detect comics
    # that are already provided elsewhere -- confirm in scriptutil.
    dup_templates = (
        "Creators/%s", "DrunkDuck/%s", "GoComics/%s",
        "KeenSpot/%s", "ComicGenesis/%s", "SmackJeeves/%s",
    )

    # names of comics to exclude
    excluded_comics = (
        "HagartheHorrible",  # better source available
    )

    def handle_url(self, url):
        """Parse one search result page.

        Every ``a.comic-icon`` link on the page is registered through
        ``add_comic`` with its ``title`` attribute as the comic name and the
        last path segment of its ``href`` as the entry.
        """
        data = self.get_url(url)

        for comiclink in data.cssselect('a.comic-icon'):
            path = comiclink.attrib['href']
            name = comiclink.attrib['title']
            # Take the final path segment whether or not the link ends with
            # a slash ("/thefunnies/foo/" and "/thefunnies/foo" both give
            # "foo"); a fixed split index would return the parent directory
            # for links without the trailing slash.
            self.add_comic(name, path.rstrip('/').rsplit('/', 1)[-1])

    def collect_results(self):
        """Parse all search result pages."""
        self.handle_url('http://www.arcamax.com/comics')

    def get_classdef(self, name, entry):
        """Return the source text of the plugin class for one comic.

        ``name`` becomes the class name and ``entry`` (the path segment
        recorded by ``handle_url``) is embedded as its ``path`` attribute.
        """
        # NOTE(review): the generated body line is indented by one space, as
        # found in the source; confirm this matches the target module style.
        return u"class %s(_Arcamax):\n path = %r" % (name, entry)
2013-01-23 18:34:11 +00:00
if __name__ == '__main__':
    # Only run the updater when executed as a script, not when imported.
    ArcamaxUpdater(__file__).run()