2012-06-20 20:41:04 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
|
|
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
2012-10-11 17:52:52 +00:00
|
|
|
import os
|
2012-06-20 19:58:13 +00:00
|
|
|
import time
|
2013-04-03 18:32:43 +00:00
|
|
|
try:
|
2013-04-05 04:56:33 +00:00
|
|
|
from urllib.parse import quote as url_quote
|
2013-04-03 18:32:43 +00:00
|
|
|
except ImportError:
|
|
|
|
from urllib import quote as url_quote
|
2013-03-07 17:21:05 +00:00
|
|
|
import codecs
|
2013-03-10 17:03:28 +00:00
|
|
|
import json
|
2012-10-11 17:52:52 +00:00
|
|
|
from . import rss, util, configuration
|
2012-06-20 19:58:13 +00:00
|
|
|
|
|
|
|
class EventHandler(object):
    """Common base for handlers that record download events in files.

    Subclasses override the event hooks start(), comicDownloaded(),
    comicPageLink() and end(); the versions defined here do nothing.
    """

    def __init__(self, basepath, baseurl):
        """Store the base path and the base URL.

        If baseurl is false (for example None or an empty string), the
        result of getBaseUrl() is used instead.
        """
        self.basepath = basepath
        if baseurl:
            self.baseurl = baseurl
        else:
            self.baseurl = self.getBaseUrl()

    def getBaseUrl(self):
        """Build a file: URL that probably points to the base directory.

        Serves as a halfway sane fallback when no base URL was supplied;
        not perfect, but should work in most cases.
        """
        absolute = os.path.abspath(self.basepath)
        # Quote every path component on its own (no safe characters).
        quoted = [url_quote(part, '') for part in util.splitpath(absolute)]
        return ''.join(['file:///', '/'.join(quoted), '/'])

    def getUrlFromFilename(self, filename):
        """Construct the URL for a filename.

        The path returned by util.getRelativePath() for the base path and
        filename is split into components, each component is URL-quoted,
        and the joined result is appended to the base URL.
        """
        relative = util.getRelativePath(self.basepath, filename)
        quoted = [url_quote(part, '') for part in util.splitpath(relative)]
        return self.baseurl + '/'.join(quoted)

    def start(self):
        """Hook for the start event; a no-op here, override in subclasses."""

    def comicDownloaded(self, comic, filename):
        """Hook for a downloaded comic; a no-op here, override in subclasses."""

    def comicPageLink(self, comic, url, prevUrl):
        """Hook for a link between comic pages; a no-op here, override in subclasses."""

    def end(self):
        """Hook for the end event; a no-op here, override in subclasses."""
|
|
|
|
|
|
|
|
|
|
|
|
class RSSEventHandler(EventHandler):
    """Event handler that records downloaded comics in an RSS file."""

    name = 'rss'

    def getFilename(self):
        """Return the absolute path of the RSS file below the base path."""
        rssPath = os.path.join(self.basepath, 'dailydose.rss')
        return os.path.abspath(rssPath)

    def start(self):
        """Open the feed: parse an existing RSS file or create a new feed.

        Sets self.rssfn, self.newfile and self.rss for the later events.
        """
        now = time.time()
        today = time.localtime(now)
        # 86400 seconds, i.e. one day before now.
        yesterday = time.localtime(now - 86400)
        link = configuration.Url

        self.rssfn = self.getFilename()
        self.newfile = not os.path.exists(self.rssfn)
        if self.newfile:
            description = 'Comics for %s' % time.strftime('%Y/%m/%d', today)
            self.rss = rss.Feed('Daily Dosage', link, description)
        else:
            self.rss = rss.parseFeed(self.rssfn, yesterday)

    def comicDownloaded(self, comic, filename):
        """Add an RSS item for the downloaded comic image.

        The first item of a freshly created feed is added with the default
        addItem() behaviour; every other item is added with append=False.
        """
        imageUrl = self.getUrlFromFilename(filename)
        title = '%s - %s' % (comic.name, os.path.basename(filename))
        description = '<img src="%s"/><br/><a href="%s">View Comic Online</a>' % (imageUrl, comic.referrer)
        pubDate = util.rfc822date(time.time())

        if self.newfile:
            self.newfile = False
            self.rss.addItem(title, imageUrl, description, pubDate)
        else:
            self.rss.addItem(title, imageUrl, description, pubDate, append=False)

    def end(self):
        """Write the feed to the RSS file chosen in start()."""
        self.rss.write(self.rssfn)
|
|
|
|
|
2012-09-26 14:47:39 +00:00
|
|
|
|
2012-06-20 19:58:13 +00:00
|
|
|
class HtmlEventHandler(EventHandler):
    """Output in HTML format.

    start() creates one HTML page named after the current date below
    <basepath>/html; each downloaded image is appended to it, grouped by
    comic name and by the page URL it was found on.
    """

    name = 'html'
    encoding = 'utf-8'

    def fnFromDate(self, date):
        """Get the absolute HTML filename for a date.

        date is a time tuple as accepted by time.strftime().
        """
        fn = time.strftime('comics-%Y%m%d.html', date)
        fn = os.path.join(self.basepath, 'html', fn)
        return os.path.abspath(fn)

    def start(self):
        """Start HTML output.

        Creates the output directory if needed, opens today's page and
        writes the document header with links to the pages of the previous
        and the next day.

        Raises ValueError if today's output file already exists.
        """
        today = time.time()
        # Neighbouring days are taken as +/- 86400 seconds from now.
        yesterday = today - 86400
        tomorrow = today + 86400
        today = time.localtime(today)
        yesterday = time.localtime(yesterday)
        tomorrow = time.localtime(tomorrow)

        fn = self.fnFromDate(today)
        if os.path.exists(fn):
            raise ValueError('output file %r already exists' % fn)

        d = os.path.dirname(fn)
        if not os.path.isdir(d):
            os.makedirs(d)

        yesterdayUrl = self.getUrlFromFilename(self.fnFromDate(yesterday))
        tomorrowUrl = self.getUrlFromFilename(self.fnFromDate(tomorrow))

        self.html = codecs.open(fn, 'w', self.encoding)
        self.html.write(u'''<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=%s"/>
<meta name="generator" content="%s"/>
<title>Comics for %s</title>
</head>
<body>
<a href="%s">Previous Day</a> | <a href="%s">Next Day</a>
<ul>
''' % (self.encoding, configuration.App, time.strftime('%Y/%m/%d', today),
       yesterdayUrl, tomorrowUrl))
        # last comic name (eg. CalvinAndHobbes)
        self.lastComic = None
        # URL of the comic page whose <li> is currently open
        # (eg. http://example.com/page42), or None if no <li> is open
        self.lastUrl = None

    def comicDownloaded(self, comic, filename):
        """Write HTML entry for downloaded comic.

        A new list item is started whenever the page URL differs from the
        one of the currently open item; the image is then appended to the
        open item.
        """
        # NOTE(review): the URLs and the comic name are written into the
        # markup without HTML escaping.
        if self.lastComic != comic.name:
            self.newComic(comic)
        imageUrl = self.getUrlFromFilename(filename)
        pageUrl = comic.referrer
        if pageUrl != self.lastUrl:
            if self.lastUrl is not None:
                # Close the item of the previous page before opening a new
                # one, so that list items are never left unclosed.
                self.html.write(u'</li>\n')
            self.html.write(u'<li><a href="%s">%s</a>\n' % (pageUrl, pageUrl))
        self.html.write(u'<br/><img src="%s"/>\n' % imageUrl)
        self.lastComic = comic.name
        self.lastUrl = pageUrl

    def newComic(self, comic):
        """Start new comic list in HTML.

        Closes the open page item and the list of the previous comic, if
        any, then writes the comic name and opens a new list.
        """
        if self.lastUrl is not None:
            self.html.write(u'</li>\n')
            # No item is open any more; this makes the first page of the
            # new comic always open its own <li>, even if its URL equals
            # the last URL of the previous comic.
            self.lastUrl = None
        if self.lastComic is not None:
            self.html.write(u'</ul>\n')
        self.html.write(u'<li>%s</li>\n' % comic.name)
        self.html.write(u'<ul>\n')

    def end(self):
        """End HTML output: close open elements and the file."""
        if self.lastUrl is not None:
            self.html.write(u'</li>\n')
        if self.lastComic is not None:
            self.html.write(u'</ul>\n')
        self.html.write(u'''</ul>
</body>
</html>''')
        self.html.close()
|
|
|
|
|
|
|
|
|
2013-03-10 17:03:28 +00:00
|
|
|
class JSONEventHandler(EventHandler):
    """Event handler that keeps comic metadata in per-comic JSON files."""

    name = 'json'
    encoding = 'utf-8'

    def start(self):
        """Begin with an empty in-memory metadata mapping."""
        self.data = dict()

    def jsonFn(self, comic):
        """Return the absolute path of the JSON file for a comic name."""
        relative = os.path.join(self.basepath, comic, 'dosage.json')
        return os.path.abspath(relative)

    def getComicData(self, comic):
        """Return the info dictionary of a comic.

        On first access the dictionary is loaded from the comic's JSON
        file if that file exists; otherwise it starts with an empty
        'pages' mapping. The dictionary is kept in self.data afterwards.
        """
        if comic in self.data:
            return self.data[comic]
        fn = self.jsonFn(comic)
        if os.path.exists(fn):
            with codecs.open(fn, 'r', self.encoding) as f:
                comicData = json.load(f)
        else:
            comicData = {'pages': {}}
        self.data[comic] = comicData
        return comicData

    def getPageInfo(self, comic, url):
        """Return the info dictionary of a comic page, creating it if missing."""
        pages = self.getComicData(comic)['pages']
        return pages.setdefault(url, {'images': {}})

    def comicDownloaded(self, comic, filename):
        """Record the mapping from image URL to the file's base name."""
        images = self.getPageInfo(comic.name, comic.referrer)['images']
        images[comic.url] = os.path.basename(filename)

    def comicPageLink(self, comic, url, prevUrl):
        """Record the URL of the previous page for a comic page."""
        self.getPageInfo(comic, url)['prev'] = prevUrl

    def end(self):
        """Dump the data of every known comic to its JSON file."""
        for comic, comicData in self.data.items():
            with codecs.open(self.jsonFn(comic), 'w', self.encoding) as f:
                json.dump(comicData, f, indent=2, separators=(',', ': '), sort_keys=True)
|
|
|
|
|
|
|
|
|
2013-03-11 16:33:59 +00:00
|
|
|
# Registry of event handler classes, keyed by their name attribute;
# filled by addHandlerClass().
_handler_classes = {}
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2013-03-11 16:33:59 +00:00
|
|
|
def addHandlerClass(clazz):
    """Register an event handler class under its name attribute.

    Raises ValueError if clazz is not a subclass of EventHandler.
    """
    if issubclass(clazz, EventHandler):
        _handler_classes[clazz.name] = clazz
    else:
        raise ValueError("%s must be subclassed from %s" % (clazz, EventHandler))
|
|
|
|
|
|
|
|
# Register the handler classes defined in this module.
for _clazz in (HtmlEventHandler, RSSEventHandler, JSONEventHandler):
    addHandlerClass(_clazz)
del _clazz
|
2013-03-11 16:33:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
def getHandlerNames():
    """Return the names of all registered handler classes, sorted."""
    return sorted(_handler_classes)
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2012-10-12 20:07:50 +00:00
|
|
|
|
2013-03-11 16:33:59 +00:00
|
|
|
# Handler instances created by addHandler(); MultiHandler sends every
# event to each of them.
_handlers = []
|
|
|
|
|
|
|
|
def addHandler(name, basepath=None, baseurl=None):
    """Instantiate the handler class registered as name and install it.

    basepath defaults to '.' when it is None. A name that has not been
    registered raises KeyError.
    """
    handlerClass = _handler_classes[name]
    path = '.' if basepath is None else basepath
    _handlers.append(handlerClass(path, baseurl))
|
|
|
|
|
|
|
|
|
|
|
|
class MultiHandler(object):
    """Forward every event to all handlers in the module-level list."""

    def _emit(self, event, *args):
        """Call the method named event with args on each handler in _handlers."""
        for handler in _handlers:
            getattr(handler, event)(*args)

    def start(self):
        """Send the start event to all handlers."""
        self._emit('start')

    def comicDownloaded(self, comic, filename):
        """Send the comic downloaded event to all handlers."""
        self._emit('comicDownloaded', comic, filename)

    def comicPageLink(self, comic, url, prevUrl):
        """Tell all handlers about a link between two comic pages."""
        self._emit('comicPageLink', comic, url, prevUrl)

    def end(self):
        """Send the end event to all handlers."""
        self._emit('end')
|
|
|
|
|
|
|
|
|
|
|
|
# Single shared MultiHandler instance, returned by getHandler().
multihandler = MultiHandler()
|
2012-06-20 19:58:13 +00:00
|
|
|
|
2012-10-12 20:07:50 +00:00
|
|
|
def getHandler():
    """Get installed event handler.

    Returns the module-level multihandler object.
    """
    return multihandler
|