dosage/dosagelib/events.py

381 lines
12 KiB
Python
Raw Normal View History

# SPDX-License-Identifier: MIT
2016-10-28 22:21:41 +00:00
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
2012-10-11 17:52:52 +00:00
import os
2012-06-20 19:58:13 +00:00
import time
from urllib.parse import quote as url_quote
import codecs
import json
import imagesize
2012-10-11 17:52:52 +00:00
from . import rss, util, configuration
2013-12-22 12:38:29 +00:00
from .output import out
2012-06-20 19:58:13 +00:00
# Maximum (width, height) used when displaying an image in exported pages;
# the handlers in this module pass it as the limit to getDimensionForImage().
# Note that only the displayed size is adjusted, not the image itself.
MaxImageSize = (800, 800)
2012-06-20 19:58:13 +00:00
class EventHandler:
    """Common base for handlers that record download events to files.

    The hooks are start(), comicDownloaded(), comicPageLink() and end();
    all of them do nothing here and are meant to be overridden.
    """

    def __init__(self, basepath, baseurl, allowdownscale):
        """Store the base path, the base URL and the downscale flag.

        If no base URL is given, one is derived from the base path via
        getBaseUrl().
        """
        self.basepath = basepath
        if baseurl:
            self.baseurl = baseurl
        else:
            self.baseurl = self.getBaseUrl()
        self.allowdownscale = allowdownscale

    def getBaseUrl(self):
        """Return a file: URL that probably points to the base directory.

        Used as a halfway sane default when no base URL was provided;
        not perfect, but should work in most cases.
        """
        parts = util.splitpath(os.path.abspath(self.basepath))
        # Quote every path component completely (no "safe" characters).
        quoted = (url_quote(part, '') for part in parts)
        return 'file:///%s/' % '/'.join(quoted)

    def getUrlFromFilename(self, filename):
        """Return the URL for filename, built relative to the base URL."""
        relpath = util.getRelativePath(self.basepath, filename)
        quoted = (url_quote(part, '') for part in util.splitpath(relpath))
        return self.baseurl + '/'.join(quoted)

    def start(self):
        """Handle the start event. Does nothing in the base class."""

    def comicDownloaded(self, comic, filename):
        """Handle a comic downloaded event. Does nothing in the base class.

        Parameters are:
          comic: The ComicImage class calling this event
          filename: The target filename
        """

    def comicPageLink(self, scraper, url, prevUrl):
        """Handle a notification about a link between two comic pages.
        Does nothing in the base class.

        Parameters are:
          scraper: The Scraper class calling this event
          url: The current page url
          prevUrl: The previous page url
        """

    def end(self):
        """Handle the end event. Does nothing in the base class."""
class RSSEventHandler(EventHandler):
    """Event handler that records downloaded comics in an RSS feed file."""

    name = 'rss'

    def getFilename(self):
        """Return the absolute path of the RSS file inside the base path."""
        return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))

    def start(self):
        """Load the existing feed file, or set up a new feed if none exists."""
        now = time.time()
        feed_link = configuration.Url
        self.rssfn = self.getFilename()
        self.newfile = not os.path.exists(self.rssfn)
        if self.newfile:
            today = time.localtime(now)
            self.rss = rss.Feed(
                'Daily Dosage',
                feed_link,
                'Comics for %s' % time.strftime('%Y/%m/%d', today),
            )
        else:
            # The local time of 24 hours ago is handed to the feed parser.
            yesterday = time.localtime(now - 86400)
            self.rss = rss.parseFeed(self.rssfn, yesterday)

    def comicDownloaded(self, comic, filename):
        """Add a feed item for the downloaded comic image."""
        image_url = self.getUrlFromFilename(filename)
        dimensions = None
        if self.allowdownscale:
            dimensions = getDimensionForImage(filename, MaxImageSize)
        title = '%s - %s' % (comic.scraper.name, os.path.basename(filename))

        # Assemble the HTML snippet used as the item description.
        pieces = ['<img src="%s"' % image_url]
        if dimensions:
            pieces.append(' width="%d" height="%d"' % dimensions)
        pieces.append('/>')
        if comic.text:
            pieces.append('<br/>%s' % comic.text)
        pieces.append(
            '<br/><a href="%s">View Comic Online</a>' % comic.referrer)
        description = ''.join(pieces)

        item = (title, image_url, description, util.rfc822date(time.time()))
        if self.newfile:
            # Only the first item of a freshly created feed uses the
            # default addItem() behaviour; later ones pass append=False.
            self.newfile = False
            self.rss.addItem(*item)
        else:
            self.rss.addItem(*item, append=False)

    def end(self):
        """Write the feed to the RSS file."""
        self.rss.write(self.rssfn)
2012-09-26 14:47:39 +00:00
def getDimensionForImage(filename, maxsize):
    """Return scaled image size in (width, height) format.

    The size is read from the image file and, if it exceeds maxsize
    (a (max width, max height) pair), scaled down so that it fits.
    The scaling preserves the aspect ratio.

    Returns None (after logging a warning) when the image size cannot
    be determined.
    """
    try:
        origsize = imagesize.get(filename)
    except Exception as e:
        out.warn("Could not get image size of {}: {}".format(os.path.basename(filename), e))
        return None

    width, height = origsize
    # imagesize.get() signals an unrecognized file with (-1, -1) rather
    # than raising; never hand such values out as display dimensions.
    if width <= 0 or height <= 0:
        out.warn("Could not get image size of {}: unknown image format".format(
            os.path.basename(filename)))
        return None

    # Fit the width first, then the height; each step keeps at least 1px.
    if width > maxsize[0]:
        height = max(round(height * maxsize[0] / width), 1)
        width = round(maxsize[0])
    if height > maxsize[1]:
        width = max(round(width * maxsize[1] / height), 1)
        height = round(maxsize[1])

    if width < origsize[0] or height < origsize[1]:
        out.info("Downscaled display size from %s to %s" % (origsize, (width, height)))
    return (width, height)
2012-06-20 19:58:13 +00:00
class HtmlEventHandler(EventHandler):
    """Output in HTML format.

    Writes one HTML page per run, named after the current date, that
    lists the downloaded comic images grouped by comic and page URL.
    """

    name = 'html'
    encoding = 'utf-8'

    def fnFromDate(self, date):
        """Get the absolute HTML filename for a date (a time struct)."""
        fn = time.strftime('comics-%Y%m%d', date)
        fn = os.path.join(self.basepath, 'html', fn + ".html")
        return os.path.abspath(fn)

    def addNavLinks(self):
        """Write the previous/next day navigation links.

        The previous day link is only written if a page for it was found
        in start(); the next day link is always written.
        """
        if self.yesterdayUrl:
            self.html.write('<a href="%s">Previous Day</a> | ' % self.yesterdayUrl)
        self.html.write('<a href="%s">Next Day</a>\n' % self.tomorrowUrl)

    def start(self):
        """Start HTML output: open today's file and write the page header."""
        today = time.time()
        yesterday = today - 86400
        tomorrow = today + 86400
        today = time.localtime(today)
        yesterday = time.localtime(yesterday)
        tomorrow = time.localtime(tomorrow)

        fn = self.fnFromDate(today)
        if os.path.exists(fn):
            out.warn('HTML output file %r already exists' % fn)
            out.warn('the page link of previous run will skip this file')
            out.warn('try to generate HTML output only once per day')
            fn = util.getNonexistingFile(fn)

        # exist_ok avoids a check-then-create race on the output directory.
        os.makedirs(os.path.dirname(fn), exist_ok=True)

        try:
            fn_yesterday = self.fnFromDate(yesterday)
            fn_yesterday = util.getExistingFile(fn_yesterday)
            self.yesterdayUrl = self.getUrlFromFilename(fn_yesterday)
        except ValueError:
            # No page for yesterday: omit the "Previous Day" link.
            self.yesterdayUrl = None
        self.tomorrowUrl = self.getUrlFromFilename(self.fnFromDate(tomorrow))

        self.html = codecs.open(fn, 'w', self.encoding)
        self.html.write('''<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=%s"/>
<meta name="generator" content="%s"/>
<title>Comics for %s</title>
</head>
<body>
''' % (self.encoding, configuration.App, time.strftime('%Y/%m/%d', today)))
        self.addNavLinks()
        self.html.write('<ul>\n')
        # last comic name (eg. CalvinAndHobbes)
        self.lastComic = None
        # last comic strip URL (eg. http://example.com/page42)
        self.lastUrl = None

    def comicDownloaded(self, comic, filename, text=None):
        """Write HTML entry for downloaded comic.

        Parameters are:
          comic: The ComicImage class calling this event
          filename: The target filename
          text: Optional text to show below the image; when not given,
            the text attribute of comic is used (if it has one)
        """
        if self.lastComic != comic.scraper.name:
            self.newComic(comic)
        if text is None:
            # The event dispatcher only passes (comic, filename), so take
            # the text from the comic itself, like the RSS handler does.
            text = getattr(comic, 'text', None)
        size = None
        if self.allowdownscale:
            size = getDimensionForImage(filename, MaxImageSize)
        imageUrl = self.getUrlFromFilename(filename)
        pageUrl = comic.referrer
        if pageUrl != self.lastUrl:
            if self.lastUrl is not None:
                # Close the entry of the previous page of the same comic.
                self.html.write('</li>\n')
            self.html.write('<li><a href="%s">%s</a>\n' % (pageUrl, pageUrl))
        self.html.write('<br/><img src="%s"' % imageUrl)
        if size:
            self.html.write(' width="%d" height="%d"' % size)
        self.html.write('/>\n')
        if text:
            self.html.write('<br/>%s\n' % text)
        self.lastComic = comic.scraper.name
        self.lastUrl = pageUrl

    def newComic(self, comic):
        """Start new comic list in HTML."""
        if self.lastUrl is not None:
            self.html.write('</li>\n')
            # The page entry is closed now; forget it so that the next
            # image always opens a fresh entry and nothing is closed twice.
            self.lastUrl = None
        if self.lastComic is not None:
            self.html.write('</ul>\n')
        self.html.write('<li>%s</li>\n' % comic.scraper.name)
        self.html.write('<ul>\n')

    def end(self):
        """End HTML output: close open lists, add links, close the file."""
        if self.lastUrl is not None:
            self.html.write('</li>\n')
        if self.lastComic is not None:
            self.html.write('</ul>\n')
        self.html.write('</ul>\n')
        self.addNavLinks()
        self.html.close()
class JSONEventHandler(EventHandler):
    """Output metadata for comics in JSON format.

    For every scraper a 'dosage.json' file is kept in its download
    directory, mapping page URLs to the images downloaded from them.
    """

    name = 'json'
    encoding = 'utf-8'

    def start(self):
        """Start with empty data."""
        self.data = {}

    def jsonFn(self, scraper):
        """Get filename for the JSON file for a comic."""
        fn = os.path.join(scraper.get_download_dir(self.basepath), 'dosage.json')
        return os.path.abspath(fn)

    def getComicData(self, scraper):
        """Return dictionary with comic info.

        The data is read from the scraper's JSON file on first access
        (or created empty if there is no file) and cached afterwards.
        """
        if scraper not in self.data:
            fn = self.jsonFn(scraper)
            if os.path.exists(fn):
                with codecs.open(fn, 'r', self.encoding) as f:
                    comicData = json.load(f)
                # Tolerate files lacking the 'pages' mapping instead of
                # failing later with a KeyError.
                comicData.setdefault('pages', {})
            else:
                comicData = {'pages': {}}
            self.data[scraper] = comicData
        return self.data[scraper]

    def getPageInfo(self, scraper, url):
        """Return dictionary with comic page info, creating it if needed."""
        pages = self.getComicData(scraper)['pages']
        if url not in pages:
            pages[url] = {'images': {}}
        return pages[url]

    def comicDownloaded(self, comic, filename):
        """Add URL-to-filename mapping into JSON."""
        pageInfo = self.getPageInfo(comic.scraper, comic.referrer)
        images = pageInfo['images']
        # Only a URL not yet known for this page extends the image order;
        # an already recorded URL just gets its filename refreshed, so
        # 'imagesOrder' never contains duplicates.
        if comic.url not in images:
            # If there's already an image for this page start keeping
            # track of their order
            if len(images) == 1:
                pageInfo['imagesOrder'] = list(images)
            if 'imagesOrder' in pageInfo:
                pageInfo['imagesOrder'].append(comic.url)
        images[comic.url] = os.path.basename(filename)

    def comicPageLink(self, scraper, url, prevUrl):
        """Write previous link into JSON."""
        pageInfo = self.getPageInfo(scraper, url)
        pageInfo['prev'] = prevUrl

    def end(self):
        """Write all JSON data to files."""
        for scraper in self.data:
            with codecs.open(self.jsonFn(scraper), 'w', self.encoding) as f:
                json.dump(self.data[scraper], f, indent=2, separators=(',', ': '), sort_keys=True)
# Registered handler classes, keyed by their ``name`` attribute.
_handler_classes = {}


def addHandlerClass(clazz):
    """Register a handler class under its ``name`` attribute.

    Raises ValueError if clazz is not a subclass of EventHandler.
    """
    if issubclass(clazz, EventHandler):
        _handler_classes[clazz.name] = clazz
        return
    raise ValueError("%s must be subclassed from %s" % (clazz, EventHandler))
2017-05-14 22:54:02 +00:00
# Register the handler classes defined in this module so that they can be
# looked up by their ``name`` attribute.
addHandlerClass(HtmlEventHandler)
addHandlerClass(RSSEventHandler)
addHandlerClass(JSONEventHandler)
def getHandlerNames():
    """Return the names of all registered handler classes, sorted."""
    return sorted(_handler_classes)
2012-06-20 19:58:13 +00:00
2012-10-12 20:07:50 +00:00
2017-10-12 21:47:59 +00:00
# FIXME: Hidden singleton :(
_handlers = []


def addHandler(name, basepath=None, baseurl=None, allowDownscale=False):
    """Create the handler registered under name and add it to the
    active handlers.

    A missing basepath defaults to the current directory ('.').
    """
    target_dir = '.' if basepath is None else basepath
    handler_class = _handler_classes[name]
    _handlers.append(handler_class(target_dir, baseurl, allowDownscale))
2017-10-12 21:47:59 +00:00
def clear_handlers():
    """Remove all active handlers, emptying the shared list in place."""
    _handlers.clear()
class MultiHandler:
    """Dispatcher that forwards every event to all active handlers."""

    def start(self):
        """Forward the start event to each active handler."""
        for active in _handlers:
            active.start()

    def comicDownloaded(self, comic, filename):
        """Forward the comic downloaded event to each active handler."""
        for active in _handlers:
            active.comicDownloaded(comic, filename)

    def comicPageLink(self, scraper, url, prevUrl):
        """Forward the link between two comic pages to each active handler."""
        for active in _handlers:
            active.comicPageLink(scraper, url, prevUrl)

    def end(self):
        """Forward the end event to each active handler."""
        for active in _handlers:
            active.end()
# Module-wide dispatcher instance returned by getHandler().
multihandler = MultiHandler()


def getHandler():
    """Return the module-wide MultiHandler instance."""
    return multihandler