Allow multiple event output and improve HTML output.

This commit is contained in:
Bastian Kleineidam 2013-03-11 17:33:59 +01:00
parent fb466833db
commit 7ee73caf3c
7 changed files with 71 additions and 25 deletions

View file

@ -7,7 +7,8 @@ Features:
Changes:
- cmdline: Comic lists are displayed one page at a time.
- events: HTML output embeds the images in the page.
- output: HTML output embeds the images in the page and shows the page URLs.
- output: The --output option can be given multiple times.
Fixes:
- cmdline: Catch error when piping output to another

View file

@ -15,7 +15,7 @@ for updating and maintaining collections.
Specifies a base path to put comic subdirectories. The default is \fBComics\fP.
.TP
\fB\-\-baseurl=\fP\fIPATH\fP
Specifies the base URL for output events. The default is a local file URI.
Specifies the base URL for output handlers. The default is a local file URI.
.TP
\fB\-a\fP, \fB\-\-all\fP
Traverses all available strips backwards from the current one.
@ -61,6 +61,7 @@ current run, named by date (ala dailystrips). The files can be found in the
Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in \fBComics/dailydose.rss\fP.
.RE
This option can be given multiple times.
.TP
\fB\-t\fP, \fB\-\-timestamps\fP
Print timestamps for all output at any level.

View file

@ -30,7 +30,7 @@ for updating and maintaining collections.
<DT><B>-b</B> <I>PATH</I>, <B>--basepath=</B><I>PATH</I><DD>
Specifies a base path to put comic subdirectories. The default is <B>Comics</B>.
<DT><B>--baseurl=</B><I>PATH</I><DD>
Specifies the base URL for output events. The default is a local file URI.
Specifies the base URL for output handlers. The default is a local file URI.
<DT><B>-a</B>, <B>--all</B><DD>
Traverses all available strips backwards from the current one.
This can be useful if you want a full collection of a new comic strip,
@ -77,6 +77,7 @@ Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in <B>Comics/dailydose.rss</B>.
</DL>
This option can be given multiple times.
<DL COMPACT>
<DT><B>-t</B>, <B>--timestamps</B><DD>
Print timestamps for all output at any level.

View file

@ -20,8 +20,8 @@ OPTIONS
default is Comics.
--baseurl=PATH
Specifies the base URL for output events. The default is
a local file URI.
Specifies the base URL for output handlers. The default
is a local file URI.
-a, --all
Traverses all available strips backwards from the cur
@ -61,6 +61,7 @@ OPTIONS
rss - Writes out an RSS feed detailing what strips were
downloaded in the last 24 hours. The feed can be found
in Comics/dailydose.rss.
This option can be given multiple times.
-t, --timestamps
Print timestamps for all output at any level.

7
dosage
View file

@ -82,7 +82,7 @@ def setupOptions():
parser.add_argument('--version', action='store_true', help='display the version number')
parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules')
parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level')
parser.add_argument('-o', '--output', action='store', choices=events.getHandlers(), help='output formatting for downloaded comics')
parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics')
parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content')
parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)')
@ -150,8 +150,9 @@ def displayComicHelp(scraperobj):
def getComics(options):
"""Retrieve comics."""
errors = 0
if options.output:
events.installHandler(options.output, options.basepath, options.baseurl)
if options.handler:
for name in options.handler:
events.addHandler(name, options.basepath, options.baseurl)
events.getHandler().start()
try:
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):

View file

@ -46,6 +46,8 @@ class EventHandler(object):
class RSSEventHandler(EventHandler):
"""Output in RSS format."""
name = 'rss'
def getFilename(self):
"""Return RSS filename."""
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
@ -95,6 +97,8 @@ class RSSEventHandler(EventHandler):
class HtmlEventHandler(EventHandler):
"""Output in HTML format."""
name = 'html'
def fnFromDate(self, date):
"""Get filename from date."""
fn = time.strftime('comics-%Y%m%d.html', date)
@ -135,8 +139,10 @@ class HtmlEventHandler(EventHandler):
<ul>
''' % (configuration.App, time.strftime('%Y/%m/%d', today),
yesterdayUrl, tomorrowUrl))
# last comic name (eg. CalvinAndHobbes)
self.lastComic = None
# last comic strip URL (eg. http://example.com/page42)
self.lastUrl = None
def comicDownloaded(self, comic, filename):
"""Write HTML entry for downloaded comic."""
@ -144,44 +150,79 @@ class HtmlEventHandler(EventHandler):
self.newComic(comic)
imageUrl = self.getUrlFromFilename(filename)
pageUrl = comic.referrer
self.html.write(u'<li><a href="%s"><img src="%s"/></a></li>\n' % (pageUrl, imageUrl))
if pageUrl != self.lastUrl:
self.html.write(u'<li><a href="%s">%s</a>\n' % (pageUrl, pageUrl))
self.html.write(u'<br/><img src="%s"/>\n' % imageUrl)
self.lastComic = comic.name
self.lastUrl = pageUrl
def newComic(self, comic):
"""Start new comic list in HTML."""
if self.lastUrl is not None:
self.html.write(u'</li>\n')
if self.lastComic is not None:
self.html.write(u'</ul>\n')
self.lastComic = comic.name
self.html.write(u'<li>%s</li>\n' % comic.name)
self.html.write(u'<ul>\n')
def end(self):
"""End HTML output."""
if self.lastUrl is not None:
self.html.write(u'</li>\n')
if self.lastComic is not None:
self.html.write(u' </ul>\n')
self.html.write(u'</ul>\n')
self.html.write(u'''</ul>
</body>
</html>''')
self.html.close()
handlers = {
'html': HtmlEventHandler,
'rss': RSSEventHandler,
}
_handler_classes = {}
def getHandlers():
def addHandlerClass(clazz):
if not issubclass(clazz, EventHandler):
raise ValueError("%s must be subclassed from %s" % (clazz, EventHandler))
_handler_classes[clazz.name] = clazz
addHandlerClass(HtmlEventHandler)
addHandlerClass(RSSEventHandler)
def getHandlerNames():
"""Get sorted handler names."""
return sorted(handlers.keys())
return sorted(_handler_classes.keys())
_handler = EventHandler(".", None)
def installHandler(name, basepath=None, baseurl=None):
_handlers = []
def addHandler(name, basepath=None, baseurl=None):
"""Install a global handler with given name."""
global _handler
if basepath is None:
basepath = '.'
_handler = handlers[name](basepath, baseurl)
_handlers.append(_handler_classes[name](basepath, baseurl))
class MultiHandler(object):
"""Encapsulate a list of handlers and forward each event to all of them."""
def start(self):
"""Emit a start event to every handler in the module-level _handlers list."""
for handler in _handlers:
handler.start()
def comicDownloaded(self, comic, filename):
"""Emit a comic downloaded event to every handler in the module-level _handlers list."""
for handler in _handlers:
handler.comicDownloaded(comic, filename)
def end(self):
"""Emit an end event to every handler in the module-level _handlers list."""
for handler in _handlers:
handler.end()
multihandler = MultiHandler()
def getHandler():
"""Get installed event handler."""
return _handler
return multihandler

View file

@ -53,10 +53,10 @@ class TestDosage (unittest.TestCase):
self.assertRaises(OSError, run_with_options, ['--imadoofus'])
def test_fetch_html(self):
run_with_options(["-n", "2", "-b", self.tmpdir, "-o", "html", "calvinandhobbes"])
run_with_options(["-n", "2", "-b", self.tmpdir, "-o", "html", "-o", "rss", "calvinandhobbes"])
def test_fetch_rss(self):
run_with_options(["--numstrips", "2", "--baseurl", "bla", "--basepath", self.tmpdir, "--output", "rss", "--adult", "sexyloser"])
run_with_options(["--numstrips", "2", "--baseurl", "bla", "--basepath", self.tmpdir, "--output", "rss", "--output", "html", "--adult", "sexyloser"])
def test_fetch_indexed(self):
run_with_options(["-n", "2", "-b", self.tmpdir, "calvinandhobbes:2012/02/02"])