Allow multiple event output and improve HTML output.

This commit is contained in:
Bastian Kleineidam 2013-03-11 17:33:59 +01:00
parent fb466833db
commit 7ee73caf3c
7 changed files with 71 additions and 25 deletions

View file

@ -7,7 +7,8 @@ Features:
Changes: Changes:
- cmdline: Comic lists are displayed one page at a time. - cmdline: Comic lists are displayed one page at a time.
- events: HTML output embeds the images in the page. - output: HTML output embeds the images in the page and shows the page URLs.
- output: The --output option can be given multiple times.
Fixes: Fixes:
- cmdline: Catch error when piping output to another - cmdline: Catch error when piping output to another

View file

@ -15,7 +15,7 @@ for updating and maintaining collections.
Specifies a base path to put comic subdirectories. The default is \fBComics\fP. Specifies a base path to put comic subdirectories. The default is \fBComics\fP.
.TP .TP
\fB\-\-baseurl=\fP\fIPATH\fP \fB\-\-baseurl=\fP\fIPATH\fP
Specifies the base URL for output events. The default is a local file URI. Specifies the base URL for output handlers. The default is a local file URI.
.TP .TP
\fB\-a\fP, \fB\-\-all\fP \fB\-a\fP, \fB\-\-all\fP
Traverses all available strips backwards from the current one. Traverses all available strips backwards from the current one.
@ -61,6 +61,7 @@ current run, named by date (ala dailystrips). The files can be found in the
Writes out an RSS feed detailing what strips were downloaded in the last 24 Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in \fBComics/dailydose.xml\fP. hours. The feed can be found in \fBComics/dailydose.xml\fP.
.RE .RE
This option can be given multiple times.
.TP .TP
\fB\-t\fP, \fB\-\-timestamps\fP \fB\-t\fP, \fB\-\-timestamps\fP
Print timestamps for all output at any level. Print timestamps for all output at any level.

View file

@ -30,7 +30,7 @@ for updating and maintaining collections.
<DT><B>-b</B> <I>PATH</I>, <B>--basepath=</B><I>PATH</I><DD> <DT><B>-b</B> <I>PATH</I>, <B>--basepath=</B><I>PATH</I><DD>
Specifies a base path to put comic subdirectories. The default is <B>Comics</B>. Specifies a base path to put comic subdirectories. The default is <B>Comics</B>.
<DT><B>--baseurl=</B><I>PATH</I><DD> <DT><B>--baseurl=</B><I>PATH</I><DD>
Specifies the base URL for output events. The default is a local file URI. Specifies the base URL for output handlers. The default is a local file URI.
<DT><B>-a</B>, <B>--all</B><DD> <DT><B>-a</B>, <B>--all</B><DD>
Traverses all available strips backwards from the current one. Traverses all available strips backwards from the current one.
This can be useful if you want a full collection of a new comic strip, This can be useful if you want a full collection of a new comic strip,
@ -77,6 +77,7 @@ Writes out an RSS feed detailing what strips were downloaded in the last 24
hours. The feed can be found in <B>Comics/dailydose.xml</B>. hours. The feed can be found in <B>Comics/dailydose.xml</B>.
</DL> </DL>
This option can be given multiple times.
<DL COMPACT> <DL COMPACT>
<DT><B>-t</B>, <B>--timestamps</B><DD> <DT><B>-t</B>, <B>--timestamps</B><DD>
Print timestamps for all output at any level. Print timestamps for all output at any level.

View file

@ -20,8 +20,8 @@ OPTIONS
default is Comics. default is Comics.
--baseurl=PATH --baseurl=PATH
Specifies the base URL for output events. The default is Specifies the base URL for output handlers. The default
a local file URI. is a local file URI.
-a, --all -a, --all
Traverses all available strips backwards from the cur Traverses all available strips backwards from the cur
@ -61,6 +61,7 @@ OPTIONS
rss - Writes out an RSS feed detailing what strips were rss - Writes out an RSS feed detailing what strips were
downloaded in the last 24 hours. The feed can be found downloaded in the last 24 hours. The feed can be found
in Comics/dailydose.xml. in Comics/dailydose.xml.
This option can be given multiple times.
-t, --timestamps -t, --timestamps
Print timestamps for all output at any level. Print timestamps for all output at any level.

7
dosage
View file

@ -82,7 +82,7 @@ def setupOptions():
parser.add_argument('--version', action='store_true', help='display the version number') parser.add_argument('--version', action='store_true', help='display the version number')
parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules') parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules')
parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level') parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level')
parser.add_argument('-o', '--output', action='store', choices=events.getHandlers(), help='output formatting for downloaded comics') parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics')
parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content') parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content')
parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS) parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)') parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)')
@ -150,8 +150,9 @@ def displayComicHelp(scraperobj):
def getComics(options): def getComics(options):
"""Retrieve comics.""" """Retrieve comics."""
errors = 0 errors = 0
if options.output: if options.handler:
events.installHandler(options.output, options.basepath, options.baseurl) for name in options.handler:
events.addHandler(name, options.basepath, options.baseurl)
events.getHandler().start() events.getHandler().start()
try: try:
for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch): for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):

View file

@ -46,6 +46,8 @@ class EventHandler(object):
class RSSEventHandler(EventHandler): class RSSEventHandler(EventHandler):
"""Output in RSS format.""" """Output in RSS format."""
name = 'rss'
def getFilename(self): def getFilename(self):
"""Return RSS filename.""" """Return RSS filename."""
return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss')) return os.path.abspath(os.path.join(self.basepath, 'dailydose.rss'))
@ -95,6 +97,8 @@ class RSSEventHandler(EventHandler):
class HtmlEventHandler(EventHandler): class HtmlEventHandler(EventHandler):
"""Output in HTML format.""" """Output in HTML format."""
name = 'html'
def fnFromDate(self, date): def fnFromDate(self, date):
"""Get filename from date.""" """Get filename from date."""
fn = time.strftime('comics-%Y%m%d.html', date) fn = time.strftime('comics-%Y%m%d.html', date)
@ -135,8 +139,10 @@ class HtmlEventHandler(EventHandler):
<ul> <ul>
''' % (configuration.App, time.strftime('%Y/%m/%d', today), ''' % (configuration.App, time.strftime('%Y/%m/%d', today),
yesterdayUrl, tomorrowUrl)) yesterdayUrl, tomorrowUrl))
# last comic name (eg. CalvinAndHobbes)
self.lastComic = None self.lastComic = None
# last comic strip URL (eg. http://example.com/page42)
self.lastUrl = None
def comicDownloaded(self, comic, filename): def comicDownloaded(self, comic, filename):
"""Write HTML entry for downloaded comic.""" """Write HTML entry for downloaded comic."""
@ -144,18 +150,25 @@ class HtmlEventHandler(EventHandler):
self.newComic(comic) self.newComic(comic)
imageUrl = self.getUrlFromFilename(filename) imageUrl = self.getUrlFromFilename(filename)
pageUrl = comic.referrer pageUrl = comic.referrer
self.html.write(u'<li><a href="%s"><img src="%s"/></a></li>\n' % (pageUrl, imageUrl)) if pageUrl != self.lastUrl:
self.html.write(u'<li><a href="%s">%s</a>\n' % (pageUrl, pageUrl))
self.html.write(u'<br/><img src="%s"/>\n' % imageUrl)
self.lastComic = comic.name
self.lastUrl = pageUrl
def newComic(self, comic): def newComic(self, comic):
"""Start new comic list in HTML.""" """Start new comic list in HTML."""
if self.lastUrl is not None:
self.html.write(u'</li>\n')
if self.lastComic is not None: if self.lastComic is not None:
self.html.write(u'</ul>\n') self.html.write(u'</ul>\n')
self.lastComic = comic.name
self.html.write(u'<li>%s</li>\n' % comic.name) self.html.write(u'<li>%s</li>\n' % comic.name)
self.html.write(u'<ul>\n') self.html.write(u'<ul>\n')
def end(self): def end(self):
"""End HTML output.""" """End HTML output."""
if self.lastUrl is not None:
self.html.write(u'</li>\n')
if self.lastComic is not None: if self.lastComic is not None:
self.html.write(u'</ul>\n') self.html.write(u'</ul>\n')
self.html.write(u'''</ul> self.html.write(u'''</ul>
@ -164,24 +177,52 @@ class HtmlEventHandler(EventHandler):
self.html.close() self.html.close()
handlers = { _handler_classes = {}
'html': HtmlEventHandler,
'rss': RSSEventHandler,
}
def getHandlers(): def addHandlerClass(clazz):
if not issubclass(clazz, EventHandler):
raise ValueError("%s must be subclassed from %s" % (clazz, EventHandler))
_handler_classes[clazz.name] = clazz
addHandlerClass(HtmlEventHandler)
addHandlerClass(RSSEventHandler)
def getHandlerNames():
"""Get sorted handler names.""" """Get sorted handler names."""
return sorted(handlers.keys()) return sorted(_handler_classes.keys())
_handler = EventHandler(".", None)
def installHandler(name, basepath=None, baseurl=None): _handlers = []
def addHandler(name, basepath=None, baseurl=None):
"""Install a global handler with given name.""" """Install a global handler with given name."""
global _handler
if basepath is None: if basepath is None:
basepath = '.' basepath = '.'
_handler = handlers[name](basepath, baseurl) _handlers.append(_handler_classes[name](basepath, baseurl))
class MultiHandler(object):
"""Encapsulate a list of handlers."""
def start(self):
"""Emit a start event. Should be overridden in subclass."""
for handler in _handlers:
handler.start()
def comicDownloaded(self, comic, filename):
"""Emit a comic downloaded event. Should be overridden in subclass."""
for handler in _handlers:
handler.comicDownloaded(comic, filename)
def end(self):
"""Emit an end event. Should be overridden in subclass."""
for handler in _handlers:
handler.end()
multihandler = MultiHandler()
def getHandler(): def getHandler():
"""Get installed event handler.""" """Get installed event handler."""
return _handler return multihandler

View file

@ -53,10 +53,10 @@ class TestDosage (unittest.TestCase):
self.assertRaises(OSError, run_with_options, ['--imadoofus']) self.assertRaises(OSError, run_with_options, ['--imadoofus'])
def test_fetch_html(self): def test_fetch_html(self):
run_with_options(["-n", "2", "-b", self.tmpdir, "-o", "html", "calvinandhobbes"]) run_with_options(["-n", "2", "-b", self.tmpdir, "-o", "html", "-o", "rss", "calvinandhobbes"])
def test_fetch_rss(self): def test_fetch_rss(self):
run_with_options(["--numstrips", "2", "--baseurl", "bla", "--basepath", self.tmpdir, "--output", "rss", "--adult", "sexyloser"]) run_with_options(["--numstrips", "2", "--baseurl", "bla", "--basepath", self.tmpdir, "--output", "rss", "--output", "html", "--adult", "sexyloser"])
def test_fetch_indexed(self): def test_fetch_indexed(self):
run_with_options(["-n", "2", "-b", self.tmpdir, "calvinandhobbes:2012/02/02"]) run_with_options(["-n", "2", "-b", self.tmpdir, "calvinandhobbes:2012/02/02"])