dosage/dosagelib/rss.py

90 lines
3.1 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2012-06-20 19:58:13 +00:00
# TODO: Not sure whether this RSS output is "valid"; it should be, though.
# It might also be nice to categorise comics under one item.
import xml.dom.minidom
import time
class Feed(object):
    """Write an RSS feed with comic strip images.

    The feed is kept as an ``xml.dom.minidom`` document in ``self.rss``;
    ``self.channel`` is its single ``<channel>`` element, to which all
    ``<item>`` elements are attached.
    """

    def __init__(self, title, link, description, lang='en-us'):
        """Initialize RSS writer with given title, link and description.

        ``lang`` is stored in the channel's ``<language>`` element.
        """
        self.rss = xml.dom.minidom.Document()
        rss_root = self.rss.appendChild(self.rss.createElement('rss'))
        rss_root.setAttribute('version', '2.0')
        self.channel = rss_root.appendChild(self.rss.createElement('channel'))
        self.addElement(self.channel, 'title', title)
        self.addElement(self.channel, 'link', link)
        self.addElement(self.channel, 'language', lang)
        self.addElement(self.channel, 'description', description)

    def addElement(self, parent, tag, value):
        """Append a ``<tag>`` element holding the text ``value`` to ``parent``.

        Returns the newly created text node (not the element itself).
        """
        element = parent.appendChild(self.rss.createElement(tag))
        return element.appendChild(self.rss.createTextNode(value))

    def _createItem(self, title, link, description, date):
        """Build a detached ``<item>`` element with its four text children."""
        item = self.rss.createElement('item')
        self.addElement(item, 'title', title)
        self.addElement(item, 'link', link)
        self.addElement(item, 'description', description)
        self.addElement(item, 'pubDate', date)
        return item

    def insertHead(self, title, link, description, date):
        """Insert an item in front of all items already in the feed."""
        item = self._createItem(title, link, description, date)
        elems = self.rss.getElementsByTagName('item')
        if elems:
            self.channel.insertBefore(item, elems[0])
        else:
            # No items yet: the new one simply goes at the end of the channel.
            self.channel.appendChild(item)

    def addItem(self, title, link, description, date):
        """Append an item after all items already in the feed."""
        self.channel.appendChild(
            self._createItem(title, link, description, date))

    def write(self, path):
        """Write RSS content to the file at ``path``, encoded as UTF-8."""
        data = self.getXML()
        # toxml() emits an XML declaration without an encoding attribute,
        # which XML parsers interpret as UTF-8, so the bytes on disk must be
        # UTF-8 regardless of the platform's default encoding.
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        # The context manager closes the file even if the write fails.
        with open(path, 'wb') as output:
            output.write(data)

    def getXML(self):
        """Get RSS content in XML format."""
        return self.rss.toxml()
2012-12-12 16:41:29 +00:00
2012-06-20 19:58:13 +00:00
def _getNodeText(node, tag):
    """Return the text content of the first ``tag`` element below ``node``.

    All text and CDATA children of the element are joined, so an empty
    element (e.g. ``<description></description>``) yields ``''`` instead of
    failing. Raises IndexError if ``node`` has no ``tag`` descendant.
    """
    element = node.getElementsByTagName(tag)[0]
    return ''.join(
        child.data for child in element.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE))


def parseFeed(filename, yesterday):
    """Parse an RSS feed and filter only entries that are newer than yesterday.

    ``filename`` is passed to ``xml.dom.minidom.parse``. ``yesterday`` is
    compared against the ``time.struct_time`` parsed from each item's
    ``pubDate`` (presumably itself a ``struct_time`` -- confirm with callers).

    Returns a new ``Feed`` carrying the channel's title, link and
    description plus every item whose ``pubDate`` is later than
    ``yesterday``. Raises IndexError if the channel or a required element
    is missing, and ValueError if a ``pubDate`` does not match the expected
    ``'%a, %d %b %Y %H:%M:%S GMT'`` layout.
    """
    dom = xml.dom.minidom.parse(filename)
    channel = dom.getElementsByTagName('channel')[0]  # Only one channel node
    feed = Feed(_getNodeText(channel, 'title'),
                _getNodeText(channel, 'link'),
                _getNodeText(channel, 'description'))
    for item in dom.getElementsByTagName('item'):
        pubDate = _getNodeText(item, 'pubDate')
        itemDate = time.strptime(pubDate, '%a, %d %b %Y %H:%M:%S GMT')
        if itemDate > yesterday:  # If newer than yesterday
            feed.addItem(_getNodeText(item, 'title'),
                         _getNodeText(item, 'link'),
                         _getNodeText(item, 'description'),
                         pubDate)
    return feed