0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031"""Syndication feed generation library -- used for generating RSS, etc.
0032
0033Sample usage:
0034
0035>>> feed = feedgenerator.Rss201rev2Feed(
0036... title=u"Poynter E-Media Tidbits",
0037... link=u"http://www.poynter.org/column.asp?id=31",
0038... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
0039... language=u"en",
0040... )
0041>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
0042>>> fp = open('test.rss', 'w')
0043>>> feed.write(fp, 'utf-8')
0044>>> fp.close()
0045
0046For definitions of the different versions of RSS, see:
0047http://diveintomark.org/archives/2004/02/04/incompatible-rss
0048
0049"""
0050
0051from util import SimplerXMLGenerator
0052import datetime, re, time
0053import email.Utils
0054from xml.dom import minidom
0055from xml.parsers.expat import ExpatError
0056
def rfc2822_date(date):
    """Format ``date`` as an RFC 2822 date string expressed in GMT.

    Naive values (and plain ``datetime.date`` objects) are interpreted as
    local time via ``time.mktime``. Timezone-aware datetimes are converted
    using their own UTC offset, so they are not misread as local time.

    Returns a string such as ``'Tue, 01 Jan 2008 10:00:00 -0000'``.
    """
    # Imported locally so the block does not depend on the capitalised
    # ``email.Utils`` alias, which only exists on Python 2.
    import calendar
    from email.utils import formatdate

    # Only datetime objects have utcoffset(); plain dates are always naive.
    offset = date.utcoffset() if isinstance(date, datetime.datetime) else None
    if offset is not None:
        # utctimetuple() already applies the offset, so timegm() yields the
        # right epoch value regardless of the machine's local timezone.
        timestamp = calendar.timegm(date.utctimetuple())
    else:
        timestamp = time.mktime(date.timetuple())
    return formatdate(timestamp)
0059
def rfc3339_date(date):
    """Format ``date`` as an RFC 3339 timestamp with a ``Z`` (UTC) suffix.

    Timezone-aware datetimes are shifted to UTC before formatting, so the
    ``Z`` suffix is accurate. Naive datetimes and plain ``datetime.date``
    objects are formatted as-is, i.e. they are taken to be UTC already.

    Returns a string such as ``'2008-01-01T10:00:00Z'``.
    """
    # Only datetime objects have utcoffset(); plain dates are always naive.
    offset = date.utcoffset() if isinstance(date, datetime.datetime) else None
    if offset is not None:
        # Drop the tzinfo and subtract the offset to get the UTC wall clock.
        date = date.replace(tzinfo=None) - offset
    return date.strftime('%Y-%m-%dT%H:%M:%SZ')
0062
def get_tag_uri(url, date):
    """Create a TagURI from ``url`` and an optional ``date``.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    The URL scheme is removed, the date (``YYYY-MM-DD``) is inserted just
    before the first ``/`` when ``date`` is not None, and every ``#`` is
    replaced by ``/``. The result is prefixed with ``tag:``.
    """
    # Strip https as well as http: otherwise the first "/" found below would
    # be the one inside "https://" and the date would land in the scheme.
    tag = re.sub('^https?://', '', url)
    if date is not None:
        # Only the first slash (the end of the host part) receives the date.
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
    tag = re.sub('#', '/', tag)
    return 'tag:' + tag
0070
class SyndicationFeed(object):
    """Common base for syndication feeds; concrete subclasses implement write()."""

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None):
        """Store the feed-level metadata and start with an empty item list."""
        self.items = []
        self.feed = dict(
            title=title,
            link=link,
            description=description,
            language=language,
            author_email=author_email,
            author_name=author_name,
            author_link=author_link,
            subtitle=subtitle,
            # A missing/empty value is normalised to an empty tuple.
            categories=categories or (),
            feed_url=feed_url,
        )

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=()):
        """Append one item to the feed.

        Every argument is expected to be a Python Unicode object, with two
        exceptions: pubdate is a datetime.datetime object and enclosure is
        an instance of the Enclosure class.

        """
        entry = dict(
            title=title,
            link=link,
            description=description,
            author_email=author_email,
            author_name=author_name,
            author_link=author_link,
            pubdate=pubdate,
            comments=comments,
            unique_id=unique_id,
            enclosure=enclosure,
            categories=categories or (),
        )
        self.items.append(entry)

    def num_items(self):
        """Return how many items have been added so far."""
        return len(self.items)

    def write(self, outfile, encoding):
        """Write the feed, in the given encoding, to the file-like object
        outfile.

        This base implementation only raises NotImplementedError;
        subclasses are expected to override it.

        """
        raise NotImplementedError

    def writeString(self, encoding):
        """Render the feed in the given encoding and return it as a string."""
        from StringIO import StringIO
        stream = StringIO()
        self.write(stream, encoding)
        return stream.getvalue()

    def latest_post_date(self):
        """Return the most recent pubdate among the items.

        When no item carries a pubdate, the current date/time is returned
        instead.

        """
        dated = sorted(
            entry['pubdate'] for entry in self.items
            if entry['pubdate'] is not None
        )
        if not dated:
            return datetime.datetime.now()
        return dated[-1]
0145
class Enclosure(object):
    """Holds the url, length and mime type of an RSS enclosure."""

    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        self.url = url
        self.length = length
        self.mime_type = mime_type
0151
class RssFeed(SyndicationFeed):
    """Shared RSS output logic.

    write() reads ``self._version`` and calls ``self.write_items()``,
    neither of which is defined on this class.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """Write the feed as an RSS document to ``outfile`` in ``encoding``."""
        channel = self.feed
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", {u"version": self._version})
        handler.startElement(u"channel", {})
        # These three channel elements are always written, in this order.
        always_written = (
            (u"title", 'title'),
            (u"link", 'link'),
            (u"description", 'description'),
        )
        for tag, key in always_written:
            handler.addQuickElement(tag, channel[key])
        language = channel['language']
        if language is not None:
            handler.addQuickElement(u"language", language)
        for category in channel['categories']:
            handler.addQuickElement(u"category", category)
        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def endChannelElement(self, handler):
        """Close the <channel> element."""
        handler.endElement(u"channel")
0172
class RssUserland091Feed(RssFeed):
    """RSS 0.91 feed: each item is written with title, link and description only."""
    _version = u"0.91"

    def write_items(self, handler):
        """Emit one <item> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"description", summary)
            handler.endElement(u"item")
0183
class Rss201rev2Feed(RssFeed):
    """RSS 2.0 feed: items may also carry author, date, comments, guid,
    enclosure and categories."""

    _version = u"2.0"

    def write_items(self, handler):
        """Emit one <item> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"description", summary)

            # Author: written only when an e-mail address is present; the
            # name, if any, follows it in parentheses.
            author_email = entry["author_email"]
            author_name = entry["author_name"]
            if author_email:
                if author_name:
                    author_text = "%s (%s)" % (author_email, author_name)
                else:
                    author_text = author_email
                handler.addQuickElement(u"author", author_text)

            # Optional simple elements.
            published = entry['pubdate']
            if published is not None:
                handler.addQuickElement(u"pubDate", rfc2822_date(published).decode('ascii'))
            comments = entry['comments']
            if comments is not None:
                handler.addQuickElement(u"comments", comments)
            guid = entry['unique_id']
            if guid is not None:
                handler.addQuickElement(u"guid", guid)

            # Enclosure: an empty element carrying only attributes.
            enclosure = entry['enclosure']
            if enclosure is not None:
                handler.addQuickElement(u"enclosure", '',
                    {u"url": enclosure.url, u"length": enclosure.length,
                        u"type": enclosure.mime_type})

            for category in entry['categories']:
                handler.addQuickElement(u"category", category)

            handler.endElement(u"item")
0220
class Atom1Feed(SyndicationFeed):
    """Atom feed written in the http://www.w3.org/2005/Atom namespace."""

    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """Write the feed as an Atom document to ``outfile`` in ``encoding``."""
        feed = self.feed
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()

        # xml:lang is only added to the root element when a language is set.
        root_attrs = {u"xmlns": self.ns}
        if feed['language'] is not None:
            root_attrs[u"xml:lang"] = feed['language']
        handler.startElement(u"feed", root_attrs)

        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        # The feed's link doubles as its id.
        handler.addQuickElement(u"id", feed['link'])
        handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii'))
        self._write_author(handler, feed)
        if feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", feed['subtitle'])
        for category in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": category})
        self.write_items(handler)
        handler.endElement(u"feed")

    def _write_author(self, handler, info):
        """Emit an <author> element from ``info`` if it has an author name.

        ``info`` is a mapping with 'author_name', 'author_email' and
        'author_link' keys (the feed dict or an item dict).
        """
        if info['author_name'] is None:
            return
        handler.startElement(u"author", {})
        handler.addQuickElement(u"name", info['author_name'])
        for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
            if info[key] is not None:
                handler.addQuickElement(tag, info[key])
        handler.endElement(u"author")

    def write_items(self, handler):
        """Emit one <entry> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"entry", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", u"", {u"href": entry['link'], u"rel": u"alternate"})
            published = entry['pubdate']
            if published is not None:
                handler.addQuickElement(u"updated", rfc3339_date(published).decode('ascii'))

            self._write_author(handler, entry)

            # Fall back to a tag URI built from link and pubdate when the
            # item has no explicit unique id.
            unique_id = entry['unique_id']
            if unique_id is None:
                unique_id = get_tag_uri(entry['link'], entry['pubdate'])
            handler.addQuickElement(u"id", unique_id)

            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"summary", summary, {u"type": u"html"})

            # The enclosure is written as a <link rel="enclosure"> element.
            enclosure = entry['enclosure']
            if enclosure is not None:
                handler.addQuickElement(u"link", '',
                    {u"rel": u"enclosure",
                     u"href": enclosure.url,
                     u"length": enclosure.length,
                     u"type": enclosure.mime_type})

            for category in entry['categories']:
                handler.addQuickElement(u"category", u"", {u"term": category})

            handler.endElement(u"entry")
0295
0296
0297
# DefaultFeed is an alias for the RSS 2.0 feed class.
DefaultFeed = Rss201rev2Feed