0001# Copyright (c) 2005, the Lawrence Journal-World
0002# All rights reserved.
0003#
0004# Redistribution and use in source and binary forms, with or without modification,
0005# are permitted provided that the following conditions are met:
0006#
0007# 1. Redistributions of source code must retain the above copyright notice,
0008# this list of conditions and the following disclaimer.
0009#
0010# 2. Redistributions in binary form must reproduce the above copyright
0011# notice, this list of conditions and the following disclaimer in the
0012# documentation and/or other materials provided with the distribution.
0013#
0014# 3. Neither the name of Django nor the names of its contributors may be used
0015# to endorse or promote products derived from this software without
0016# specific prior written permission.
0017#
0018# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0019# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0020# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0021# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
0022# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0023# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0024# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
0025# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0026# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0027# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0028
0029# LAST SYNCED WITH DJANGO SOURCE - JULY 12th, 2006 - DJANGO REVISION 3143
0030# http://code.djangoproject.com/log/django/trunk/django/utils/feedgenerator.py
0031"""Syndication feed generation library -- used for generating RSS, etc.
0032
0033Sample usage:
0034
0035>>> feed = feedgenerator.Rss201rev2Feed(
0036... title=u"Poynter E-Media Tidbits",
0037... link=u"http://www.poynter.org/column.asp?id=31",
0038... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
0039... language=u"en",
0040... )
0041>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
0042>>> fp = open('test.rss', 'w')
0043>>> feed.write(fp, 'utf-8')
0044>>> fp.close()
0045
0046For definitions of the different versions of RSS, see:
0047http://diveintomark.org/archives/2004/02/04/incompatible-rss
0048
0049"""
0050
0051from util import SimplerXMLGenerator
0052import datetime, re, time
0053import email.Utils
0054from xml.dom import minidom
0055from xml.parsers.expat import ExpatError
0056
def rfc2822_date(date):
    """Return ``date`` formatted as an RFC 2822 date string.

    Timezone-aware datetimes are converted to UTC before formatting.
    Values without an offset (naive datetimes and plain ``datetime.date``
    objects) are interpreted as local time via ``time.mktime``.
    """
    import calendar
    try:
        from email.utils import formatdate
    except ImportError:
        # Fall back to the legacy capitalised module name.
        from email.Utils import formatdate

    # Plain date objects have no utcoffset(); treat them like naive datetimes.
    utcoffset = getattr(date, 'utcoffset', None)
    if utcoffset is not None and utcoffset() is not None:
        # utctimetuple() has the offset applied already, and timegm() reads
        # the tuple as UTC; mktime() would misread it as local wall time.
        timestamp = calendar.timegm(date.utctimetuple())
    else:
        timestamp = time.mktime(date.timetuple())
    return formatdate(timestamp)
0059
def rfc3339_date(date):
    """Return ``date`` formatted as an RFC 3339 timestamp ending in ``Z``.

    The trailing ``Z`` denotes UTC, so timezone-aware datetimes are shifted
    to UTC first. Values without an offset (naive datetimes and plain
    ``datetime.date`` objects) are formatted unchanged.
    """
    # Plain date objects have no utcoffset(); treat them like naive datetimes.
    utcoffset = getattr(date, 'utcoffset', None)
    offset = None
    if utcoffset is not None:
        offset = utcoffset()
    if offset is not None:
        # Subtracting the offset yields the UTC wall-clock time; drop tzinfo
        # so the value is a plain UTC datetime.
        date = (date - offset).replace(tzinfo=None)
    return date.strftime('%Y-%m-%dT%H:%M:%SZ')
0062
def get_tag_uri(url, date):
    """Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    ``url`` is the item's link; a leading ``http://`` or ``https://`` is
    removed. ``date`` is an object with ``strftime`` (or None); when given,
    ``,YYYY-MM-DD:`` is inserted before the first remaining slash. Every
    ``#`` is turned into ``/``. Returns the result prefixed with ``tag:``.
    """
    # Strip the scheme. https has to go too, otherwise the "first slash"
    # replaced below would be one of the slashes in "https://".
    tag = re.sub(r'^https?://', '', url)
    if date is not None:
        # Only the first slash (the authority/path boundary) gets the date.
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
    tag = tag.replace('#', '/')
    return 'tag:' + tag
0070
class SyndicationFeed(object):
    """Common state and helpers for every feed format.

    Subclasses are expected to supply their own write().
    """

    def __init__(self, title, link, description, language=None, author_email=None,
                 author_name=None, author_link=None, subtitle=None, categories=None,
                 feed_url=None):
        # Feed-level metadata, keyed by the constructor argument names.
        self.feed = dict(
            title=title,
            link=link,
            description=description,
            language=language,
            author_email=author_email,
            author_name=author_name,
            author_link=author_link,
            subtitle=subtitle,
            categories=categories or (),
            feed_url=feed_url,
        )
        # One dict per entry, in the order add_item() was called.
        self.items = []

    def add_item(self, title, link, description, author_email=None,
                 author_name=None, author_link=None, pubdate=None, comments=None,
                 unique_id=None, enclosure=None, categories=()):
        """Appends a new entry to the feed.

        Arguments are expected to be Python Unicode objects, with two
        exceptions: pubdate is a datetime.datetime object and enclosure is
        an instance of the Enclosure class.

        """
        entry = dict(
            title=title,
            link=link,
            description=description,
            author_email=author_email,
            author_name=author_name,
            author_link=author_link,
            pubdate=pubdate,
            comments=comments,
            unique_id=unique_id,
            enclosure=enclosure,
            categories=categories or (),
        )
        self.items.append(entry)

    def num_items(self):
        """Returns how many items have been added so far."""
        return len(self.items)

    def write(self, outfile, encoding):
        """Writes the feed, in the given encoding, to the file-like object
        outfile.

        The base class has no output format; subclasses should override this.

        """
        raise NotImplementedError

    def writeString(self, encoding):
        """Renders the feed in the given encoding and returns it as a string."""
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """Returns the most recent pubdate among the items.

        When no item carries a pubdate, the current date/time is returned
        instead.

        """
        dated = [entry['pubdate'] for entry in self.items
                 if entry['pubdate'] is not None]
        if not dated:
            return datetime.datetime.now()
        return sorted(dated)[-1]
0145
class Enclosure(object):
    """Holds the url, length and MIME type describing an RSS enclosure"""

    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        self.url = url
        self.length = length
        self.mime_type = mime_type
0151
class RssFeed(SyndicationFeed):
    """Writes the <rss>/<channel> envelope common to the RSS flavours.

    Relies on ``self._version`` and ``self.write_items(handler)``, which are
    not defined on this class.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """Serialises the whole feed to outfile in the given encoding."""
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", {u"version": self._version})
        handler.startElement(u"channel", {})

        channel = self.feed
        # Channel fields that are always written, in document order.
        for tag, key in ((u"title", 'title'),
                         (u"link", 'link'),
                         (u"description", 'description')):
            handler.addQuickElement(tag, channel[key])

        language = channel['language']
        if language is not None:
            handler.addQuickElement(u"language", language)
        for category in channel['categories']:
            handler.addQuickElement(u"category", category)

        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def endChannelElement(self, handler):
        """Closes the <channel> element; kept separate so it can be overridden."""
        handler.endElement(u"channel")
0172
class RssUserland091Feed(RssFeed):
    """RSS 0.91 output: each item gets a title, a link and, when present,
    a description."""
    _version = u"0.91"

    def write_items(self, handler):
        """Emits one <item> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"description", summary)
            handler.endElement(u"item")
0183
class Rss201rev2Feed(RssFeed):
    """RSS 2.0 output, including author, pubDate, comments, guid, enclosure
    and category elements for each item when they are set."""
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def write_items(self, handler):
        """Emits one <item> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"description", summary)

            # Author: written only when an e-mail address is available; the
            # name, if any, follows it in parentheses.
            author_email = entry["author_email"]
            if author_email:
                if entry["author_name"]:
                    author = "%s (%s)" % (author_email, entry['author_name'])
                else:
                    author = author_email
                handler.addQuickElement(u"author", author)

            published = entry['pubdate']
            if published is not None:
                handler.addQuickElement(u"pubDate", rfc2822_date(published).decode('ascii'))

            # Optional plain-text children, in document order.
            for tag, key in ((u"comments", 'comments'), (u"guid", 'unique_id')):
                value = entry[key]
                if value is not None:
                    handler.addQuickElement(tag, value)

            # Enclosure: an empty element described purely by attributes.
            attachment = entry['enclosure']
            if attachment is not None:
                attrs = {u"url": attachment.url, u"length": attachment.length,
                         u"type": attachment.mime_type}
                handler.addQuickElement(u"enclosure", '', attrs)

            # Categories.
            for category in entry['categories']:
                handler.addQuickElement(u"category", category)

            handler.endElement(u"item")
0220
class Atom1Feed(SyndicationFeed):
    """Atom output: a <feed> root element holding one <entry> per item."""
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """Serialises the whole feed to outfile in the given encoding."""
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()

        # xml:lang is only declared on the root when a language was given.
        root_attrs = {u"xmlns": self.ns}
        if self.feed['language'] is not None:
            root_attrs[u"xml:lang"] = self.feed['language']
        handler.startElement(u"feed", root_attrs)

        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
        feed_url = self.feed['feed_url']
        if feed_url is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed_url})
        handler.addQuickElement(u"id", self.feed['link'])
        handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii'))

        # Feed-level author block; e-mail and uri are optional children.
        author_name = self.feed['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", author_name)
            for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                value = self.feed[key]
                if value is not None:
                    handler.addQuickElement(tag, value)
            handler.endElement(u"author")

        subtitle = self.feed['subtitle']
        if subtitle is not None:
            handler.addQuickElement(u"subtitle", subtitle)
        for category in self.feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": category})

        self.write_items(handler)
        handler.endElement(u"feed")

    def write_items(self, handler):
        """Emits one <entry> element per stored feed item."""
        for entry in self.items:
            handler.startElement(u"entry", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", u"", {u"href": entry['link'], u"rel": u"alternate"})
            published = entry['pubdate']
            if published is not None:
                handler.addQuickElement(u"updated", rfc3339_date(published).decode('ascii'))

            # Author information; e-mail and uri are optional children.
            author_name = entry['author_name']
            if author_name is not None:
                handler.startElement(u"author", {})
                handler.addQuickElement(u"name", author_name)
                for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                    value = entry[key]
                    if value is not None:
                        handler.addQuickElement(tag, value)
                handler.endElement(u"author")

            # Unique ID: fall back to a tag URI built from link and pubdate.
            entry_id = entry['unique_id']
            if entry_id is None:
                entry_id = get_tag_uri(entry['link'], entry['pubdate'])
            handler.addQuickElement(u"id", entry_id)

            # Summary.
            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"summary", summary, {u"type": u"html"})

            # Enclosure: expressed as a <link rel="enclosure"> element.
            attachment = entry['enclosure']
            if attachment is not None:
                attrs = {u"rel": u"enclosure",
                         u"href": attachment.url,
                         u"length": attachment.length,
                         u"type": attachment.mime_type}
                handler.addQuickElement(u"link", '', attrs)

            # Categories.
            for category in entry['categories']:
                handler.addQuickElement(u"category", u"", {u"term": category})

            handler.endElement(u"entry")
0295
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# The default is currently the RSS 2.0 generator; changing this one alias
# switches the format for every caller that goes through DefaultFeed.
DefaultFeed = Rss201rev2Feed