0001# Copyright (c) 2005, the Lawrence Journal-World
0002# All rights reserved.
0003
0004# Redistribution and use in source and binary forms, with or without modification,
0005# are permitted provided that the following conditions are met:
0006
0007#     1. Redistributions of source code must retain the above copyright notice, 
0008#        this list of conditions and the following disclaimer.
0009#     
0010#     2. Redistributions in binary form must reproduce the above copyright 
0011#        notice, this list of conditions and the following disclaimer in the
0012#        documentation and/or other materials provided with the distribution.
0013
0014#     3. Neither the name of Django nor the names of its contributors may be used
0015#        to endorse or promote products derived from this software without
0016#        specific prior written permission.
0017
0018# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0019# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0020# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0021# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
0022# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0023# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0024# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
0025# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0026# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0027# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0028
0029# LAST SYNCED WITH DJANGO SOURCE - JULY 12th, 2006 - DJANGO REVISION 3143
0030# http://code.djangoproject.com/log/django/trunk/django/utils/feedgenerator.py
"""Syndication feed generation library -- used for generating RSS and Atom feeds.
0032
0033Sample usage:
0034
0035>>> feed = feedgenerator.Rss201rev2Feed(
0036...     title=u"Poynter E-Media Tidbits",
0037...     link=u"http://www.poynter.org/column.asp?id=31",
0038...     description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
0039...     language=u"en",
0040... )
0041>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
0042>>> fp = open('test.rss', 'w')
0043>>> feed.write(fp, 'utf-8')
0044>>> fp.close()
0045
0046For definitions of the different versions of RSS, see:
0047http://diveintomark.org/archives/2004/02/04/incompatible-rss
0048
0049"""
0050
0051from util import SimplerXMLGenerator
0052import datetime, re, time
0053import email.Utils
0054from xml.dom import minidom
0055from xml.parsers.expat import ExpatError
0056
def rfc2822_date(date):
    """Return ``date`` formatted as an RFC 2822 date string expressed in UTC.

    ``date`` is normally a ``datetime.datetime``.  A naive value is read as
    system-local time through ``time.mktime``.  A timezone-aware value is
    converted to UTC using its own offset instead, because ``time.mktime``
    would misread its fields as system-local time.

    The timestamp is rendered by ``formatdate`` with its default arguments,
    which produces a string such as ``'Wed, 12 Jul 2006 10:30:00 -0000'``.
    """
    # Function-scope imports keep this block self-contained; the lowercase
    # module name is tried first and the legacy spelling is the fallback.
    import calendar
    try:
        from email.utils import formatdate
    except ImportError:
        from email.Utils import formatdate

    # Objects without utcoffset() (e.g. datetime.date) take the naive path.
    get_offset = getattr(date, 'utcoffset', None)
    if get_offset is not None and get_offset() is not None:
        # Aware datetime: compute the epoch value from its UTC fields.
        timestamp = calendar.timegm(date.utctimetuple())
    else:
        timestamp = time.mktime(date.timetuple())
    return formatdate(timestamp)
0059
def rfc3339_date(date):
    """Return ``date`` formatted as an RFC 3339 timestamp.

    The date and time fields are always the value's own wall-clock fields.
    The suffix is:

    * ``Z`` for a naive value (unchanged behaviour) and for an aware value
      whose UTC offset is zero;
    * ``+HH:MM`` / ``-HH:MM`` for an aware value with a non-zero offset, so
      the timestamp no longer claims to be UTC when it is not.

    Any sub-minute part of the offset is dropped.
    """
    stamp = date.strftime('%Y-%m-%dT%H:%M:%S')

    # Objects without utcoffset() (e.g. datetime.date) are treated as naive.
    get_offset = getattr(date, 'utcoffset', None)
    offset = None
    if get_offset is not None:
        offset = get_offset()
    if offset is None:
        return stamp + 'Z'

    # timedelta normalises negatives as (days=-1, seconds>0); fold both
    # parts into a signed number of whole minutes.
    total_minutes = offset.days * 24 * 60 + offset.seconds // 60
    if total_minutes == 0:
        return stamp + 'Z'
    if total_minutes < 0:
        sign = '-'
        total_minutes = -total_minutes
    else:
        sign = '+'
    hours, minutes = divmod(total_minutes, 60)
    return '%s%s%02d:%02d' % (stamp, sign, hours, minutes)
0062
def get_tag_uri(url, date):
    """Create a TagURI from ``url`` and an optional ``date``.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    Steps:

    1. Strip a leading ``http://`` or ``https://`` scheme.
    2. If ``date`` is not None, insert ``,YYYY-MM-DD:`` in front of the first
       ``/`` that remains (nothing is inserted when there is no ``/``).
    3. Replace every ``#`` with ``/``.
    4. Prefix the result with ``tag:``.

    ``date`` may be None, in which case no date component is added.
    """
    # "https://" used to be left in place, which produced malformed ids such
    # as "tag:https:,2006-07-12://host/path".
    tag = re.sub(r'^https?://', '', url)
    if date is not None:
        # Only the first slash (end of the authority part) gets the date.
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
    tag = tag.replace('#', '/')
    return 'tag:' + tag
0070
class SyndicationFeed(object):
    """Common state and helpers shared by every feed format.

    Feed-level metadata lives in the ``feed`` dict and the entries live in
    the ``items`` list.  Concrete formats are expected to supply ``write()``.
    """

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None):
        # A missing or empty category collection is stored as an empty tuple.
        if not categories:
            categories = ()
        self.items = []
        self.feed = {
            'title': title,
            'link': link,
            'description': description,
            'language': language,
            'author_email': author_email,
            'author_name': author_name,
            'author_link': author_link,
            'subtitle': subtitle,
            'categories': categories,
            'feed_url': feed_url,
        }

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=()):
        """Append one entry to the feed.

        Every argument is expected to be a Python Unicode object, with two
        exceptions: pubdate is a datetime.datetime object and enclosure is
        an instance of the Enclosure class.

        """
        if not categories:
            categories = ()
        entry = {
            'title': title,
            'link': link,
            'description': description,
            'author_email': author_email,
            'author_name': author_name,
            'author_link': author_link,
            'pubdate': pubdate,
            'comments': comments,
            'unique_id': unique_id,
            'enclosure': enclosure,
            'categories': categories,
        }
        self.items.append(entry)

    def num_items(self):
        """Return how many items have been added so far."""
        return len(self.items)

    def write(self, outfile, encoding):
        """Write the feed to the file-like object outfile using encoding.

        This base implementation only raises NotImplementedError;
        subclasses should override it.

        """
        raise NotImplementedError

    def writeString(self, encoding):
        """Render the feed with write() and return the result as a string."""
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """Return the most recent pubdate among the items.

        Items without a pubdate are ignored.  When no item has one, the
        current date/time is returned instead.

        """
        pubdates = [entry['pubdate'] for entry in self.items
                    if entry['pubdate'] is not None]
        if not pubdates:
            return datetime.datetime.now()
        pubdates.sort()
        return pubdates[-1]
0145
class Enclosure(object):
    """An RSS enclosure: a URL together with its length and MIME type."""

    def __init__(self, url, length, mime_type):
        """Store the three values; all are expected to be Python Unicode objects."""
        self.url = url
        self.length = length
        self.mime_type = mime_type
0151
class RssFeed(SyndicationFeed):
    """Shared RSS output logic.

    ``write()`` reads ``self._version`` and calls ``self.write_items()``;
    neither is defined in this class, so they have to be supplied by a
    subclass.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """Write the whole RSS document to ``outfile`` in ``encoding``."""
        feed = self.feed
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", {u"version": self._version})
        handler.startElement(u"channel", {})

        # Channel-level elements, followed by the optional ones.
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", feed['link'])
        handler.addQuickElement(u"description", feed['description'])
        language = feed['language']
        if language is not None:
            handler.addQuickElement(u"language", language)
        for category in feed['categories']:
            handler.addQuickElement(u"category", category)

        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def endChannelElement(self, handler):
        """Close the <channel> element on ``handler``."""
        handler.endElement(u"channel")
0172
class RssUserland091Feed(RssFeed):
    """RSS feed that declares version 0.91 and writes minimal items."""
    _version = u"0.91"

    def write_items(self, handler):
        """Write each item's title, link and (if present) description."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            description = entry['description']
            if description is not None:
                handler.addQuickElement(u"description", description)
            handler.endElement(u"item")
0183
class Rss201rev2Feed(RssFeed):
    """RSS feed that declares version 2.0 and writes the fuller item form."""
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def write_items(self, handler):
        """Write one <item> element per feed item to ``handler``."""
        for entry in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", entry['link'])
            description = entry['description']
            if description is not None:
                handler.addQuickElement(u"description", description)

            # Author: "email (name)" when both are set, the bare email when
            # only the email is set, nothing otherwise.
            author_name = entry["author_name"]
            author_email = entry["author_email"]
            if author_name and author_email:
                author = "%s (%s)" % (author_email, author_name)
                handler.addQuickElement(u"author", author)
            elif author_email:
                handler.addQuickElement(u"author", author_email)

            pubdate = entry['pubdate']
            if pubdate is not None:
                handler.addQuickElement(u"pubDate", rfc2822_date(pubdate).decode('ascii'))

            # Remaining optional text elements, in output order.
            for tag, key in ((u"comments", 'comments'), (u"guid", 'unique_id')):
                value = entry[key]
                if value is not None:
                    handler.addQuickElement(tag, value)

            # Enclosure is written as an empty element carrying attributes.
            enclosure = entry['enclosure']
            if enclosure is not None:
                handler.addQuickElement(u"enclosure", '',
                    {u"url": enclosure.url, u"length": enclosure.length,
                        u"type": enclosure.mime_type})

            for category in entry['categories']:
                handler.addQuickElement(u"category", category)

            handler.endElement(u"item")
0220
class Atom1Feed(SyndicationFeed):
    """Feed writer that emits Atom markup in the 2005 Atom namespace."""
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """Write the whole Atom document to ``outfile`` in ``encoding``."""
        feed = self.feed
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()

        # xml:lang is only added to the root element when a language is set.
        root_attrs = {u"xmlns": self.ns}
        if feed['language'] is not None:
            root_attrs[u"xml:lang"] = feed['language']
        handler.startElement(u"feed", root_attrs)

        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        handler.addQuickElement(u"id", feed['link'])
        handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii'))
        self._write_author(handler, feed)
        if feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", feed['subtitle'])
        for term in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": term})

        self.write_items(handler)
        handler.endElement(u"feed")

    def _write_author(self, handler, source):
        """Write an <author> element from ``source`` if it has an author name.

        ``source`` is a dict with 'author_name', 'author_email' and
        'author_link' keys (the feed dict or an item dict).  Email and uri
        are only written when they are not None.
        """
        if source['author_name'] is None:
            return
        handler.startElement(u"author", {})
        handler.addQuickElement(u"name", source['author_name'])
        for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
            if source[key] is not None:
                handler.addQuickElement(tag, source[key])
        handler.endElement(u"author")

    def write_items(self, handler):
        """Write one <entry> element per feed item to ``handler``."""
        for entry in self.items:
            handler.startElement(u"entry", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", u"", {u"href": entry['link'], u"rel": u"alternate"})
            pubdate = entry['pubdate']
            if pubdate is not None:
                handler.addQuickElement(u"updated", rfc3339_date(pubdate).decode('ascii'))

            self._write_author(handler, entry)

            # Use the explicit id when given, otherwise derive a tag URI
            # from the item's link and pubdate.
            entry_id = entry['unique_id']
            if entry_id is None:
                entry_id = get_tag_uri(entry['link'], pubdate)
            handler.addQuickElement(u"id", entry_id)

            summary = entry['description']
            if summary is not None:
                handler.addQuickElement(u"summary", summary, {u"type": u"html"})

            # The enclosure is expressed as a <link rel="enclosure"> element.
            enclosure = entry['enclosure']
            if enclosure is not None:
                handler.addQuickElement(u"link", '',
                    {u"rel": u"enclosure",
                     u"href": enclosure.url,
                     u"length": enclosure.length,
                     u"type": enclosure.mime_type})

            for term in entry['categories']:
                handler.addQuickElement(u"category", u"", {u"term": term})

            handler.endElement(u"entry")
0295
# DefaultFeed is an alias for the feed class this module treats as the system
# default. Calling code can use "feedgenerator.DefaultFeed" instead of naming
# "feedgenerator.Rss201rev2Feed" directly, so the choice is made in one place.
DefaultFeed = Rss201rev2Feed