0001# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
0002# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
0003# (c) 2005 Ian Bicking, Clark C. Evans and contributors
0004# This module is part of the Python Paste Project and is released under
0005# the MIT License: http://www.opensource.org/licenses/mit-license.php
0006"""
0007This module handles sending static content such as in-memory data or
0008files. At this time it has cache helpers and understands the
0009if-modified-since request header.
0010"""
0011
0012import os, time, mimetypes, zipfile, tarfile
0013from paste.httpexceptions import *
0014from paste.httpheaders import *
0015
# Size threshold in bytes: FileApp.update() reads files smaller than this
# fully into memory; files at or above it are streamed from disk instead.
CACHE_SIZE = 4096
# Default number of bytes _FileIter reads per chunk when no block_size
# is given.
BLOCK_SIZE = 4096 * 16

# Names exported by ``from <this module> import *``.
__all__ = ['DataApp', 'FileApp', 'ArchiveStore']
0020
class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        """
        Store the header list and, when ``content`` is not None, the
        content itself.

        ``headers`` must be a list or None; a list that is passed in is
        used (and updated) in place.  ``allowed_methods`` overrides the
        class-level tuple of accepted request methods.
        """
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``kwargs`` to the ``Cache-Control`` header and remember the
        resulting expiry (None when falsy).  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the content, recording its length and modification time
        (the current time unless ``last_modified`` is given), and refresh
        the ``Last-Modified`` header.  Returns ``self``.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``kwargs`` to the ``Content-Disposition`` header.
        Returns ``self``.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: reject request methods outside
        ``allowed_methods`` with ``HTTPMethodNotAllowed``, otherwise
        delegate to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """
        Return the entity tag: ``<last_modified>-<content_length>``.
        """
        return str(self.last_modified) + '-' + str(self.content_length)

    def _not_modified(self, headers, start_response):
        """
        Strip the entity headers from ``headers``, start a 304 response
        and return its (empty) body.
        """
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return ['']  # empty body

    def _resolve_range(self, byte_range):
        """
        Turn a parsed ``Range`` value into inclusive ``(lower, upper)``
        byte offsets.

        ``byte_range`` is None or ``(units, [(begin, end), ...])`` where
        ``end`` is None for an open-ended range.  Anything other than a
        single ``bytes`` range selects the whole content.  Returns None
        when the requested range cannot be satisfied.
        """
        last_byte = self.content_length - 1
        if not (byte_range and 'bytes' == byte_range[0]
                and 1 == len(byte_range[1])):
            return (0, last_byte)
        (lower, upper) = byte_range[1][0]
        # Only a *missing* end means "to the end of the content"; an
        # explicit 0 (``bytes=0-0``) asks for exactly the first byte.
        if upper is None:
            upper = last_byte
        if upper >= self.content_length or lower > upper:
            return None
        return (lower, upper)

    def get(self, environ, start_response):
        """
        Answer a GET/HEAD request, honouring ``If-None-Match``,
        ``If-Modified-Since`` and single ``bytes`` ranges.

        Returns a list holding the body when the content is in memory
        (or for 304/error responses); otherwise returns the tuple
        ``(lower, content_length)`` so a subclass can stream that span.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)
        if client_etags:
            for etag in client_etags:
                if etag == current_etag or etag == '*':
                    return self._not_modified(headers, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)
            # No usable header value means there is nothing to compare;
            # test for None explicitly instead of relying on None
            # ordering below every integer.
            if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                return self._not_modified(headers, start_response)

        span = self._resolve_range(RANGE.parse(environ))
        if span is None:
            return HTTPRequestRangeNotSatisfiable((
                "Range request was made beyond the end of the content,\r\n"
                "which is %s long.\r\n Range: %s\r\n") % (
                    self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = span

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if content_length == self.content_length:
            start_response('200 OK', headers)
        else:
            start_response('206 Partial Content', headers)
        if self.content is not None:
            return [self.content[lower:upper+1]]
        return (lower, content_length)
0168
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        """
        Remember ``filename`` and use the guessed content type/encoding
        as defaults for the matching keyword arguments.  The file itself
        is not read until the first request.
        """
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """
        Return ``(content_type, content_encoding)`` as guessed by
        ``mimetypes.guess_type()`` from the filename.
        """
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Re-read the file's metadata (and, for files smaller than
        ``CACHE_SIZE``, its content) when its mtime changed or ``force``
        is true.

        Raises whatever ``os.stat()``/``open()`` raise when the file is
        missing or unreadable.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        if stat.st_size < CACHE_SIZE:
            # Read before touching any state, so a failed read does not
            # leave a fresh mtime paired with stale content.
            with open(self.filename, "rb") as fh:
                data = fh.read()
            # set_content() also records last_modified and refreshes the
            # Last-Modified header.
            self.set_content(data, stat.st_mtime)
        else:
            self.last_modified = stat.st_mtime
            self.content = None
            self.content_length = stat.st_size
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def _unreadable(self, error, environ, start_response):
        """
        Build the error response for a file that could not be stat'ed or
        opened: 404 when it does not exist, 403 otherwise.
        """
        if not os.path.exists(self.filename):
            exc = HTTPNotFound(
                'The resource does not exist',
                comment="No file at %r" % self.filename)
            return exc(environ, start_response)
        exc = HTTPForbidden(
            'You are not permitted to view this file (%s)' % error)
        return exc.wsgi_application(environ, start_response)

    def get(self, environ, start_response):
        """
        Serve the file: refresh cached metadata, let ``DataApp.get()``
        handle conditional/range logic, then either return the in-memory
        body or a ``_FileIter`` streaming the requested span.  HEAD
        requests always get an empty body.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6: max-age=0 forces revalidation against the file.
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        try:
            self.update(force=force)
        except (IOError, OSError) as e:
            return self._unreadable(e, environ, start_response)

        fileobj = None
        if self.content is None:
            # Content is not cached in memory; it will be streamed.
            try:
                fileobj = open(self.filename, 'rb')
            except (IOError, OSError) as e:
                return self._unreadable(e, environ, start_response)

        handed_off = False
        try:
            retval = DataApp.get(self, environ, start_response)
            if is_head:
                return ['']
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                return retval
            (lower, content_length) = retval
            fileobj.seek(lower)
            handed_off = True
            return _FileIter(fileobj, size=content_length)
        finally:
            # Unless the handle now belongs to the returned iterator
            # (whose close() releases it), close it here so 304, error
            # and HEAD responses do not leak file descriptors.
            if fileobj is not None and not handed_off:
                fileobj.close()
0234
0235class _FileIter(object):
0236
0237 def __init__(self, file, block_size=None, size=None):
0238 self.file = file
0239 self.size = size
0240 self.block_size = block_size or BLOCK_SIZE
0241
0242 def __iter__(self):
0243 return self
0244
0245 def next(self):
0246 chunk_size = self.block_size
0247 if self.size is not None:
0248 if chunk_size > self.size:
0249 chunk_size = self.size
0250 self.size -= chunk_size
0251 data = self.file.read(chunk_size)
0252 if not data:
0253 raise StopIteration
0254 return data
0255
0256 def close(self):
0257 self.file.close()
0258
class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.
    """

    def __init__(self, filepath):
        """
        Open the archive at ``filepath``.

        Raises ``AssertionError`` when the path is neither a zip nor a
        tar archive.
        """
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath, "r")
        elif tarfile.is_tarfile(filepath):
            # NOTE(review): tarfile.TarFileCompat is deprecated since
            # Python 2.6 and does not exist in Python 3; tar support
            # needs a replacement adapter before this runs on Python 3.
            self.archive = tarfile.TarFileCompat(filepath, "r")
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        self.expires = None
        self.last_modified = time.time()
        self.cache = {}
        # Headers shared by every served member; cache_control() writes
        # into this list, so it has to exist before that call.
        self.headers = []

    def cache_control(self, **kwargs):
        """
        Apply ``kwargs`` to the ``Cache-Control`` header used for members
        served from now on and remember the resulting expiry (None when
        falsy).  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def _header_defaults(self, filename):
        """
        Return the ``content_type``/``content_encoding`` keyword arguments
        guessed for ``filename``, leaving out whatever could not be guessed.
        """
        content_type, content_encoding = mimetypes.guess_type(filename)
        defaults = {}
        if content_type:
            defaults['content_type'] = content_type
        if content_encoding:
            defaults['content_encoding'] = content_encoding
        return defaults

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the archive member named by ``PATH_INFO``
        (minus one leading slash), building and caching a ``DataApp`` for
        it on first use.  Unknown members and directory entries get
        ``HTTPNotFound``.
        """
        path = environ.get("PATH_INFO", "")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application is not None:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        # Only pass header values that were actually guessed: a None
        # would otherwise be handed to the header machinery as a value.
        # Each app gets its own copy of the shared headers so per-member
        # headers do not leak between members.
        app = DataApp(None, headers=list(self.headers),
                      **self._header_defaults(info.filename))
        app.set_content(self.archive.read(path),
                        time.mktime(info.date_time + (0, 0, 0)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)