0001"""
0002Text Helpers
0003
0004Provides a set of methods for filtering, formatting and transforming strings.
0005"""
0006# Last synced with Rails copy at Revision 6096 on Feb 8th, 2007.
# The sanitize and strip_tags helpers were purposely left out; they should be
# added at some point, most likely using BeautifulSoup.
0009
0010import itertools
0011import re
0012import textwrap
0013import warnings
0014import webhelpers.textile as textile
0015import webhelpers.markdown as _markdown
0016from routes import request_config
0017from webhelpers.rails.tags import content_tag, tag_options
0018
# Verbose-mode (re.X) pattern that auto_link_urls() uses to find bare URLs.
# It has four capture groups, consumed there via ``group(1, 2, 3, 4)``:
#   1. the text just before the URL (an HTML tag, one character that is not
#      one of = ! : ' " /, or the empty string at the start of the input)
#   2. the URL prefix: "http://", "https://" or "www."
#   3. the rest of the URL: host, optional port, path, query string, anchor
#   4. whatever follows the URL (punctuation, whitespace, "<" or end of input)
AUTO_LINK_RE = re.compile(r"""
 ( # leading text
 <\w+.*?>| # leading HTML tag, or
 [^=!:'"/]| # leading punctuation, or
 ^ # beginning of line
 )
 (
 (?:https?://)| # protocol spec, or
 (?:www\.) # www.*
 )
 (
 [-\w]+ # subdomain or domain
 (?:\.[-\w]+)* # remaining subdomains or domain
 (?::\d+)? # port
 (?:/(?:(?:[~\w\+%-]|(?:[,.;:][^\s$]))+)?)* # path
 (?:\?[\w\+%&=.;-]+)? # query string
 (?:\#[\w\-]*)? # trailing anchor
 )
 ([\.,"'?!;:]|\s|<|$) # trailing text
 """, re.X)
0039
def iterdict(items):
    """
    Bundles ``items`` together with an endless iterator over them

    Returns a dict with two keys: ``'items'`` holds the object exactly as it
    was passed in, and ``'iter'`` holds an ``itertools.cycle`` built from it.
    """
    endless = itertools.cycle(items)
    return {'items': items, 'iter': endless}
0042
def cycle(*args, **kargs):
    """
    Returns the next cycle of the given list

    Every time ``cycle`` is called, the value returned will be the next item
    in the list passed to it. The position is kept in the request environ
    (``request_config().environ['railshelpers.cycles']``), and can be reset
    by calling ``reset_cycle()``.

    You may specify the list as either arguments, or as a single list argument.

    ``name``
        Optional keyword argument naming the cycle (defaults to ``'default'``).
        Calling a named cycle with a different list than the one stored for
        that name restarts the cycle with the new list.

    Raises ``IndexError`` when called without any positional argument.

    This can be used to alternate classes for table rows::

        # In Myghty...
        % for item in items:
        <tr class="<% cycle("even", "odd") %>">
            ... use item ...
        </tr>
        % #endfor

    You can use named cycles to prevent clashes in nested loops. You'll
    have to reset the inner cycle, manually::

        % for item in items:
        <tr class="<% cycle("even", "odd", name="row_class") %>">
            <td>
        %     for value in item.values:
                <span style="color:'<% cycle("red", "green", "blue",
                                             name="colors") %>'">
                            item
                </span>
        %     #endfor
            <% reset_cycle("colors") %>
            </td>
        </tr>
        % #endfor
    """
    if len(args) > 1:
        items = args
    else:
        items = args[0]
    name = kargs.get('name', 'default')
    cycles = request_config().environ.setdefault('railshelpers.cycles', {})

    # Start (or restart) the named cycle when it is unknown or when the
    # caller switched to a different list of items.
    state = cycles.get(name)
    if state is None or state.get('items') != items:
        state = cycles[name] = iterdict(items)

    # The next() builtin works on both Python 2.6+ and Python 3, unlike the
    # Python 2-only ``.next()`` iterator method.
    return next(state['iter'])
0091
def reset_cycle(name='default'):
    """
    Forgets the stored position of the cycle called ``name``

    The entry for ``name`` is removed from the cycles kept in the request
    environ, so the next ``cycle()`` call using that name starts again from
    the first element. Nothing happens when no such cycle is stored.
    """
    try:
        stored_cycles = request_config().environ['railshelpers.cycles']
        del stored_cycles[name]
    except KeyError:
        # Either no cycle has been used yet or none under this name.
        pass
0103
def counter(name='default', start=1, step=1):
    """Return the next cardinal in a sequence.

    Every time ``counter`` is called, the value returned will be the next
    counting number in that sequence. The sequence is kept in the request
    environ (``request_config().environ['railshelpers.counters']``), and can
    be reset by calling ``reset_counter()``.

    ``name``
        Name of the sequence (defaults to ``'default'``).
    ``start``
        The number to start at (defaults to 1).
    ``step``
        The amount added after each call (defaults to 1).

    ``start`` and ``step`` are only used when the named sequence is created;
    once a sequence exists under ``name`` they are ignored until it is reset.

    Sequences will increase by ``step`` each time it is called, until the
    heat death of the universe or python explodes.

    This can be used to count rows in a table::

        # In Myghty
        % for item in items:
        <tr>
            <td><% h.counter() %></td>
        </tr>
        % #endfor

    You can use named counters to prevent clashes in nested loops.
    You'll have to reset the inner counter manually though. See the
    documentation for ``cycle()`` for a similar example.
    """
    counters = request_config().environ.setdefault('railshelpers.counters', {})

    # ripped off of itertools.count
    def do_counter(value, increment):
        while True:
            yield value
            value += increment

    # Only build a generator when this name has no sequence yet.
    sequence = counters.get(name)
    if sequence is None:
        sequence = counters[name] = do_counter(start, step)

    # The next() builtin works on both Python 2.6+ and Python 3, unlike the
    # Python 2-only ``.next()`` generator method.
    return next(sequence)
0145
def reset_counter(name='default'):
    """Forgets the counter called ``name``.

    The entry for ``name`` is removed from the counters kept in the request
    environ, so the next ``counter()`` call using that name begins a new
    sequence. Nothing happens when no such counter is stored.
    """
    try:
        stored_counters = request_config().environ['railshelpers.counters']
        del stored_counters[name]
    except KeyError:
        # Either no counter has been used yet or none under this name.
        pass
0156
def truncate(text, length=30, truncate_string='...'):
    """
    Truncates ``text`` with replacement characters

    ``length``
        The maximum length of ``text`` before replacement
    ``truncate_string``
        If ``text`` exceeds the ``length``, this string will replace
        the end of the string

    Returns ``''`` when ``text`` is empty or ``None``, and ``text`` unchanged
    when it is not longer than ``length``. Otherwise the result is the start
    of ``text`` followed by ``truncate_string``, ``length`` characters in
    total. When ``length`` is smaller than ``len(truncate_string)`` nothing
    of ``text`` is kept and ``truncate_string`` alone is returned.

    Example::

        >>> truncate('Once upon a time in a world far far away', 14)
        'Once upon a...'
    """
    if not text:
        return ''
    if len(text) <= length:
        return text

    # Clamp at zero: a negative slice bound would count from the END of the
    # text and return almost all of it instead of truncating.
    keep = max(length - len(truncate_string), 0)
    return text[:keep] + truncate_string
0179
def highlight(text, phrase, highlighter='<strong class="highlight">\\1</strong>',
              hilighter=None):
    """
    Highlights the ``phrase`` where it is found in the ``text``

    The highlighted phrase will be surrounded by the highlighter, by default::

        <strong class="highlight">I'm a highlight phrase</strong>

    ``highlighter``
        Defines the highlighting phrase. This argument should be a single-quoted string
        with ``\\1`` where the phrase is supposed to be inserted.
    ``hilighter``
        Deprecated misspelling of ``highlighter``. When given it overrides
        ``highlighter`` and a ``DeprecationWarning`` is issued.

    Note: The ``phrase`` is matched literally (it is passed through
    ``re.escape``) and case-insensitively. Neither ``text`` nor ``phrase`` is
    HTML-escaped by this function.

    ``text`` is returned unchanged when either ``text`` or ``phrase`` is empty.

    Example::

        >>> highlight('You searched for: Pylons', 'Pylons')
        'You searched for: <strong class="highlight">Pylons</strong>'
    """
    if hilighter is not None:
        warnings.warn("The highlight function's hilighter keyword argument is deprecated: "
                      "Please use the highlighter keyword argument instead.",
                      DeprecationWarning, 2)
        highlighter = hilighter
    if not phrase or not text:
        return text
    # Escape the phrase so regex metacharacters in it are matched literally.
    highlight_re = re.compile('(%s)' % re.escape(phrase), re.I)
    return highlight_re.sub(highlighter, text)
0209
def excerpt(text, phrase, radius=100, excerpt_string="..."):
    """
    Extracts an excerpt from the ``text``. Returns an empty string if the phrase
    isn't found.

    ``phrase``
        Phrase to excerpt from ``text``; matched literally and
        case-insensitively
    ``radius``
        How many surrounding characters to include on each side of the
        phrase. Line breaks count as ordinary characters.
    ``excerpt_string``
        Characters added on whichever side of the excerpt had text cut off

    ``text`` is returned unchanged when either ``text`` or ``phrase`` is empty.

    Example::

        >>> excerpt("hello my world", "my", 3)
        '...lo my wo...'
    """
    if not text or not phrase:
        return text

    # re.DOTALL lets "." cross line breaks; without it the excerpt would stop
    # at the nearest newline instead of spanning ``radius`` characters.
    pat = re.compile('(.{0,%s}%s.{0,%s})' % (radius, re.escape(phrase), radius),
                     re.I | re.DOTALL)
    match = pat.search(text)
    if not match:
        return ""

    snippet = match.group(1)
    # Mark each side on which part of the text was left out.
    if match.start(1) > 0:
        snippet = excerpt_string + snippet
    if match.end(1) < len(text):
        snippet = snippet + excerpt_string
    return snippet
0240
def word_wrap(text, line_width=80):
    """
    Wraps ``text`` into lines no longer than ``line_width`` characters by
    delegating to ``textwrap.fill``.

    Deprecated: Use python's builtin textwrap.fill instead. Every call issues
    a ``DeprecationWarning``.
    """
    message = ("The word_wrap function has been deprecated: Use python's builtin "
               "textwrap.fill function instead.")
    # stacklevel=2 attributes the warning to the caller of word_wrap.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    wrapped = textwrap.fill(text, width=line_width)
    return wrapped
0251
def simple_format(text):
    """
    Returns ``text`` transformed into HTML using very simple formatting rules

    Line endings are first normalized to ``\\n``. Two or more consecutive
    newlines are collapsed to ``\\n\\n`` and treated as a paragraph break
    (``</p>\\n\\n<p>``). A single newline between two other characters is
    treated as a linebreak: a ``<br />`` tag is inserted right after it. The
    newlines themselves are kept in the output. ``None`` is handled like an
    empty string.

    The result is wrapped with ``content_tag("p", ...)``.
    """
    markup = '' if text is None else text

    # Ordered (pattern, replacement) rewrites; each works on the output of
    # the previous one, so the order matters.
    rewrites = (
        (r'(\r\n|\n|\r)', r'\n'),                    # normalize line endings
        (r'\n\n+', r'\n\n'),                         # collapse blank-line runs
        (r'(\n\n)', r'</p>\1<p>'),                   # paragraph boundaries
        (r'([^\n])(\n)(?=[^\n])', r'\1\2<br />'),    # single newline -> <br />
    )
    for pattern, replacement in rewrites:
        markup = re.sub(pattern, replacement, markup)

    markup = content_tag("p", markup)
    # Drop an empty paragraph directly following a closed one.
    markup = markup.replace('</p><p></p>', '</p>')
    return re.sub(r'</p><p>', r'</p>\n<p>', markup)
0270
def auto_link(text, link="all", **href_options):
    """
    Turns all urls and email addresses into clickable links.

    ``link``
        Used to determine what to link. Options are "all", "email_addresses", or "urls".
        Any value other than "all" or "email_addresses" links urls only.
    ``href_options``
        Extra keyword arguments handed to ``auto_link_urls``; they are not
        used for email links.

    Returns an empty string when ``text`` is empty or ``None``.

    Example::

        >>> auto_link("Go to http://www.planetpython.com and say hello to guido@python.org")
        'Go to <a href="http://www.planetpython.com">http://www.planetpython.com</a> and say hello to <a href="mailto:guido@python.org">guido@python.org</a>'
    """
    if not text:
        return ""
    if link == "email_addresses":
        return auto_link_email_addresses(text)
    if link == "all":
        # Email addresses are linked first, then urls in the result.
        text = auto_link_email_addresses(text)
    return auto_link_urls(text, **href_options)
0291
def auto_link_urls(text, **href_options):
    """
    Turns every url matched by ``AUTO_LINK_RE`` in ``text`` into an ``<a>`` link

    ``href_options``
        Keyword arguments run through ``tag_options``; the resulting string
        is placed inside each generated ``<a>`` tag after the ``href``.

    Urls starting with ``www.`` get ``http://`` prepended in the ``href``
    while the link label keeps the text as written. A url that directly
    follows an opening ``<a ...>`` tag is left untouched.
    """
    extra_options = tag_options(**href_options)

    def linkify(found):
        leading, prefix, remainder, trailing = found.group(1, 2, 3, 4)
        if re.match(r'<a\s', leading, re.I):
            # Preceded by an opening anchor tag: keep the match as it is.
            return found.group()
        label = prefix + remainder
        if prefix == "www.":
            prefix = "http://www."
        return '%s<a href="%s%s"%s>%s</a>%s' % (
            leading, prefix, remainder, extra_options, label, trailing)

    return AUTO_LINK_RE.sub(linkify, text)
0304
def auto_link_email_addresses(text):
    """
    Wraps every email address found in ``text`` in a ``mailto:`` link

    The address itself is used as the link label. Text without a matching
    address is returned unchanged.
    """
    email_re = re.compile(r'([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)')
    mailto_link = r'<a href="mailto:\1">\1</a>'
    return email_re.sub(mailto_link, text)
0308
def strip_links(text):
    """
    Strips link tags from ``text`` leaving just the link label.

    Matching is case-insensitive, and both the opening tag and the label may
    span several lines.

    Example::

        >>> strip_links('<a href="something">else</a>')
        'else'
    """
    # re.S (DOTALL) so "." also matches newlines and multi-line links are
    # stripped too. The previous re.M flag only affects "^" and "$", which
    # this pattern does not use.
    strip_re = re.compile(r'<a\b.*?>(.*?)<\/a>', re.I | re.S)
    return strip_re.sub(r'\1', text)
0320
def textilize(text, sanitize=False):
    """Format the text with Textile formatting

    This function uses the `PyTextile library <http://dealmeida.net/>`_ which is included with WebHelpers.

    ``sanitize``
        Forwarded to ``Textiler.process``. Per the original notes, sanitizing
        fixes tags like <img />, <br /> and <hr /> for proper XHTML output
        (behaviour lives in ``webhelpers.textile`` -- confirm there).

    """
    return textile.Textiler(text).process(sanitize=sanitize)
0332
def markdown(text, **kwargs):
    """Format the text with MarkDown formatting

    This function uses the `Python MarkDown library <http://www.freewisdom.org/projects/python-markdown/>`_
    which is included with WebHelpers.

    Any keyword arguments are passed straight through to the library's
    ``markdown`` function.

    """
    rendered = _markdown.markdown(text, **kwargs)
    return rendered
0341
# Names exported by ``from <this module> import *``.
__all__ = [
    'cycle',
    'reset_cycle',
    'counter',
    'reset_counter',
    'truncate',
    'highlight',
    'excerpt',
    'word_wrap',
    'simple_format',
    'auto_link',
    'strip_links',
    'textilize',
    'markdown',
]