Upgrade to typogrify 2.0.7 and add this version to the repository
- Legacy-Id: 8988
This commit is contained in:
parent
bcacf99a60
commit
a6ff751f20
1
typogrify/__init__.py
Normal file
1
typogrify/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# Package version; keep in sync with release/setup metadata.
__version__ = "2.0.7"
|
372
typogrify/filters.py
Normal file
372
typogrify/filters.py
Normal file
|
@ -0,0 +1,372 @@
|
|||
import re
|
||||
from typogrify.packages.titlecase import titlecase # NOQA
|
||||
|
||||
class TypogrifyError(Exception):
    """Base exception for typogrify, so templates can catch or silence its errors."""
|
||||
|
||||
def process_ignores(text, ignore_tags=None):
    """ Creates a list of tuples based on tags to be ignored.
    Tags can be added as a list in the `ignore_tags`. Tags
    can also be filtered on id and class using CSS notation.
    For example, div#test (div with id='test'), div.test
    (div with class='test'), #test (any tag with id='test')
    or .test (any tag with class='test').

    Returns in the following format:

    [
        ('Text here', <should text be processed? True|False>),
        ('Text here', <should text be processed? True|False>),
    ]

    >>> process_ignores('<pre>processed</pre><p>processed</p>')
    [('<pre>processed</pre>', False), ('<p>processed</p>', True)]
    >>> process_ignores('<code>processed</code><p>processed<pre>processed</pre></p>')
    [('<code>processed</code>', False), ('<p>processed', True), ('<pre>processed</pre>', False), ('</p>', True)]
    >>> process_ignores('<code>processed</code><p>processed<pre>processed</pre></p>',['p'])
    [('<code>processed</code>', False), ('<p>processed<pre>processed</pre></p>', False)]
    >>> process_ignores('<span class="test">processed</span><span>processed<div>processed</div></span>',['div', 'span.test'])
    [('<span class="test">processed</span>', False), ('<span>processed', True), ('<div>processed</div>', False), ('</span>', True)]
    >>> process_ignores('<span class="TeSt">processed</span><span>processed<div>processed</div></span>',['div', '.test'])
    [('<span class="TeSt">processed</span>', False), ('<span>processed', True), ('<div>processed</div>', False), ('</span>', True)]
    >>> process_ignores('<span class="test">processed</span><span>processed<div>processed</div></span>',['div', '#test'])
    [('<span class="test">processed</span><span>processed', True), ('<div>processed</div>', False), ('</span>', True)]
    >>> process_ignores('<span id = "test">processed</span><span>processed<div>processed</div></span>',['div', 'span#test'])
    [('<span id = "test">processed</span>', False), ('<span>processed', True), ('<div>processed</div>', False), ('</span>', True)]
    >>> process_ignores('<span extra class="test" extra>processed</span><span>processed<div class="test">processed</div></span>',['div', 'span.test'])
    [('<span extra class="test" extra>processed</span>', False), ('<span>processed', True), ('<div class="test">processed</div>', False), ('</span>', True)]
    >>> process_ignores('<span e x t r a class="test" extra>processed</span><span>processed<div class="test">processed</div></span>',['.test'])
    [('<span e x t r a class="test" extra>processed</span>', False), ('<span>processed', True), ('<div class="test">processed</div>', False), ('</span>', True)]
    """

    def _filter_tag(match):
        """Process user tag filters in regex sub"""
        # An empty tag name means "any tag" (generic '.class'/'#id' filter).
        # Raw string: '\s' is an invalid str escape and warns on Python >= 3.12.
        tag = match.group(1) if match.group(1) != '' else r'[^\s.#<>]+'
        attribute = 'class' if match.group(2)[0] == '.' else 'id'
        attribute_value = match.group(2)[1:]
        _filter_tag.group += 1

        # Verbose-mode fragment: match the tag name only when a lookahead finds
        # the required attribute.  {0} formats to a backreference to the quote
        # group that this fragment introduces into the final pattern.
        result = r"""
        (?: {tag}
            (?= [^>]*?
                {attribute} \s*=\s*
                (['"]) {attribute_value} \{0}
        ))""".format(_filter_tag.group, **locals())

        return result

    # Group 1 of the final regex captures the tag name; each filtered tag adds
    # one quote-capturing group after it, hence the counter starts at 1.
    _filter_tag.group = 1
    position = 0
    sections = []

    if ignore_tags is None:
        ignore_tags = []

    # make ignore_tags unique and have 'pre' and 'code' as default
    ignore_tags = set(map(lambda x: x.strip(), ignore_tags)) | set(['pre', 'code'])

    # classify tags: plain names, generic '.class'/'#id', and 'tag.class'/'tag#id'
    non_filtered_tags = set(filter(lambda x: '.' not in x and '#' not in x, ignore_tags))
    generic_filtered_tags = set(filter(lambda x: x.startswith(('.', '#')), ignore_tags))
    filtered_tags = ignore_tags - (non_filtered_tags | generic_filtered_tags)

    # remove redundancy from filtered_tags (already covered by a broader rule)
    filtered_tags = filter(lambda x: not any(tag in x for tag in generic_filtered_tags),
                           filtered_tags)
    filtered_tags = filter(lambda x: not any(tag in x for tag in non_filtered_tags),
                           filtered_tags)

    # alter the tags that must be filtered for the regex
    sub = lambda tag: re.sub(r'^([^\s.#<>]*)([.#][^\s.#<>]+)$', _filter_tag, tag)
    generic_filtered_tags = list(map(sub, generic_filtered_tags))
    filtered_tags = list(map(sub, filtered_tags))

    # create regex: a whole <tag ...>...</tag> span per alternative
    ignore_tags = list(non_filtered_tags | set(generic_filtered_tags) | set(filtered_tags))
    ignore_regex = r'(?:<(%s)[^>]*>.*?</\1>)' % '|'.join(ignore_tags)
    ignore_finder = re.compile(ignore_regex, re.IGNORECASE | re.DOTALL | re.VERBOSE)

    # process regex
    for section in ignore_finder.finditer(text):
        start, end = section.span()

        if position != start:
            # if the current position isn't the match we
            # need to process everything in between
            sections.append((text[position:start], True))

        # now we mark the matched section as ignored
        sections.append((text[start:end], False))

        position = end

    # match the rest of the text if necessary
    # (this could in fact be the entire string)
    if position < len(text):
        sections.append((text[position:len(text)], True))

    return sections
|
||||
|
||||
def amp(text):
    """Wraps ampersands in HTML with ``<span class="amp">`` so they can be
    styled with CSS. Ampersands are also normalized to ``&amp;``. Requires
    ampersands to have whitespace or an ``&nbsp;`` on both sides.

    >>> amp('One & two')
    'One <span class="amp">&amp;</span> two'
    >>> amp('One &amp; two')
    'One <span class="amp">&amp;</span> two'
    >>> amp('One &#38; two')
    'One <span class="amp">&amp;</span> two'

    >>> amp('One&nbsp;&amp;&nbsp;two')
    'One&nbsp;<span class="amp">&amp;</span>&nbsp;two'

    It won't mess up & that are already wrapped, in entities or URLs

    >>> amp('One <span class="amp">&amp;</span> two')
    'One <span class="amp">&amp;</span> two'
    >>> amp('&ldquo;this&rdquo; & <a href="/?that&amp;test">that</a>')
    '&ldquo;this&rdquo; <span class="amp">&amp;</span> <a href="/?that&amp;test">that</a>'

    It should ignore standalone amps that are in attributes
    >>> amp('<link href="xyz.html" title="One & Two">xyz</link>')
    '<link href="xyz.html" title="One & Two">xyz</link>'
    """
    # tag_pattern from http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx
    # it kinda sucks but it fixes the standalone amps in attributes bug
    # (raw string so \w and \s are not treated as invalid str escapes)
    tag_pattern = r'''</?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>'''
    # An amp is a bare '&', '&amp;' or '&#38;' bounded by whitespace or &nbsp;.
    # (entity alternatives restored — an HTML viewer had decoded them in transit)
    amp_finder = re.compile(r"(\s|&nbsp;)(&|&amp;|&\#38;)(\s|&nbsp;)")
    intra_tag_finder = re.compile(r'(?P<prefix>(%s)?)(?P<text>([^<]*))(?P<suffix>(%s)?)' % (tag_pattern, tag_pattern))

    def _amp_process(groups):
        # Only the text between tags is rewritten; tags pass through untouched.
        prefix = groups.group('prefix') or ''
        text = amp_finder.sub(r"""\1<span class="amp">&amp;</span>\3""", groups.group('text'))
        suffix = groups.group('suffix') or ''
        return prefix + text + suffix

    output = intra_tag_finder.sub(_amp_process, text)
    return output
|
||||
|
||||
|
||||
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<SCRIPT>CAPS</script> more CAPS")
    '<SCRIPT>CAPS</script> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with with apostrophes in them shouldn't break. Only handles dump apostrophes though.
    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    Raises TypogrifyError when the third-party smartypants library is missing.
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% caps %} filter: The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            # Dotted form (group 3): keep a trailing space outside the span.
            # ('cap_text' avoids shadowing the enclosing caps() function.)
            if matchobj.group(3)[-1] == " ":
                cap_text = matchobj.group(3)[:-1]
                tail = ' '
            else:
                cap_text = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (cap_text, tail)

    # Add additional tags whose content should be
    # ignored here. Note - <pre> and <code> tag are
    # ignored by default and therefore are not here
    tags_to_skip_regex = re.compile("<(/)?(?:kbd|script)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            # group(1) is the optional "/": absent means an *opening* skip-tag.
            # (identity check: comparing to None with '==' is non-idiomatic)
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return output
|
||||
|
||||
|
||||
def initial_quotes(text):
    """Wraps initial quotes in ``class="dquo"`` for double quotes or
    ``class="quo"`` for single quotes. Works in these block tags ``(h1-h6, p, li, dt, dd)``
    and also accounts for potential opening inline elements ``a, em, strong, span, b, i``

    >>> initial_quotes('"With primes"')
    '<span class="dquo">"</span>With primes"'
    >>> initial_quotes("'With single primes'")
    '<span class="quo">\\'</span>With single primes\\''

    >>> initial_quotes('<a href="#">"With primes and a link"</a>')
    '<a href="#"><span class="dquo">"</span>With primes and a link"</a>'

    >>> initial_quotes('&#8220;With smartypanted quotes&#8221;')
    '<span class="dquo">&#8220;</span>With smartypanted quotes&#8221;'
    """
    # Entity alternatives (&ldquo;/&lsquo;) restored — an HTML viewer had
    # decoded them to literal curly quotes, which would miss entity-encoded text.
    quote_finder = re.compile(r"""((<(p|h[1-6]|li|dt|dd)[^>]*>|^)       # start with an opening p, h1-6, li, dd, dt or the start of the string
                              \s*                                       # optional white space!
                              (<(a|em|span|strong|i|b)[^>]*>\s*)*)      # optional opening inline tags, with more optional white space for each.
                              (("|&ldquo;|&\#8220;)|('|&lsquo;|&\#8216;))  # Find me a quote! (only need to find the left quotes and the primes)
                                                                        # double quotes are in group 7, singles in group 8
                              """, re.VERBOSE)

    def _quote_wrapper(matchobj):
        # Group 7 matched -> double quote, group 8 -> single quote.
        if matchobj.group(7):
            classname = "dquo"
            quote = matchobj.group(7)
        else:
            classname = "quo"
            quote = matchobj.group(8)
        return """%s<span class="%s">%s</span>""" % (matchobj.group(1), classname, quote)
    output = quote_finder.sub(_quote_wrapper, text)
    return output
|
||||
|
||||
|
||||
def smartypants(text):
    """Curl straight quotes via the SmartyPants library.

    >>> smartypants('The "Green" man')
    'The &#8220;Green&#8221; man'
    """
    try:
        # Alias the import: the module shares this function's name.
        import smartypants as smartypants_module
    except ImportError:
        raise TypogrifyError("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
    else:
        return smartypants_module.smartypants(text)
|
||||
|
||||
|
||||
def widont(text):
    """Replaces the space between the last two words in a string with ``&nbsp;``
    Works in these block tags ``(h1-h6, p, li, dd, dt)`` and also accounts for
    potential closing inline elements ``a, em, strong, span, b, i``

    >>> widont('A very simple test')
    'A very simple&nbsp;test'

    Single word items shouldn't be changed
    >>> widont('Test')
    'Test'
    >>> widont(' Test')
    ' Test'
    >>> widont('<ul><li>Test</p></li><ul>')
    '<ul><li>Test</p></li><ul>'
    >>> widont('<ul><li> Test</p></li><ul>')
    '<ul><li> Test</p></li><ul>'

    >>> widont('<p>In a couple of paragraphs</p><p>paragraph two</p>')
    '<p>In a couple of&nbsp;paragraphs</p><p>paragraph&nbsp;two</p>'

    >>> widont('<h1><a href="#">In a link inside a heading</i> </a></h1>')
    '<h1><a href="#">In a link inside a&nbsp;heading</i> </a></h1>'

    >>> widont('<h1><a href="#">In a link</a> followed by other text</h1>')
    '<h1><a href="#">In a link</a> followed by other&nbsp;text</h1>'

    Empty HTMLs shouldn't error
    >>> widont('<h1><a href="#"></a></h1>')
    '<h1><a href="#"></a></h1>'

    >>> widont('<div>Divs get no love!</div>')
    '<div>Divs get no love!</div>'

    >>> widont('<pre>Neither do PREs</pre>')
    '<pre>Neither do PREs</pre>'

    >>> widont('<div><p>But divs with paragraphs do!</p></div>')
    '<div><p>But divs with paragraphs&nbsp;do!</p></div>'
    """

    widont_finder = re.compile(r"""((?:</?(?:a|em|span|strong|i|b)[^>]*>)|[^<>\s]) # must be proceeded by an approved inline opening or closing tag or a nontag/nonspace
                               \s+                                             # the space to replace
                               ([^<>\s]+                                       # must be flollowed by non-tag non-space characters
                               \s*                                             # optional white space!
                               (</(a|em|span|strong|i|b)>\s*)*                 # optional closing inline tags with optional white space after each
                               ((</(p|h[1-6]|li|dt|dd)>)|$))                   # end with a closing p, h1-6, li or the end of the string
                               """, re.VERBOSE)
    # Join the final two words with a non-breaking space entity.  The '&nbsp;'
    # had been decoded to a literal space by an HTML viewer, which made the
    # substitution a no-op; restored here.
    output = widont_finder.sub(r'\1&nbsp;\2', text)

    return output
|
||||
|
||||
def applyfilters(text):
    """Applies the following filters: smartypants, caps, amp, initial_quotes

    >>> typogrify('<h2>"Jayhawks" & KU fans act extremely obnoxiously</h2>')
    '<h2><span class="dquo">&#8220;</span>Jayhawks&#8221; <span class="amp">&amp;</span> <span class="caps">KU</span> fans act extremely obnoxiously</h2>'
    """
    # Order matters: amp runs before smartypants curls the quotes.
    for filter_func in (amp, smartypants, caps, initial_quotes):
        text = filter_func(text)
    return text
|
||||
|
||||
def typogrify(text, ignore_tags=None):
    """The super typography filter

    Applies filters to text that are not in tags contained in the
    ignore_tags list.
    """
    pieces = []
    for fragment, should_process in process_ignores(text, ignore_tags):
        pieces.append(applyfilters(fragment) if should_process else fragment)

    # apply widont at the end, as its already smart about tags. Hopefully.
    return widont("".join(pieces))
|
||||
|
||||
def _test():
    """Run this module's doctests with verbose output."""
    import doctest
    doctest.testmod(verbose=True)
|
||||
|
||||
# Run the doctest suite when this module is executed directly.
if __name__ == "__main__":
    _test()
|
1
typogrify/packages/__init__.py
Normal file
1
typogrify/packages/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# Packages live here.
|
101
typogrify/packages/titlecase/__init__.py
Executable file
101
typogrify/packages/titlecase/__init__.py
Executable file
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# titlecase v0.5.1
|
||||
# Copyright (C) 2008-2010, Stuart Colville.
|
||||
# https://pypi.python.org/pypi/titlecase
|
||||
|
||||
"""
|
||||
Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008
|
||||
Python version by Stuart Colville http://muffinresearch.co.uk
|
||||
License: http://www.opensource.org/licenses/mit-license.php
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
__all__ = ['titlecase']
__version__ = '0.5.1'

# Words kept lowercase in titles (New York Times style, plus v/v./vs/vs.).
# Raw string: '\.' is an invalid str escape and warns on Python >= 3.12.
SMALL = r'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
# Punctuation characters that may precede/follow a word.
PUNCT = r"""!"#$%&'‘()*+,\-./:;?@[\\\]_`{|}~"""

SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)           # e.g. example.com, del.icio.us
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)  # iTunes, McCarthy: caps after 1st letter
CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT)           # first letter, skipping leading punctuation
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)           # small word starting a sub-phrase
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)    # d'Artagnan / o'reilly / l'Amour
ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT)
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")  # A.B. / D.C.
MAC_MC = re.compile(r"^([Mm]a?c)(\w+)")


def titlecase(text):

    """
    Titlecases input text

    This filter changes all words to Title Caps, and attempts to be clever
    about *un*capitalizing SMALL words like a/an/the in the input.

    The list of "SMALL words" which are not capped comes from
    the New York Times Manual of Style, plus 'vs' and 'v'.

    """

    lines = re.split('[\r\n]+', text)
    processed = []
    for line in lines:
        # An all-caps line is first lowercased word-by-word, then re-capped.
        all_caps = ALL_CAPS.match(line)
        words = re.split('[\t ]', line)
        tc_line = []
        for word in words:
            if all_caps:
                if UC_INITIALS.match(word):
                    # Keep dotted initials (D.C.) exactly as written.
                    tc_line.append(word)
                    continue
                else:
                    word = word.lower()

            if APOS_SECOND.match(word):
                # Capitalize both the leading letter and the letter after the
                # apostrophe (o'reilly -> O'Reilly).
                word = word.replace(word[0], word[0].upper())
                word = word.replace(word[2], word[2].upper())
                tc_line.append(word)
                continue
            if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
                # Leave domain names and mixed-case words untouched.
                tc_line.append(word)
                continue
            if SMALL_WORDS.match(word):
                tc_line.append(word.lower())
                continue

            match = MAC_MC.match(word)
            if match:
                tc_line.append("%s%s" % (match.group(1).capitalize(),
                                         match.group(2).capitalize()))
                continue

            # Default: capitalize the first letter of each hyphen-part.
            hyphenated = []
            for item in word.split('-'):
                hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item))
            tc_line.append("-".join(hyphenated))

        result = " ".join(tc_line)

        # A small word may not start the title...
        result = SMALL_FIRST.sub(lambda m: '%s%s' % (
            m.group(1),
            m.group(2).capitalize()
        ), result)

        # ...nor end it...
        result = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), result)

        # ...nor start a sub-phrase after :.;?! punctuation.
        result = SUBPHRASE.sub(lambda m: '%s%s' % (
            m.group(1),
            m.group(2).capitalize()
        ), result)

        processed.append(result)

    return "\n".join(processed)
|
||||
|
174
typogrify/packages/titlecase/tests.py
Normal file
174
typogrify/packages/titlecase/tests.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Tests for titlecase"""
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../'))
|
||||
|
||||
from titlecase import titlecase
|
||||
|
||||
# (input, expected_output) pairs consumed by test_input_output below; each
# row exercises one titlecase() behaviour (small words, acronyms, O'/Mc
# names, all-caps input, URLs, embedded newlines, ...).
TEST_DATA = (
    (
        "Q&A with steve jobs: 'that's what happens in technology'",
        "Q&A With Steve Jobs: 'That's What Happens in Technology'"
    ),
    (
        "What is AT&T's problem?",
        "What Is AT&T's Problem?"
    ),
    (
        "Apple deal with AT&T falls through",
        "Apple Deal With AT&T Falls Through"
    ),
    (
        "this v that",
        "This v That"
    ),
    (
        "this v. that",
        "This v. That"
    ),
    (
        "this vs that",
        "This vs That"
    ),
    (
        "this vs. that",
        "This vs. That"
    ),
    (
        "The SEC's Apple probe: what you need to know",
        "The SEC's Apple Probe: What You Need to Know"
    ),
    (
        "'by the Way, small word at the start but within quotes.'",
        "'By the Way, Small Word at the Start but Within Quotes.'"
    ),
    (
        "Small word at end is nothing to be afraid of",
        "Small Word at End Is Nothing to Be Afraid Of"
    ),
    (
        "Starting Sub-Phrase With a Small Word: a Trick, Perhaps?",
        "Starting Sub-Phrase With a Small Word: A Trick, Perhaps?"
    ),
    (
        "Sub-Phrase With a Small Word in Quotes: 'a Trick, Perhaps?'",
        "Sub-Phrase With a Small Word in Quotes: 'A Trick, Perhaps?'"
    ),
    (
        'sub-phrase with a small word in quotes: "a trick, perhaps?"',
        'Sub-Phrase With a Small Word in Quotes: "A Trick, Perhaps?"'
    ),
    (
        '"Nothing to Be Afraid of?"',
        '"Nothing to Be Afraid Of?"'
    ),
    (
        '"Nothing to be Afraid Of?"',
        '"Nothing to Be Afraid Of?"'
    ),
    (
        'a thing',
        'A Thing'
    ),
    (
        "2lmc Spool: 'gruber on OmniFocus and vapo(u)rware'",
        "2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"
    ),
    (
        'this is just an example.com',
        'This Is Just an example.com'
    ),
    (
        'this is something listed on del.icio.us',
        'This Is Something Listed on del.icio.us'
    ),
    (
        'iTunes should be unmolested',
        'iTunes Should Be Unmolested'
    ),
    (
        'reading between the lines of steve jobs’s ‘thoughts on music’',
        'Reading Between the Lines of Steve Jobs’s ‘Thoughts on Music’'
    ),
    (
        'seriously, ‘repair permissions’ is voodoo',
        'Seriously, ‘Repair Permissions’ Is Voodoo'
    ),
    (
        'generalissimo francisco franco: still dead; kieren McCarthy: still a jackass',
        'Generalissimo Francisco Franco: Still Dead; Kieren McCarthy: Still a Jackass'
    ),
    (
        "O'Reilly should be untouched",
        "O'Reilly Should Be Untouched"
    ),
    (
        "my name is o'reilly",
        "My Name Is O'Reilly"
    ),
    (
        "WASHINGTON, D.C. SHOULD BE FIXED BUT MIGHT BE A PROBLEM",
        "Washington, D.C. Should Be Fixed but Might Be a Problem"
    ),
    (
        "THIS IS ALL CAPS AND SHOULD BE ADDRESSED",
        "This Is All Caps and Should Be Addressed"
    ),
    (
        "Mr McTavish went to MacDonalds",
        "Mr McTavish Went to MacDonalds"
    ),
    (
        "this shouldn't\nget mangled",
        "This Shouldn't\nGet Mangled"
    ),
    (
        "this is http://foo.com",
        "This Is http://foo.com"
    )
)
|
||||
|
||||
def test_all_caps_regex():
    """Test - all capitals regex"""
    from titlecase import ALL_CAPS
    match = ALL_CAPS.match('THIS IS ALL CAPS')
    assert bool(match) is True
|
||||
|
||||
def test_initials_regex():
    """Test - uppercase initals regex with A.B"""
    from titlecase import UC_INITIALS
    match = UC_INITIALS.match('A.B')
    assert bool(match) is True
|
||||
|
||||
def test_initials_regex_2():
    """Test - uppercase initals regex with A.B."""
    from titlecase import UC_INITIALS
    match = UC_INITIALS.match('A.B.')
    assert bool(match) is True
|
||||
|
||||
def test_initials_regex_3():
    """Test - uppercase initals regex with ABCD"""
    from titlecase import UC_INITIALS
    match = UC_INITIALS.match('ABCD')
    assert bool(match) is False
|
||||
|
||||
def check_input_matches_expected_output(in_, out):
    """Function yielded by test generator"""
    # Compute once so the failure message shows the same value that failed.
    actual = titlecase(in_)
    try:
        assert actual == out
    except AssertionError:
        print("%s != %s" % (actual, out))
        raise
|
||||
|
||||
|
||||
def test_input_output():
    """Generated tests"""
    # nose-style test generator: yields one check per TEST_DATA row.
    for in_, expected in TEST_DATA:
        yield check_input_matches_expected_output, in_, expected
|
||||
|
||||
|
||||
# Run the generated tests directly through the nose test runner.
if __name__ == "__main__":
    import nose
    nose.main()
|
||||
|
0
typogrify/templatetags/__init__.py
Normal file
0
typogrify/templatetags/__init__.py
Normal file
36
typogrify/templatetags/jinja_filters.py
Normal file
36
typogrify/templatetags/jinja_filters.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
from typogrify.filters import amp, caps, initial_quotes, smartypants, titlecase, typogrify, widont, TypogrifyError
|
||||
from functools import wraps
|
||||
import jinja2
|
||||
from jinja2.exceptions import TemplateError
|
||||
|
||||
|
||||
def make_safe(f):
    """
    A function wrapper to make typogrify play nice with jinja2's
    unicode support.

    Wraps filter *f* so its result is marked markup-safe, and converts
    typogrify errors into jinja2 TemplateErrors.
    """
    @wraps(f)
    def wrapper(text):
        f.is_safe = True
        out = text
        try:
            out = f(text)
        except TypogrifyError as e:
            # Exception.message was removed in Python 3; str(e) is the
            # portable way to get the message. Chain the original error.
            raise TemplateError(str(e)) from e
        # NOTE(review): jinja2.Markup moved to markupsafe.Markup in Jinja2
        # 3.1 — confirm the pinned jinja2 version still exposes it.
        return jinja2.Markup(out)
    wrapper.is_safe = True
    return wrapper
|
||||
|
||||
|
||||
def register(env):
    """
    Call this to register the template filters for jinja2.
    """
    # Register every typogrify filter under its own name, wrapped for safety.
    filter_funcs = {
        'amp': amp,
        'caps': caps,
        'initial_quotes': initial_quotes,
        'smartypants': smartypants,
        'titlecase': titlecase,
        'typogrify': typogrify,
        'widont': widont,
    }
    for name, func in filter_funcs.items():
        env.filters[name] = make_safe(func)
|
40
typogrify/templatetags/typogrify_tags.py
Normal file
40
typogrify/templatetags/typogrify_tags.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
from typogrify.filters import amp, caps, initial_quotes, smartypants, titlecase, typogrify, widont, TypogrifyError
|
||||
from functools import wraps
|
||||
from django.conf import settings
|
||||
from django import template
|
||||
from django.utils.safestring import mark_safe
|
||||
from django.utils.encoding import force_str
|
||||
|
||||
|
||||
# Django template library the filters below are registered against.
register = template.Library()
|
||||
|
||||
|
||||
def make_safe(f):
    """
    A function wrapper to make typogrify play nice with django's
    unicode support.

    Wraps filter *f* so its result is marked safe for template output.
    On TypogrifyError the original text is returned unchanged, unless
    settings.DEBUG is on, in which case the error is re-raised.
    """
    @wraps(f)
    def wrapper(text):
        text = force_str(text)
        f.is_safe = True
        out = text
        try:
            out = f(text)
        except TypogrifyError:
            if settings.DEBUG:
                # Bare 'raise' preserves the original traceback
                # ('raise e' re-raised with a fresh raise site).
                raise
            # Best-effort in production: fall back to the input text.
            return text
        return mark_safe(out)
    wrapper.is_safe = True
    return wrapper
|
||||
|
||||
|
||||
# Expose each typogrify filter to Django templates, wrapped so that errors
# degrade gracefully outside DEBUG (see make_safe above in this module).
register.filter('amp', make_safe(amp))
register.filter('caps', make_safe(caps))
register.filter('initial_quotes', make_safe(initial_quotes))
register.filter('smartypants', make_safe(smartypants))
register.filter('titlecase', make_safe(titlecase))
register.filter('typogrify', make_safe(typogrify))
register.filter('widont', make_safe(widont))
|
Loading…
Reference in a new issue