diff --git a/ietf/tests.py b/ietf/tests.py
index 8237b9c3b..51d37680f 100644
--- a/ietf/tests.py
+++ b/ietf/tests.py
@@ -178,7 +178,8 @@ class UrlTestCase(TestCase):
print "OK cmp %s" % (url)
else:
contextlines = 0
- diff = "\n".join(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
+ difflist = list(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
+ diff = "\n".join(difflist)
for chunk in self.diffchunks:
#print "*** Checking for chunk:", chunk[:24]
while re.search(chunk, diff):
@@ -201,7 +202,9 @@ class UrlTestCase(TestCase):
print "OK cmp %s" % (url)
else:
print "Diff: %s" % (url)
- print diff
+ print "\n".join(difflist[:100])
+ if len(difflist) > 100:
+ print "... (skipping %s lines of diff)" % (len(difflist)-100)
else:
print "OK cmp %s" % (url)
diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py
index dc8c91b47..28e11d862 100755
--- a/ietf/utils/soup2text.py
+++ b/ietf/utils/soup2text.py
@@ -7,7 +7,8 @@ try:
except:
from BeautifulSoup import Tag, BeautifulSoup, NavigableString
-block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", ]
+block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"]
+space_tags = ["th", "td", "br"]
ignore_tags = ["head", "script", "style"]
pre_tags = ["pre"]
entities = [("<", "<"), (">", ">"),
@@ -86,7 +87,10 @@ def render(node, encoding='latin-1', pre=False):
blocks.append(child.text+"\n\n")
node.is_block = True
else:
- words.append(child.text)
+ if child.text:
+ if child.name in space_tags and not words[-1][-1] in [" ", "\t", "\n"]:
+ words.append(" ")
+ words.append(child.text)
else:
raise ValueError("Unexpected node type: '%s'" % child)
if words:
@@ -111,7 +115,6 @@ def soup2text(html):
# some preprocessing to handle common pathological cases
html = re.sub("
[ \t\n]*(
)+", "