From dd37257c0c2153027bcb0ee3beeb0840d29d2698 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Tue, 12 Jun 2007 17:52:07 +0000 Subject: [PATCH] Only print the first 100 lines of a long diff. New soup2html code for spacing associated with certain tags. - Legacy-Id: 337 --- ietf/tests.py | 7 +++++-- ietf/utils/soup2text.py | 9 ++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ietf/tests.py b/ietf/tests.py index 8237b9c3b..51d37680f 100644 --- a/ietf/tests.py +++ b/ietf/tests.py @@ -178,7 +178,8 @@ class UrlTestCase(TestCase): print "OK cmp %s" % (url) else: contextlines = 0 - diff = "\n".join(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm="")) + difflist = list(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm="")) + diff = "\n".join(difflist) for chunk in self.diffchunks: #print "*** Checking for chunk:", chunk[:24] while re.search(chunk, diff): @@ -201,7 +202,9 @@ class UrlTestCase(TestCase): print "OK cmp %s" % (url) else: print "Diff: %s" % (url) - print diff + print "\n".join(difflist[:100]) + if len(difflist) > 100: + print "... (skipping %s lines of diff)" % (len(difflist)-100) else: print "OK cmp %s" % (url) diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py index dc8c91b47..28e11d862 100755 --- a/ietf/utils/soup2text.py +++ b/ietf/utils/soup2text.py @@ -7,7 +7,8 @@ try: except: from BeautifulSoup import Tag, BeautifulSoup, NavigableString -block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", ] +block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"] +space_tags = ["th", "td", "br"] ignore_tags = ["head", "script", "style"] pre_tags = ["pre"] entities = [("<", "<"), (">", ">"), @@ -86,7 +87,10 @@ def render(node, encoding='latin-1', pre=False): blocks.append(child.text+"\n\n") node.is_block = True else: - words.append(child.text) + if child.text: + if child.name in space_tags and not words[-1][-1] in [" ", "\t", "\n"]: + words.append(" ") + words.append(child.text) else: raise ValueError("Unexpected node type: '%s'" % child) if words: @@ -111,7 +115,6 @@ def soup2text(html): # some preprocessing to handle common pathological cases html = re.sub("
[ \t\n]*(
)+", "

", html) html = re.sub("
([^\n])", r"
\n\1", html) - html = re.sub("([^ \t\n])()", r"\1 \2", html) soup = TextSoup(html) return str(soup)