Only print the first 100 lines of a long diff. New soup2html code for spacing associated with certain tags.
- Legacy-Id: 337
This commit is contained in:
parent
9b4b6c5297
commit
dd37257c0c
|
@ -178,7 +178,8 @@ class UrlTestCase(TestCase):
|
||||||
print "OK cmp %s" % (url)
|
print "OK cmp %s" % (url)
|
||||||
else:
|
else:
|
||||||
contextlines = 0
|
contextlines = 0
|
||||||
diff = "\n".join(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
|
difflist = list(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
|
||||||
|
diff = "\n".join(difflist)
|
||||||
for chunk in self.diffchunks:
|
for chunk in self.diffchunks:
|
||||||
#print "*** Checking for chunk:", chunk[:24]
|
#print "*** Checking for chunk:", chunk[:24]
|
||||||
while re.search(chunk, diff):
|
while re.search(chunk, diff):
|
||||||
|
@ -201,7 +202,9 @@ class UrlTestCase(TestCase):
|
||||||
print "OK cmp %s" % (url)
|
print "OK cmp %s" % (url)
|
||||||
else:
|
else:
|
||||||
print "Diff: %s" % (url)
|
print "Diff: %s" % (url)
|
||||||
print diff
|
print "\n".join(difflist[:100])
|
||||||
|
if len(difflist) > 100:
|
||||||
|
print "... (skipping %s lines of diff)" % (len(difflist)-100)
|
||||||
else:
|
else:
|
||||||
print "OK cmp %s" % (url)
|
print "OK cmp %s" % (url)
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,8 @@ try:
|
||||||
except:
|
except:
|
||||||
from BeautifulSoup import Tag, BeautifulSoup, NavigableString
|
from BeautifulSoup import Tag, BeautifulSoup, NavigableString
|
||||||
|
|
||||||
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", ]
|
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"]
|
||||||
|
space_tags = ["th", "td", "br"]
|
||||||
ignore_tags = ["head", "script", "style"]
|
ignore_tags = ["head", "script", "style"]
|
||||||
pre_tags = ["pre"]
|
pre_tags = ["pre"]
|
||||||
entities = [("&lt;", "<"), ("&gt;", ">"),
|
entities = [("&lt;", "<"), ("&gt;", ">"),
|
||||||
|
@ -86,7 +87,10 @@ def render(node, encoding='latin-1', pre=False):
|
||||||
blocks.append(child.text+"\n\n")
|
blocks.append(child.text+"\n\n")
|
||||||
node.is_block = True
|
node.is_block = True
|
||||||
else:
|
else:
|
||||||
words.append(child.text)
|
if child.text:
|
||||||
|
if child.name in space_tags and not words[-1][-1] in [" ", "\t", "\n"]:
|
||||||
|
words.append(" ")
|
||||||
|
words.append(child.text)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unexpected node type: '%s'" % child)
|
raise ValueError("Unexpected node type: '%s'" % child)
|
||||||
if words:
|
if words:
|
||||||
|
@ -111,7 +115,6 @@ def soup2text(html):
|
||||||
# some preprocessing to handle common pathological cases
|
# some preprocessing to handle common pathological cases
|
||||||
html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html)
|
html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html)
|
||||||
html = re.sub("<br */?>([^\n])", r"<br />\n\1", html)
|
html = re.sub("<br */?>([^\n])", r"<br />\n\1", html)
|
||||||
html = re.sub("([^ \t\n])(</t[hd].*?>)", r"\1 \2", html)
|
|
||||||
soup = TextSoup(html)
|
soup = TextSoup(html)
|
||||||
return str(soup)
|
return str(soup)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue