Only print the first 100 lines of a long diff. New soup2html code for spacing associated with certain tags.
- Legacy-Id: 337
This commit is contained in:
parent
9b4b6c5297
commit
dd37257c0c
|
@@ -178,7 +178,8 @@ class UrlTestCase(TestCase):
|
|||
print "OK cmp %s" % (url)
|
||||
else:
|
||||
contextlines = 0
|
||||
diff = "\n".join(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
|
||||
difflist = list(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
|
||||
diff = "\n".join(difflist)
|
||||
for chunk in self.diffchunks:
|
||||
#print "*** Checking for chunk:", chunk[:24]
|
||||
while re.search(chunk, diff):
|
||||
|
@@ -201,7 +202,9 @@ class UrlTestCase(TestCase):
|
|||
print "OK cmp %s" % (url)
|
||||
else:
|
||||
print "Diff: %s" % (url)
|
||||
print diff
|
||||
print "\n".join(difflist[:100])
|
||||
if len(difflist) > 100:
|
||||
print "... (skipping %s lines of diff)" % (len(difflist)-100)
|
||||
else:
|
||||
print "OK cmp %s" % (url)
|
||||
|
||||
|
|
|
@@ -7,7 +7,8 @@ try:
|
|||
except:
|
||||
from BeautifulSoup import Tag, BeautifulSoup, NavigableString
|
||||
|
||||
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", ]
|
||||
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"]
|
||||
space_tags = ["th", "td", "br"]
|
||||
ignore_tags = ["head", "script", "style"]
|
||||
pre_tags = ["pre"]
|
||||
entities = [("&lt;", "<"), ("&gt;", ">"),
|
||||
|
@@ -86,6 +87,9 @@ def render(node, encoding='latin-1', pre=False):
|
|||
blocks.append(child.text+"\n\n")
|
||||
node.is_block = True
|
||||
else:
|
||||
if child.text:
|
||||
if child.name in space_tags and not words[-1][-1] in [" ", "\t", "\n"]:
|
||||
words.append(" ")
|
||||
words.append(child.text)
|
||||
else:
|
||||
raise ValueError("Unexpected node type: '%s'" % child)
|
||||
|
@@ -111,7 +115,6 @@ def soup2text(html):
|
|||
# some preprocessing to handle common pathological cases
|
||||
html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html)
|
||||
html = re.sub("<br */?>([^\n])", r"<br />\n\1", html)
|
||||
html = re.sub("([^ \t\n])(</t[hd].*?>)", r"\1 \2", html)
|
||||
soup = TextSoup(html)
|
||||
return str(soup)
|
||||
|
||||
|
|
Loading…
Reference in a new issue