* ietf/tests.py, in reduce(): add an ad-hoc fix for the pathological case of unclosed <li> tags. BeautifulSoup can handle them, but the recursive text rendering code in soup2text recurses too deeply when the list is sufficiently long.
* ietf/tests.py, in setUp(): grab the right tuple element when extracting the URLs from the URL test tuples.
* ietf/tests.py, in read_testurls(): close the opened file.
* ietf/tests.py, in doUrlsTest(): narrow the try/except clause that handles urllib.URLError, and add a new catch-all clause that prints the offending URL and re-raises.
* soup2text.py, in para(): undo the previous change.
 - Legacy-Id: 304
This commit is contained in:
parent
b42e0728c8
commit
0452fca7d2
|
@ -23,11 +23,10 @@ def run_tests(module_list, verbosity=1, extra_tests=[]):
|
||||||
|
|
||||||
def reduce(html):
|
def reduce(html):
|
||||||
html = re.sub(" :", ":", html)
|
html = re.sub(" :", ":", html)
|
||||||
|
if html.count("<li>") > 5*html.count("</li>"):
|
||||||
|
html = html.replace("<li>", "</li><li>")
|
||||||
text = html2text(html)
|
text = html2text(html)
|
||||||
text = re.sub('\."', '".', text)
|
text = re.sub('\."', '".', text)
|
||||||
#text = re.sub("\n\n+", "\n\n", text)
|
|
||||||
#text = "\n\n".join([textwrap.fill(para, 80) for para in text.split("\n\n")])
|
|
||||||
#text = re.sub(" +", " ", text)
|
|
||||||
text = [ line.strip() for line in text.split("\n") ]
|
text = [ line.strip() for line in text.split("\n") ]
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
@ -68,6 +67,7 @@ def read_testurls(filename):
|
||||||
raise ValueError("Expected 'HTTP_CODE TESTURL [GOODURL]' in %s line, found '%s'." % (filename, line))
|
raise ValueError("Expected 'HTTP_CODE TESTURL [GOODURL]' in %s line, found '%s'." % (filename, line))
|
||||||
codes = codes.split(",")
|
codes = codes.split(",")
|
||||||
tuples += [ (codes, testurl, goodurl) ]
|
tuples += [ (codes, testurl, goodurl) ]
|
||||||
|
file.close()
|
||||||
return tuples
|
return tuples
|
||||||
|
|
||||||
class UrlTestCase(TestCase):
|
class UrlTestCase(TestCase):
|
||||||
|
@ -83,7 +83,7 @@ class UrlTestCase(TestCase):
|
||||||
self.testtuples += read_testurls(root+"/testurl.list")
|
self.testtuples += read_testurls(root+"/testurl.list")
|
||||||
if "testurls.list" in files:
|
if "testurls.list" in files:
|
||||||
self.testtuples += read_testurls(root+"/testurls.list")
|
self.testtuples += read_testurls(root+"/testurls.list")
|
||||||
self.testurls = [ tuple[0] for tuple in self.testtuples ]
|
self.testurls = [ tuple[1] for tuple in self.testtuples ]
|
||||||
# Use the default database for the url tests, instead of the test database
|
# Use the default database for the url tests, instead of the test database
|
||||||
self.testdb = settings.DATABASE_NAME
|
self.testdb = settings.DATABASE_NAME
|
||||||
connection.close()
|
connection.close()
|
||||||
|
@ -136,8 +136,10 @@ class UrlTestCase(TestCase):
|
||||||
#print "Fetching", master, "...",
|
#print "Fetching", master, "...",
|
||||||
mfile = urllib.urlopen(master)
|
mfile = urllib.urlopen(master)
|
||||||
goodhtml = mfile.read()
|
goodhtml = mfile.read()
|
||||||
|
except urllib.URLError, e:
|
||||||
|
print "Failed retrieving master text for comparison: %s" % e
|
||||||
|
try:
|
||||||
mfile.close()
|
mfile.close()
|
||||||
print ""
|
|
||||||
if goodhtml and response.content:
|
if goodhtml and response.content:
|
||||||
testtext = reduce(response.content)
|
testtext = reduce(response.content)
|
||||||
goodtext = reduce(goodhtml)
|
goodtext = reduce(goodhtml)
|
||||||
|
@ -158,8 +160,9 @@ class UrlTestCase(TestCase):
|
||||||
else:
|
else:
|
||||||
print "Diff: %s" % (url)
|
print "Diff: %s" % (url)
|
||||||
print diff
|
print diff
|
||||||
except urllib.URLError, e:
|
except:
|
||||||
print "Failed retrieving master text for comparison: %s" % e
|
print "Exception occurred for url %s" % (url)
|
||||||
|
raise
|
||||||
|
|
||||||
if not res in response_count:
|
if not res in response_count:
|
||||||
response_count[res] = 0
|
response_count[res] = 0
|
||||||
|
|
|
@ -27,10 +27,7 @@ def para(words, pre):
|
||||||
text = text.replace(entity, char)
|
text = text.replace(entity, char)
|
||||||
if not pre:
|
if not pre:
|
||||||
text = re.sub("[\r\n\t ]+", " ", text)
|
text = re.sub("[\r\n\t ]+", " ", text)
|
||||||
try: # On OS-X / Python 2.5 textwrap can throw a runtime error
|
text = textwrap.fill(text)
|
||||||
text = textwrap.fill(text)
|
|
||||||
except RuntimeError:
|
|
||||||
pass
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def render(node, encoding='latin-1', pre=False):
|
def render(node, encoding='latin-1', pre=False):
|
||||||
|
|
Loading…
Reference in a new issue