* ietf/tests.py, in reduce(): add an ad-hoc fix for the pathological case of unclosed <li> tags. BeautifulSoup can handle them, but the recursive text rendering code in soup2text recurses too deeply when the list is sufficiently long.
* ietf/tests.py, in setUp(): grab the right tuple element when extracting the URLs from the URL test tuples.
* ietf/tests.py, in read_testurls(): close the opened file.
* ietf/tests.py, in doUrlsTest(): use a narrower try/except clause, and add a new one.
* soup2text.py, in para(): undo the previous change.
- Legacy-Id: 304
This commit is contained in:
parent
b42e0728c8
commit
0452fca7d2
|
@ -23,11 +23,10 @@ def run_tests(module_list, verbosity=1, extra_tests=[]):
|
|||
|
||||
def reduce(html):
    """Reduce an HTML page to a list of whitespace-stripped text lines.

    The markup is rendered to plain text with html2text(), with two
    punctuation rewrites applied along the way: ' :' becomes ':' in the
    HTML, and '."' becomes '".' in the rendered text.

    Args:
        html: the HTML markup to reduce, as a string.

    Returns:
        A list of strings, one per "\n"-separated line of the rendered
        text, each with leading and trailing whitespace removed.
    """
    html = re.sub(" :", ":", html)
    # Ad-hoc fix for pages that leave (most of) their <li> tags unclosed,
    # detected as more than five times as many opening as closing tags.
    # BeautifulSoup copes with that, but the recursive text rendering in
    # soup2text recurses too deeply on a sufficiently long list, so close
    # every item explicitly before rendering.
    if html.count("<li>") > 5 * html.count("</li>"):
        html = html.replace("<li>", "</li><li>")
    text = html2text(html)
    # Raw string: the pattern is a literal dot followed by a double quote.
    text = re.sub(r'\."', '".', text)
    # Further normalisation steps below are currently disabled.
    #text = re.sub("\n\n+", "\n\n", text)
    #text = "\n\n".join([textwrap.fill(para, 80) for para in text.split("\n\n")])
    #text = re.sub(" +", " ", text)
    return [line.strip() for line in text.split("\n")]
|
||||
|
||||
|
@ -68,6 +67,7 @@ def read_testurls(filename):
|
|||
raise ValueError("Expected 'HTTP_CODE TESTURL [GOODURL]' in %s line, found '%s'." % (filename, line))
|
||||
codes = codes.split(",")
|
||||
tuples += [ (codes, testurl, goodurl) ]
|
||||
file.close()
|
||||
return tuples
|
||||
|
||||
class UrlTestCase(TestCase):
|
||||
|
@ -83,7 +83,7 @@ class UrlTestCase(TestCase):
|
|||
self.testtuples += read_testurls(root+"/testurl.list")
|
||||
if "testurls.list" in files:
|
||||
self.testtuples += read_testurls(root+"/testurls.list")
|
||||
self.testurls = [ tuple[0] for tuple in self.testtuples ]
|
||||
self.testurls = [ tuple[1] for tuple in self.testtuples ]
|
||||
# Use the default database for the url tests, instead of the test database
|
||||
self.testdb = settings.DATABASE_NAME
|
||||
connection.close()
|
||||
|
@ -136,8 +136,10 @@ class UrlTestCase(TestCase):
|
|||
#print "Fetching", master, "...",
|
||||
mfile = urllib.urlopen(master)
|
||||
goodhtml = mfile.read()
|
||||
except urllib.URLError, e:
|
||||
print "Failed retrieving master text for comparison: %s" % e
|
||||
try:
|
||||
mfile.close()
|
||||
print ""
|
||||
if goodhtml and response.content:
|
||||
testtext = reduce(response.content)
|
||||
goodtext = reduce(goodhtml)
|
||||
|
@ -158,8 +160,9 @@ class UrlTestCase(TestCase):
|
|||
else:
|
||||
print "Diff: %s" % (url)
|
||||
print diff
|
||||
except urllib.URLError, e:
|
||||
print "Failed retrieving master text for comparison: %s" % e
|
||||
except:
|
||||
print "Exception occurred for url %s" % (url)
|
||||
raise
|
||||
|
||||
if not res in response_count:
|
||||
response_count[res] = 0
|
||||
|
|
|
@ -27,10 +27,7 @@ def para(words, pre):
|
|||
text = text.replace(entity, char)
|
||||
if not pre:
|
||||
text = re.sub("[\r\n\t ]+", " ", text)
|
||||
try: # On OS-X / Python 2.5 textwrap can throw a runtime error
|
||||
text = textwrap.fill(text)
|
||||
except RuntimeError:
|
||||
pass
|
||||
return text
|
||||
|
||||
def render(node, encoding='latin-1', pre=False):
|
||||
|
|
Loading…
Reference in a new issue