diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py
index bae2bd321..c86484077 100755
--- a/ietf/utils/soup2text.py
+++ b/ietf/utils/soup2text.py
@@ -66,6 +66,17 @@ class TextSoup(BeautifulSoup):
         return str
 
 def soup2text(html):
+    """Return the string form of a TextSoup built from *html*.
+
+    Before parsing, any run of line breaks separated only by spaces,
+    tabs or carriage returns is collapsed to exactly one blank line.
+    The preprocessed text is then handed to TextSoup.
+    """
+    # Some preprocessing to handle common pathological cases.
+    # NOTE(review): the reviewed copy of this patch showed raw line breaks
+    # inside the pattern and replacement literals (a syntax error); they are
+    # written as \n escapes here.  If upstream matched markup instead, restore it.
+    html = re.sub(r"\n[ \t\r\n]*\n+", "\n\n", html)
     soup = TextSoup(html)
     return str(soup)
 