diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py index 9e9a2c1d1..6dde124eb 100755 --- a/ietf/utils/soup2text.py +++ b/ietf/utils/soup2text.py @@ -97,7 +97,7 @@ class TextSoup(BeautifulSoup): node = render(self, encoding) str = node.text str = re.sub("[ \t]+", " ", str) - str = re.sub("\n\n+", "\n\n", str) + str = re.sub("\n\n+ *", "\n\n", str) return str def soup2text(html): @@ -105,6 +105,7 @@ def soup2text(html): html = html.replace("\r\n", "\n").replace("\r", "\n") # some preprocessing to handle common pathological cases html = re.sub("
[ \t\n]*(
)+", "

", html) + html = re.sub("
([^\n])", r"
\n\1", html) soup = TextSoup(html) return str(soup)