From bfcb0e6c789a0af78f092be97b48769daaecc3ea Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz <henrik@levkowetz.com> Date: Mon, 11 Jun 2007 23:52:51 +0000 Subject: [PATCH] Two soup2text tweaks. - Legacy-Id: 324 --- ietf/utils/soup2text.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py index 9e9a2c1d1..6dde124eb 100755 --- a/ietf/utils/soup2text.py +++ b/ietf/utils/soup2text.py @@ -97,7 +97,7 @@ class TextSoup(BeautifulSoup): node = render(self, encoding) str = node.text str = re.sub("[ \t]+", " ", str) - str = re.sub("\n\n+", "\n\n", str) + str = re.sub("\n\n+ *", "\n\n", str) return str def soup2text(html): @@ -105,6 +105,7 @@ def soup2text(html): html = html.replace("\r\n", "\n").replace("\r", "\n") # some preprocessing to handle common pathological cases html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html) + html = re.sub("<br */?>([^\n])", r"<br />\n\1", html) soup = TextSoup(html) return str(soup)