soup2text() tweak to handle table cells.

- Legacy-Id: 326
This commit is contained in:
Henrik Levkowetz 2007-06-12 00:25:45 +00:00
parent 61cdc1e5d1
commit b15c02c830

View file

@@ -106,6 +106,7 @@ def soup2text(html):
# some preprocessing to handle common pathological cases # some preprocessing to handle common pathological cases
html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html) html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html)
html = re.sub("<br */?>([^\n])", r"<br />\n\1", html) html = re.sub("<br */?>([^\n])", r"<br />\n\1", html)
html = re.sub("(<t[hd][^>]*>)([^ \t\n])", r"\1 \2", html)
soup = TextSoup(html) soup = TextSoup(html)
return str(soup) return str(soup)