soup2html() tweak to better avoid indentation at paragraph start.
- Legacy-Id: 329
This commit is contained in:
parent
67eb998901
commit
541b041cdc
|
@@ -43,7 +43,7 @@ def para(words, pre):
|
||||||
text = "".join(words)
|
text = "".join(words)
|
||||||
text = unescape(text)
|
text = unescape(text)
|
||||||
if not pre:
|
if not pre:
|
||||||
text = re.sub("[\r\n\t ]+", " ", text)
|
text = re.sub("[\r\n\t ]+", " ", text.strip())
|
||||||
text = textwrap.fill(text)
|
text = textwrap.fill(text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
@@ -99,7 +99,7 @@ class TextSoup(BeautifulSoup):
|
||||||
node = render(self, encoding)
|
node = render(self, encoding)
|
||||||
str = node.text
|
str = node.text
|
||||||
str = re.sub("[ \t]+", " ", str)
|
str = re.sub("[ \t]+", " ", str)
|
||||||
str = re.sub("\n\n+ *", "\n\n", str)
|
str = re.sub("\n\n+", "\n\n", str)
|
||||||
return str
|
return str
|
||||||
|
|
||||||
def soup2text(html):
|
def soup2text(html):
|
||||||
|
|
Loading…
Reference in a new issue