soup2html() tweak to handle HTML comments.
- Legacy-Id: 328
This commit is contained in:
parent
9589d3f1c9
commit
67eb998901
|
@@ -51,6 +51,8 @@ def normalize(str):
|
||||||
# Normalize whitespace at the beginning and end of the string
|
# Normalize whitespace at the beginning and end of the string
|
||||||
str = re.sub("^[ \t\n]+", " ", str)
|
str = re.sub("^[ \t\n]+", " ", str)
|
||||||
str = re.sub("[ \t\n]+$", " ", str)
|
str = re.sub("[ \t\n]+$", " ", str)
|
||||||
|
# remove comments
|
||||||
|
str = re.sub("(?s)<!--.*?-->", "", str)
|
||||||
# remove xml PIs and metainformation
|
# remove xml PIs and metainformation
|
||||||
str = re.sub("<![^>]*>", "", str)
|
str = re.sub("<![^>]*>", "", str)
|
||||||
str = re.sub("<\?[^>]*\?>", "", str)
|
str = re.sub("<\?[^>]*\?>", "", str)
|
||||||
|
|
Loading…
Reference in a new issue