fix: in htmlization, ignore HTML files that do not parse as HTML (#4850)
This commit is contained in:
parent
efb9f135c3
commit
44c38abbbb
|
@@ -571,7 +571,10 @@ class DocumentInfo(models.Model):
|
|||
return None
|
||||
|
||||
# get body
|
||||
body = etree.HTML(html).xpath("//body")[0]
|
||||
etree_html = etree.HTML(html)
|
||||
if etree_html is None:
|
||||
return None
|
||||
body = etree_html.xpath("//body")[0]
|
||||
body.tag = "div"
|
||||
if classes:
|
||||
body.attrib["class"] = classes
|
||||
|
|
Loading…
Reference in a new issue