Changed HTML cleaning to differentiate between fragment cleaning and document cleaning. Added an lxml-based cleaner for document cleaning, which also permits <style> tags (but not external style sheets).

- Legacy-Id: 14775
This commit is contained in:
Henrik Levkowetz 2018-03-13 12:48:22 +00:00
parent f3f77dce48
commit 6e9bb00f13
7 changed files with 12 additions and 19 deletions

View file

@ -19,6 +19,7 @@ import debug # pyflakes:ignore
from ietf.doc.models import ConsensusDocEvent
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
from ietf.utils.html import sanitize_fragment
register = template.Library()
@ -130,15 +131,14 @@ def format_textarea(value):
Also calls keep_spacing."""
return keep_spacing(linebreaksbr(escape(value).replace('&lt;b&gt;','<b>').replace('&lt;/b&gt;','</b>').replace('&lt;br&gt;','<br>')))
@register.filter(name='sanitize_html')
def sanitize_html(value):
@register.filter(name='sanitize')
def sanitize(value):
"""Sanitizes an HTML fragment.
This means both fixing broken html and restricting elements and
attributes to those deemed acceptable. See ietf/utils/html.py
for the details.
"""
from ietf.utils.html import sanitize_html
return sanitize_html(value)
return mark_safe(sanitize_fragment(value))
# For use with ballot view
@ -374,7 +374,7 @@ def format_snippet(text, trunc_words=25):
# expressions, for instance [REF](http://example.com/foo)
# Use bleach.linkify instead
text = bleach.linkify(text)
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_html(text)))))
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
snippet = truncatewords_html(full, trunc_words)
if snippet != full:
return mark_safe(u'<div class="snippet">%s<button class="btn btn-xs btn-default show-all"><span class="fa fa-caret-down"></span></button></div><div class="hidden full">%s</div>' % (snippet, full))

View file

@ -8,7 +8,7 @@ from django.utils.encoding import smart_text
import debug # pyflakes:ignore
from ietf.utils.html import sanitize_html
from ietf.utils.html import sanitize_document
def handle_upload_file(file,filename,meeting,subdir, request=None):
'''
@ -40,14 +40,7 @@ def handle_upload_file(file,filename,meeting,subdir, request=None):
text = smart_text(file.read())
# Whole file sanitization; add back what's missing from a complete
# document (sanitize will remove these).
clean = u"""<!DOCTYPE html>
<html lang="en">
<head><title>%s</title></head>
<body>
%s
</body>
</html>
""" % (filename, sanitize_html(text))
clean = sanitize_document(text)
destination.write(clean.encode('utf8'))
if request and clean != text:
messages.warning(request, "Uploaded html content is sanitized to prevent unsafe content. "

View file

@ -204,7 +204,7 @@
{% endif %}
{% if doc.rev != "" %}
{{ content|safe|keep_spacing|sanitize_html|wordwrap:80|safe }}
{{ content|safe|keep_spacing|sanitize|wordwrap:80|safe }}
{% endif %}
{% if not snapshot and chartering %}

View file

@ -142,6 +142,6 @@
{% if doc.rev %}
<p></p>
{{ content|fill:"80"|safe|linebreaksbr|keep_spacing|sanitize_html|safe }}
{{ content|fill:"80"|safe|linebreaksbr|keep_spacing|sanitize }}
{% endif %}
{% endblock %}

View file

@ -124,7 +124,7 @@
<h2>{% if doc.meeting_related %}Meeting{% endif %} {{ doc.type.name }}<br><small>{{ doc.name }}</small></h2>
{% if doc.rev and content != None %}
{{ content|maybewordwrap|safe|linebreaksbr|keep_spacing|sanitize_html|safe }}
{{ content|maybewordwrap|safe|linebreaksbr|keep_spacing|sanitize }}
{% else %}
<p>Not available as plain text.</p>

View file

@ -55,6 +55,6 @@
<h2>{{ doc.type.name }}<br><small>{{ doc.name }}</small></h2>
{% if doc.rev and content != None %}
<pre class="pasted">{{ content|linkify|safe|sanitize_html|safe }}</pre>
<pre class="pasted">{{ content|linkify|safe|sanitize }}</pre>
{% endif %}
{% endblock %}

View file

@ -155,7 +155,7 @@
</p>
{% if doc.rev %}
{{ content|fill:"80"|safe|linebreaksbr|keep_spacing|sanitize_html|safe }}
{{ content|fill:"80"|safe|linebreaksbr|keep_spacing|sanitize }}
{% endif %}
{% endblock %}