Added cleaning of the session request form's 'comments' field, so that any HTML entered is converted to plain text. Related to [17322].

- Legacy-Id: 17324
Note: SVN reference [17322] has been migrated to Git commit eb88abc394
This commit is contained in:
Henrik Levkowetz 2020-02-21 21:36:18 +00:00
parent eb88abc394
commit fa9427769a
7 changed files with 52 additions and 21 deletions

View file

@ -11,7 +11,7 @@ import debug # pyflakes:ignore
from ietf.group.models import Group
from ietf.meeting.models import ResourceAssociation
from ietf.person.fields import SearchablePersonsField
from ietf.utils.html import clean_text_field
# -------------------------------------------------
# Globals
@ -145,6 +145,9 @@ class SessionForm(forms.Form):
check_conflict(conflict, self.group)
return conflict
def clean_comments(self):
    """Return the cleaned 'comments' value after running it through clean_text_field()."""
    submitted_comments = self.cleaned_data['comments']
    return clean_text_field(submitted_comments)
def clean(self):
super(SessionForm, self).clean()
data = self.cleaned_data

View file

@ -289,9 +289,9 @@ def confirm(request, acronym):
new_session = Session.objects.create(
meeting=meeting,
group=group,
attendees=form.data['attendees'],
attendees=form.cleaned_data['attendees'],
requested_duration=datetime.timedelta(0,int(duration)),
comments=form.data['comments'],
comments=form.cleaned_data['comments'],
type_id='regular',
)
SchedulingEvent.objects.create(

View file

@ -1,4 +1,4 @@
# Copyright The IETF Trust 2010-2019, All Rights Reserved
# Copyright The IETF Trust 2010-2020, All Rights Reserved
# -*- coding: utf-8 -*-
# Taken from http://code.google.com/p/soclone/source/browse/trunk/soclone/utils/html.py
"""Utilities for working with HTML."""
@ -8,6 +8,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import bleach
import copy
import html2text
import lxml.etree
import lxml.html
import lxml.html.clean
@ -15,8 +16,11 @@ import six
import debug # pyflakes:ignore
from django import forms
from django.utils.functional import keep_lazy
from ietf.utils.mime import get_mime_type
acceptable_tags = ('a', 'abbr', 'acronym', 'address', 'b', 'big',
'blockquote', 'body', 'br', 'caption', 'center', 'cite', 'code', 'col',
'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'font',
@ -76,3 +80,18 @@ lxml_cleaner = Cleaner(allow_tags=acceptable_tags, remove_unknown_tags=None, sty
def sanitize_document(html):
return lxml_cleaner.clean_html(html)
# ----------------------------------------------------------------------
# Text field cleaning
def clean_text_field(text):
    """Return *text* as plain text, converting it from HTML when necessary.

    The value is encoded as UTF-8 and handed to get_mime_type(); the detected
    mime type decides what happens next:

    * 'text/plain' and 'application/x-empty' -- the text is returned unchanged.
    * 'text/html' -- the text is converted with html2text.html2text().
    * anything else -- forms.ValidationError is raised, naming the mime type.
    """
    # The second element of the result is unpacked but not used here.
    mime_type, _encoding = get_mime_type(text.encode('utf8'))

    if mime_type in ['text/plain', 'application/x-empty', ]:
        return text
    if mime_type == 'text/html':
        return html2text.html2text(text)
    raise forms.ValidationError("Unexpected text field mime type: %s" % mime_type)

21
ietf/utils/mime.py Normal file
View file

@ -0,0 +1,21 @@
# Copyright The IETF Trust 2020, All Rights Reserved
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
import magic
def get_mime_type(content):
    """Describe *content* using the `magic` module and split the answer.

    Returns the list produced by splitting the description string on the
    first '; ' separator.  The caller visible in this change unpacks it as
    ``mime_type, encoding``.
    """
    # Two different call styles are supported, chosen by whether the imported
    # module exposes `open` -- presumably two distinct Python bindings for
    # libmagic; TODO confirm which packages these correspond to.
    if not hasattr(magic, "open"):
        detector = magic.Magic()
        flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        # Swap in a cookie opened with the mime flags, then load the
        # database into it (passing None as the path).
        detector.cookie = magic.magic_open(flags)
        magic.magic_load(detector.cookie, None)
        description = detector.from_buffer(content)
    else:
        detector = magic.open(magic.MAGIC_MIME)
        detector.load()
        description = detector.buffer(content)
    return description.split('; ', 1)

View file

@ -1,12 +1,10 @@
# Copyright The IETF Trust 2016-2019, All Rights Reserved
# Copyright The IETF Trust 2016-2020, All Rights Reserved
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
import os
import re
import magic
from pyquery import PyQuery
from django.conf import settings
@ -17,6 +15,8 @@ from django.utils.deconstruct import deconstructible
import debug # pyflakes:ignore
from ietf.utils.mime import get_mime_type
# Note that this is an instantiation of the regex validator, _not_ the
# regex-string validator defined right below
validate_no_control_chars = RegexValidator(
@ -55,20 +55,6 @@ class RegexStringValidator(object):
validate_regular_expression_string = RegexStringValidator()
def get_mime_type(content):
    """Describe *content* using the `magic` module and split the answer.

    Returns the list produced by splitting the description string on the
    first '; ' separator.
    """
    # Two different call styles are supported, chosen by whether the imported
    # module exposes `open` -- presumably two distinct Python bindings for
    # libmagic; TODO confirm which packages these correspond to.
    if not hasattr(magic, "open"):
        detector = magic.Magic()
        flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        # Swap in a cookie opened with the mime flags, then load the
        # database into it (passing None as the path).
        detector.cookie = magic.magic_open(flags)
        magic.magic_load(detector.cookie, None)
        description = detector.from_buffer(content)
    else:
        detector = magic.open(magic.MAGIC_MIME)
        detector.load()
        description = detector.buffer(content)
    return description.split('; ', 1)
def validate_file_size(file):
if file._size > settings.SECR_MAX_UPLOAD_SIZE:
raise ValidationError('Please keep filesize under %s. Requested upload size was %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE), filesizeformat(file._size)))

View file

@ -28,6 +28,7 @@ factory-boy>=2.9.0
google-api-python-client
Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode.
hashids>=1.1.0
html2text>=2019.8.11
html5lib>=1.0.1
httplib2>=0.10.3
# jsonfield 3.x and higher requires Django 2.2 or higher

View file

@ -29,6 +29,7 @@ factory-boy>=2.9.0
google-api-python-client
Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode.
hashids>=1.1.0
html2text>=2019.8.11
html5lib>=1.0.1
httplib2>=0.10.3
# jsonfield 3.x and higher requires Django 2.2 or higher