Added sanitization of uploaded html content for session agendas and minutes, and did some refactoring of the upload form classes.
- Legacy-Id: 14738
This commit is contained in:
parent
27914a0a90
commit
b92ad2f992
|
@ -3,6 +3,8 @@ import codecs
|
|||
import datetime
|
||||
|
||||
from django import forms
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.db.models import Q
|
||||
from django.forms import BaseInlineFormSet
|
||||
|
||||
|
@ -17,6 +19,8 @@ from ietf.meeting.helpers import is_meeting_approved, get_next_agenda_name
|
|||
from ietf.message.models import Message
|
||||
from ietf.person.models import Person
|
||||
from ietf.utils.fields import DatepickerDateField, DurationField
|
||||
from ietf.utils.validators import ( validate_file_size, validate_mime_type,
|
||||
validate_file_extension, validate_no_html_frame)
|
||||
|
||||
# need to insert empty option for use in ChoiceField
|
||||
# countries.insert(0, ('', '-'*9 ))
|
||||
|
@ -305,3 +309,38 @@ class InterimCancelForm(forms.Form):
|
|||
super(InterimCancelForm, self).__init__(*args, **kwargs)
|
||||
self.fields['group'].widget.attrs['disabled'] = True
|
||||
self.fields['date'].widget.attrs['disabled'] = True
|
||||
|
||||
class FileUploadForm(forms.Form):
    """
    Base form for uploading one meeting-material file.

    The required ``doc_type`` keyword argument selects the accepted file name
    extensions and mime types from settings.MEETING_VALID_UPLOAD_EXTENSIONS
    and settings.MEETING_VALID_UPLOAD_MIME_TYPES.
    """
    file = forms.FileField(label='File to upload')

    def __init__(self, *args, **kwargs):
        """
        Pop ``doc_type`` from kwargs, remember the matching extension and
        mime-type lists, and give the file field a doc-type specific label.
        """
        # doc_type is consumed here; it is not passed on to forms.Form.
        doc_type = kwargs.pop('doc_type')
        assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
        self.doc_type = doc_type
        self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type]
        self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type]
        super(FileUploadForm, self).__init__(*args, **kwargs)
        # The format note is only shown when mime types are configured for
        # this doc_type.
        note = ''
        if self.mime_types:
            note = 'Note that you can only upload files with these formats: %s.' % (', '.join(self.mime_types), )
        self.fields['file'].label = ('%s file to upload. ' % (self.doc_type.capitalize(), )) + note

    def clean_file(self):
        """
        Validate the uploaded file and return it.

        Runs the size and extension validators.  When mime types are
        configured for this doc_type, also runs the mime-type validator and
        raises ValidationError if the observed mime type differs from the
        upload's Content-Type, or if the extension is not one listed for that
        mime type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS.  Html
        uploads are additionally passed to validate_no_html_frame().
        """
        upload = self.cleaned_data['file']
        validate_file_size(upload)
        ext = validate_file_extension(upload, self.extensions)
        mime_type = None
        if self.mime_types:
            mime_type, _encoding = validate_mime_type(upload, self.mime_types)
            if mime_type != upload.content_type:
                raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (upload.content_type, mime_type))
            known = settings.MEETING_VALID_MIME_TYPE_EXTENSIONS
            if mime_type in known and ext not in known[mime_type]:
                raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (upload.content_type, ext))
        if mime_type == 'text/html' or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
            # We'll do html sanitization later, but for frames, we fail here,
            # as the sanitized version will most likely be useless.
            validate_no_html_frame(upload)
        return upload
|
||||
|
||||
|
||||
|
|
|
@ -1680,12 +1680,23 @@ class MaterialsTests(TestCase):
|
|||
q = PyQuery(r.content)
|
||||
self.assertTrue(q('form .has-error'))
|
||||
|
||||
# Test html sanitization
|
||||
test_file = StringIO('<html><h1>Title</h1><section>Some text</section></html>')
|
||||
test_file.name = "some.html"
|
||||
r = self.client.post(url,dict(file=test_file))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document
|
||||
self.assertEqual(doc.rev,'00')
|
||||
text = doc.text()
|
||||
self.assertIn('Some text', text)
|
||||
self.assertNotIn('<section>', text)
|
||||
|
||||
test_file = StringIO(u'This is some text for a test, with the word\nvirtual at the beginning of a line.')
|
||||
test_file.name = "not_really.txt"
|
||||
r = self.client.post(url,dict(file=test_file,apply_to_all=False))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document
|
||||
self.assertEqual(doc.rev,'00')
|
||||
self.assertEqual(doc.rev,'01')
|
||||
self.assertFalse(session2.sessionpresentation_set.filter(document__type_id=doctype))
|
||||
|
||||
r = self.client.get(url)
|
||||
|
@ -1697,7 +1708,7 @@ class MaterialsTests(TestCase):
|
|||
r = self.client.post(url,dict(file=test_file,apply_to_all=True))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
doc = Document.objects.get(pk=doc.pk)
|
||||
self.assertEqual(doc.rev,'01')
|
||||
self.assertEqual(doc.rev,'02')
|
||||
self.assertTrue(session2.sessionpresentation_set.filter(document__type_id=doctype))
|
||||
|
||||
def test_upload_minutes_agenda_unscheduled(self):
|
||||
|
|
|
@ -63,11 +63,10 @@ from ietf.utils.mail import send_mail_message
|
|||
from ietf.utils.pipe import pipe
|
||||
from ietf.utils.pdf import pdf_pages
|
||||
from ietf.utils.text import xslugify
|
||||
from ietf.utils.validators import ( validate_file_size, validate_mime_type,
|
||||
validate_file_extension, validate_no_html_frame, get_mime_type)
|
||||
from ietf.utils.validators import get_mime_type
|
||||
|
||||
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
|
||||
InterimCancelForm, InterimSessionInlineFormSet)
|
||||
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm)
|
||||
|
||||
|
||||
def get_menu_entries(request):
|
||||
|
@ -1117,14 +1116,13 @@ def add_session_drafts(request, session_id, num):
|
|||
'form': form,
|
||||
})
|
||||
|
||||
class UploadBlueSheetForm(forms.Form):
|
||||
file = forms.FileField(label='Bluesheet scan to upload')
|
||||
|
||||
def clean_file(self):
|
||||
file = self.cleaned_data['file']
|
||||
validate_mime_type(file, settings.MEETING_VALID_BLUESHEET_MIME_TYPES)
|
||||
validate_file_extension(file, settings.MEETING_VALID_BLUESHEET_EXTENSIONS)
|
||||
return file
|
||||
class UploadBlueSheetForm(FileUploadForm):
    """Upload form for bluesheets; always uses the 'bluesheets' doc_type."""

    def __init__(self, *args, **kwargs):
        # Force the doc_type, overriding any value supplied by the caller.
        kwargs.update(doc_type='bluesheets')
        super(UploadBlueSheetForm, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
@role_required('Area Director', 'Secretariat', 'IRTF Chair', 'WG Chair', 'RG Chair')
|
||||
def upload_session_bluesheets(request, session_id, num):
|
||||
|
@ -1193,25 +1191,15 @@ def upload_session_bluesheets(request, session_id, num):
|
|||
})
|
||||
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadMinutesForm(forms.Form):
|
||||
file = forms.FileField(label='Minutes file to upload. Note that you can only upload minutes in txt, html, or pdf formats.')
|
||||
class UploadMinutesForm(FileUploadForm):
|
||||
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
|
||||
|
||||
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
|
||||
super(UploadMinutesForm, self).__init__(*args, **kwargs)
|
||||
kwargs['doc_type'] = 'minutes'
|
||||
super(UploadMinutesForm, self).__init__(*args, **kwargs )
|
||||
if not show_apply_to_all_checkbox:
|
||||
self.fields.pop('apply_to_all')
|
||||
|
||||
def clean_file(self):
|
||||
file = self.cleaned_data['file']
|
||||
validate_file_size(file)
|
||||
ext = validate_file_extension(file, settings.MEETING_VALID_MINUTES_EXTENSIONS)
|
||||
mime_type, encoding = validate_mime_type(file, settings.MEETING_VALID_MINUTES_MIME_TYPES)
|
||||
if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
|
||||
validate_no_html_frame(file)
|
||||
return file
|
||||
|
||||
def upload_session_minutes(request, session_id, num):
|
||||
# num is redundant, but we're dragging it along as an artifact of where we are in the current URL structure
|
||||
|
@ -1301,26 +1289,15 @@ def upload_session_minutes(request, session_id, num):
|
|||
})
|
||||
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadAgendaForm(forms.Form):
|
||||
file = forms.FileField(label='Agenda file to upload. Note that you can only upload agendas in txt or html formats.')
|
||||
class UploadAgendaForm(FileUploadForm):
|
||||
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
|
||||
|
||||
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
|
||||
super(UploadAgendaForm, self).__init__(*args, **kwargs)
|
||||
kwargs['doc_type'] = 'agenda'
|
||||
super(UploadAgendaForm, self).__init__(*args, **kwargs )
|
||||
if not show_apply_to_all_checkbox:
|
||||
self.fields.pop('apply_to_all')
|
||||
|
||||
def clean_file(self):
|
||||
file = self.cleaned_data['file']
|
||||
validate_file_size(file)
|
||||
ext = validate_file_extension(file, settings.MEETING_VALID_AGENDA_EXTENSIONS)
|
||||
mime_type, encoding = validate_mime_type(file, settings.MEETING_VALID_AGENDA_MIME_TYPES)
|
||||
if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
|
||||
validate_no_html_frame(file)
|
||||
return file
|
||||
|
||||
def upload_session_agenda(request, session_id, num):
|
||||
# num is redundant, but we're dragging it along as an artifact of where we are in the current URL structure
|
||||
session = get_object_or_404(Session,pk=session_id)
|
||||
|
@ -1399,7 +1376,7 @@ def upload_session_agenda(request, session_id, num):
|
|||
e = NewRevisionDocEvent.objects.create(doc=doc,by=request.user.person,type='new_revision',desc='New revision available: %s'%doc.rev,rev=doc.rev)
|
||||
doc.save_with_history([e])
|
||||
# The way this function builds the filename it will never trigger the file delete in handle_upload_file.
|
||||
handle_upload_file(file, filename, session.meeting, 'agenda')
|
||||
handle_upload_file(file, filename, session.meeting, 'agenda', request)
|
||||
return redirect('ietf.meeting.views.session_details',num=num,acronym=session.group.acronym)
|
||||
else:
|
||||
form = UploadAgendaForm(show_apply_to_all_checkbox, initial={'apply_to_all':session.type_id=='session'})
|
||||
|
@ -1412,23 +1389,16 @@ def upload_session_agenda(request, session_id, num):
|
|||
})
|
||||
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadSlidesForm(forms.Form):
|
||||
class UploadSlidesForm(FileUploadForm):
|
||||
title = forms.CharField(max_length=255)
|
||||
file = forms.FileField(label='Slides file to upload.')
|
||||
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False)
|
||||
|
||||
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
|
||||
super(UploadSlidesForm, self).__init__(*args, **kwargs)
|
||||
kwargs['doc_type'] = 'slides'
|
||||
super(UploadSlidesForm, self).__init__(*args, **kwargs )
|
||||
if not show_apply_to_all_checkbox:
|
||||
self.fields.pop('apply_to_all')
|
||||
|
||||
def clean_file(self):
|
||||
file = self.cleaned_data['file']
|
||||
validate_file_size(file)
|
||||
validate_file_extension(file, settings.MEETING_VALID_SLIDES_EXTENSIONS)
|
||||
return file
|
||||
|
||||
def upload_session_slides(request, session_id, num, name):
|
||||
# num is redundant, but we're dragging it along as an artifact of where we are in the current URL structure
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib import messages
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
def handle_upload_file(file,filename,meeting,subdir):
|
||||
from ietf.utils.html import sanitize
|
||||
|
||||
def handle_upload_file(file,filename,meeting,subdir, request=None):
|
||||
'''
|
||||
This function takes a file object, a filename and a meeting object and subdir as string.
|
||||
It saves the file to the appropriate directory, get_materials_path() + subdir.
|
||||
|
@ -28,8 +34,19 @@ def handle_upload_file(file,filename,meeting,subdir):
|
|||
os.remove(f)
|
||||
|
||||
destination = open(os.path.join(path,filename), 'wb+')
|
||||
for chunk in file.chunks():
|
||||
destination.write(chunk)
|
||||
if extension in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
|
||||
file.open()
|
||||
text = file.read()
|
||||
# Whole file sanitization; add back '<html>' (sanitize will remove it)
|
||||
clean = u"<html>\n%s\n</html>\n" % sanitize(text)
|
||||
destination.write(clean.encode('utf8'))
|
||||
if request and clean != text:
|
||||
messages.warning(request, "Uploaded html content is sanitized to prevent unsafe content. "
|
||||
"Your upload %s was changed by the sanitization; please check the "
|
||||
"resulting content. " % (filename, ))
|
||||
else:
|
||||
for chunk in file.chunks():
|
||||
destination.write(chunk)
|
||||
destination.close()
|
||||
|
||||
# unzip zipfile
|
||||
|
|
|
@ -729,16 +729,26 @@ MEETING_MATERIALS_DEFAULT_SUBMISSION_START_DAYS = 90
|
|||
MEETING_MATERIALS_DEFAULT_SUBMISSION_CUTOFF_DAYS = 26
|
||||
MEETING_MATERIALS_DEFAULT_SUBMISSION_CORRECTION_DAYS = 50
|
||||
|
||||
MEETING_VALID_AGENDA_EXTENSIONS = ['.txt','.html','.htm', '.md', ]
|
||||
MEETING_VALID_AGENDA_MIME_TYPES = ['text/plain', 'text/html', ]
|
||||
#
|
||||
MEETING_VALID_MINUTES_EXTENSIONS = ['.txt','.html','.htm', '.md', '.pdf', ]
|
||||
MEETING_VALID_MINUTES_MIME_TYPES = ['text/plain', 'text/html', 'application/pdf', ]
|
||||
#
|
||||
MEETING_VALID_SLIDES_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
|
||||
#
|
||||
MEETING_VALID_BLUESHEET_EXTENSIONS = ['.pdf', '.txt', ]
|
||||
MEETING_VALID_BLUESHEET_MIME_TYPES = ['application/pdf', 'text/plain', ]
|
||||
# Accepted file name extensions for uploads, keyed by document type.
MEETING_VALID_UPLOAD_EXTENSIONS = {
    'agenda': [
        '.txt',
        '.html',
        '.htm',
        '.md',
    ],
    'minutes': [
        '.txt',
        '.html',
        '.htm',
        '.md',
        '.pdf',
    ],
    # Note the removal of .zip
    'slides': [
        '.doc',
        '.docx',
        '.pdf',
        '.ppt',
        '.pptx',
        '.txt',
    ],
    'bluesheets': [
        '.pdf',
        '.txt',
    ],
}

# Accepted mime types for uploads, keyed by document type.  A value of None
# means there is no mime-type list for that document type.
MEETING_VALID_UPLOAD_MIME_TYPES = {
    'agenda': [
        'text/plain',
        'text/html',
    ],
    'minutes': [
        'text/plain',
        'text/html',
        'application/pdf',
    ],
    'slides': None,
    'bluesheets': [
        'application/pdf',
        'text/plain',
    ],
}

# File name extensions that are acceptable for a given mime type.
MEETING_VALID_MIME_TYPE_EXTENSIONS = {
    'text/plain': [
        '.txt',
        '.md',
    ],
    'text/html': [
        '.html',
        '.htm',
    ],
    'application/pdf': [
        '.pdf',
    ],
}
|
||||
|
||||
|
||||
INTERNET_DRAFT_DAYS_TO_EXPIRE = 185
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@ import html5lib
|
|||
import bleach
|
||||
from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from django.utils.functional import keep_lazy
|
||||
from django.utils import six
|
||||
|
||||
|
@ -69,6 +71,10 @@ def remove_tags(html, tags):
|
|||
return bleach.clean(html, tags=allowed)
|
||||
remove_tags = keep_lazy(remove_tags, six.text_type)
|
||||
|
||||
def sanitize(html, tags=acceptable_elements, extra=[], remove=[], strip=True):
    """
    Clean *html* with bleach, allowing only a configurable set of tags.

    The allowed tag set is ``tags`` plus ``extra`` minus ``remove``; a tag
    listed in ``remove`` is disallowed even if it also appears in ``tags``
    or ``extra``.  ``strip`` is handed to bleach.clean() unchanged.  The
    list defaults of ``extra`` and ``remove`` are only read, never mutated.

    Returns the result of bleach.clean().
    """
    # Explicit grouping and set difference: '^' binds tighter than '|', so
    # the earlier 'set(tags) | set(extra) ^ set(remove)' evaluated as
    # tags | (extra ^ remove), which *added* the 'remove' tags to the
    # allowed set instead of taking them out.
    allowed = (set(tags) | set(extra)) - set(remove)
    return bleach.clean(html, tags=list(allowed), strip=strip)
|
||||
|
||||
def clean_html(html):
    """Return *html* as cleaned by bleach.clean() with bleach's default settings."""
    cleaned = bleach.clean(html)
    return cleaned
|
||||
|
||||
|
|
Loading…
Reference in a new issue