diff --git a/ietf/meeting/forms.py b/ietf/meeting/forms.py
index 1427d2956..07482c2a1 100644
--- a/ietf/meeting/forms.py
+++ b/ietf/meeting/forms.py
@@ -3,6 +3,8 @@ import codecs
import datetime
from django import forms
+from django.conf import settings
+from django.core.exceptions import ValidationError
from django.db.models import Q
from django.forms import BaseInlineFormSet
@@ -17,6 +19,8 @@ from ietf.meeting.helpers import is_meeting_approved, get_next_agenda_name
from ietf.message.models import Message
from ietf.person.models import Person
from ietf.utils.fields import DatepickerDateField, DurationField
+from ietf.utils.validators import ( validate_file_size, validate_mime_type,
+ validate_file_extension, validate_no_html_frame)
# need to insert empty option for use in ChoiceField
# countries.insert(0, ('', '-'*9 ))
@@ -305,3 +309,38 @@ class InterimCancelForm(forms.Form):
super(InterimCancelForm, self).__init__(*args, **kwargs)
self.fields['group'].widget.attrs['disabled'] = True
self.fields['date'].widget.attrs['disabled'] = True
+
+class FileUploadForm(forms.Form):
+ file = forms.FileField(label='File to upload')
+
+ def __init__(self, *args, **kwargs):
+ doc_type = kwargs.pop('doc_type')
+ assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
+ self.doc_type = doc_type
+ self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type]
+ self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type]
+ super(FileUploadForm, self).__init__(*args, **kwargs)
+ label = '%s file to upload. ' % (self.doc_type.capitalize(), )
+ if self.mime_types:
+ label += 'Note that you can only upload files with these formats: %s.' % (', '.join(self.mime_types, ))
+ self.fields['file'].label=label
+
+ def clean_file(self):
+ file = self.cleaned_data['file']
+ validate_file_size(file)
+ ext = validate_file_extension(file, self.extensions)
+ mime_type = None
+ if self.mime_types:
+ mime_type, encoding = validate_mime_type(file, self.mime_types)
+ if mime_type != file.content_type:
+ raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))
+ if mime_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
+ if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[mime_type]:
+ raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext))
+ if mime_type in ['text/html', ] or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
+ # We'll do html sanitization later, but for frames, we fail here,
+ # as the sanitized version will most likely be useless.
+ validate_no_html_frame(file)
+ return file
+
+
diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py
index ffc051295..9b34f78e1 100644
--- a/ietf/meeting/tests_views.py
+++ b/ietf/meeting/tests_views.py
@@ -1680,12 +1680,23 @@ class MaterialsTests(TestCase):
q = PyQuery(r.content)
self.assertTrue(q('form .has-error'))
+ # Test html sanitization
+ test_file = StringIO('
Title
')
+ test_file.name = "some.html"
+ r = self.client.post(url,dict(file=test_file))
+ self.assertEqual(r.status_code, 302)
+ doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document
+ self.assertEqual(doc.rev,'00')
+ text = doc.text()
+ self.assertIn('Some text', text)
+ self.assertNotIn('', text)
+
test_file = StringIO(u'This is some text for a test, with the word\nvirtual at the beginning of a line.')
test_file.name = "not_really.txt"
r = self.client.post(url,dict(file=test_file,apply_to_all=False))
self.assertEqual(r.status_code, 302)
doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document
- self.assertEqual(doc.rev,'00')
+ self.assertEqual(doc.rev,'01')
self.assertFalse(session2.sessionpresentation_set.filter(document__type_id=doctype))
r = self.client.get(url)
@@ -1697,7 +1708,7 @@ class MaterialsTests(TestCase):
r = self.client.post(url,dict(file=test_file,apply_to_all=True))
self.assertEqual(r.status_code, 302)
doc = Document.objects.get(pk=doc.pk)
- self.assertEqual(doc.rev,'01')
+ self.assertEqual(doc.rev,'02')
self.assertTrue(session2.sessionpresentation_set.filter(document__type_id=doctype))
def test_upload_minutes_agenda_unscheduled(self):
diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py
index a946334c5..9558579c2 100644
--- a/ietf/meeting/views.py
+++ b/ietf/meeting/views.py
@@ -63,11 +63,10 @@ from ietf.utils.mail import send_mail_message
from ietf.utils.pipe import pipe
from ietf.utils.pdf import pdf_pages
from ietf.utils.text import xslugify
-from ietf.utils.validators import ( validate_file_size, validate_mime_type,
- validate_file_extension, validate_no_html_frame, get_mime_type)
+from ietf.utils.validators import get_mime_type
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
- InterimCancelForm, InterimSessionInlineFormSet)
+ InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm)
def get_menu_entries(request):
@@ -1117,14 +1116,13 @@ def add_session_drafts(request, session_id, num):
'form': form,
})
-class UploadBlueSheetForm(forms.Form):
- file = forms.FileField(label='Bluesheet scan to upload')
- def clean_file(self):
- file = self.cleaned_data['file']
- validate_mime_type(file, settings.MEETING_VALID_BLUESHEET_MIME_TYPES)
- validate_file_extension(file, settings.MEETING_VALID_BLUESHEET_EXTENSIONS)
- return file
+class UploadBlueSheetForm(FileUploadForm):
+
+ def __init__(self, *args, **kwargs):
+ kwargs['doc_type'] = 'bluesheets'
+ super(UploadBlueSheetForm, self).__init__(*args, **kwargs )
+
@role_required('Area Director', 'Secretariat', 'IRTF Chair', 'WG Chair', 'RG Chair')
def upload_session_bluesheets(request, session_id, num):
@@ -1193,25 +1191,15 @@ def upload_session_bluesheets(request, session_id, num):
})
-# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
-# It should look at the contents of the files instead.
-class UploadMinutesForm(forms.Form):
- file = forms.FileField(label='Minutes file to upload. Note that you can only upload minutes in txt, html, or pdf formats.')
+class UploadMinutesForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
- super(UploadMinutesForm, self).__init__(*args, **kwargs)
+ kwargs['doc_type'] = 'minutes'
+ super(UploadMinutesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
- def clean_file(self):
- file = self.cleaned_data['file']
- validate_file_size(file)
- ext = validate_file_extension(file, settings.MEETING_VALID_MINUTES_EXTENSIONS)
- mime_type, encoding = validate_mime_type(file, settings.MEETING_VALID_MINUTES_MIME_TYPES)
- if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
- validate_no_html_frame(file)
- return file
def upload_session_minutes(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
@@ -1301,26 +1289,15 @@ def upload_session_minutes(request, session_id, num):
})
-# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
-# It should look at the contents of the files instead.
-class UploadAgendaForm(forms.Form):
- file = forms.FileField(label='Agenda file to upload. Note that you can only upload agendas in txt or html formats.')
+class UploadAgendaForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
- super(UploadAgendaForm, self).__init__(*args, **kwargs)
+ kwargs['doc_type'] = 'agenda'
+ super(UploadAgendaForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
- def clean_file(self):
- file = self.cleaned_data['file']
- validate_file_size(file)
- ext = validate_file_extension(file, settings.MEETING_VALID_AGENDA_EXTENSIONS)
- mime_type, encoding = validate_mime_type(file, settings.MEETING_VALID_AGENDA_MIME_TYPES)
- if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
- validate_no_html_frame(file)
- return file
-
def upload_session_agenda(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id)
@@ -1399,7 +1376,7 @@ def upload_session_agenda(request, session_id, num):
e = NewRevisionDocEvent.objects.create(doc=doc,by=request.user.person,type='new_revision',desc='New revision available: %s'%doc.rev,rev=doc.rev)
doc.save_with_history([e])
# The way this function builds the filename it will never trigger the file delete in handle_file_upload.
- handle_upload_file(file, filename, session.meeting, 'agenda')
+ handle_upload_file(file, filename, session.meeting, 'agenda', request)
return redirect('ietf.meeting.views.session_details',num=num,acronym=session.group.acronym)
else:
form = UploadAgendaForm(show_apply_to_all_checkbox, initial={'apply_to_all':session.type_id=='session'})
@@ -1412,23 +1389,16 @@ def upload_session_agenda(request, session_id, num):
})
-# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
-# It should look at the contents of the files instead.
-class UploadSlidesForm(forms.Form):
+class UploadSlidesForm(FileUploadForm):
title = forms.CharField(max_length=255)
- file = forms.FileField(label='Slides file to upload.')
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
- super(UploadSlidesForm, self).__init__(*args, **kwargs)
+ kwargs['doc_type'] = 'slides'
+ super(UploadSlidesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
- def clean_file(self):
- file = self.cleaned_data['file']
- validate_file_size(file)
- validate_file_extension(file, settings.MEETING_VALID_SLIDES_EXTENSIONS)
- return file
def upload_session_slides(request, session_id, num, name):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
diff --git a/ietf/secr/proceedings/utils.py b/ietf/secr/proceedings/utils.py
index 799951c7d..a83fa7008 100644
--- a/ietf/secr/proceedings/utils.py
+++ b/ietf/secr/proceedings/utils.py
@@ -1,9 +1,15 @@
+
import glob
import os
+from django.conf import settings
+from django.contrib import messages
+
import debug # pyflakes:ignore
-def handle_upload_file(file,filename,meeting,subdir):
+from ietf.utils.html import sanitize
+
+def handle_upload_file(file,filename,meeting,subdir, request=None):
'''
This function takes a file object, a filename and a meeting object and subdir as string.
It saves the file to the appropriate directory, get_materials_path() + subdir.
@@ -28,8 +34,19 @@ def handle_upload_file(file,filename,meeting,subdir):
os.remove(f)
destination = open(os.path.join(path,filename), 'wb+')
- for chunk in file.chunks():
- destination.write(chunk)
+ if extension in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
+ file.open()
+ text = file.read()
+ # Whole file sanitization; add back '' (sanitize will remove it)
+ clean = u"\n%s\n\n" % sanitize(text)
+ destination.write(clean.encode('utf8'))
+ if request and clean != text:
+ messages.warning(request, "Uploaded html content is sanitized to prevent unsafe content. "
+ "Your upload %s was changed by the sanitization; please check the "
+ "resulting content. " % (filename, ))
+ else:
+ for chunk in file.chunks():
+ destination.write(chunk)
destination.close()
# unzip zipfile
diff --git a/ietf/settings.py b/ietf/settings.py
index 6e8788bc9..8b372a62a 100644
--- a/ietf/settings.py
+++ b/ietf/settings.py
@@ -729,16 +729,26 @@ MEETING_MATERIALS_DEFAULT_SUBMISSION_START_DAYS = 90
MEETING_MATERIALS_DEFAULT_SUBMISSION_CUTOFF_DAYS = 26
MEETING_MATERIALS_DEFAULT_SUBMISSION_CORRECTION_DAYS = 50
-MEETING_VALID_AGENDA_EXTENSIONS = ['.txt','.html','.htm', '.md', ]
-MEETING_VALID_AGENDA_MIME_TYPES = ['text/plain', 'text/html', ]
-#
-MEETING_VALID_MINUTES_EXTENSIONS = ['.txt','.html','.htm', '.md', '.pdf', ]
-MEETING_VALID_MINUTES_MIME_TYPES = ['text/plain', 'text/html', 'application/pdf', ]
-#
-MEETING_VALID_SLIDES_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
-#
-MEETING_VALID_BLUESHEET_EXTENSIONS = ['.pdf', '.txt', ]
-MEETING_VALID_BLUESHEET_MIME_TYPES = ['application/pdf', 'text/plain', ]
+MEETING_VALID_UPLOAD_EXTENSIONS = {
+ 'agenda': ['.txt','.html','.htm', '.md', ],
+ 'minutes': ['.txt','.html','.htm', '.md', '.pdf', ],
+ 'slides': ['.doc','.docx','.pdf','.ppt','.pptx','.txt', ], # Note the removal of .zip
+ 'bluesheets': ['.pdf', '.txt', ],
+}
+
+MEETING_VALID_UPLOAD_MIME_TYPES = {
+ 'agenda': ['text/plain', 'text/html', ],
+ 'minutes': ['text/plain', 'text/html', 'application/pdf', ],
+ 'slides': None,
+ 'bluesheets': ['application/pdf', 'text/plain', ],
+}
+
+MEETING_VALID_MIME_TYPE_EXTENSIONS = {
+ 'text/plain': ['.txt', '.md', ],
+ 'text/html': ['.html', '.htm'],
+ 'application/pdf': ['.pdf'],
+}
+
INTERNET_DRAFT_DAYS_TO_EXPIRE = 185
diff --git a/ietf/utils/html.py b/ietf/utils/html.py
index 1afc77626..398bc3203 100644
--- a/ietf/utils/html.py
+++ b/ietf/utils/html.py
@@ -5,6 +5,8 @@ import html5lib
import bleach
from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
+import debug # pyflakes:ignore
+
from django.utils.functional import keep_lazy
from django.utils import six
@@ -69,6 +71,10 @@ def remove_tags(html, tags):
return bleach.clean(html, tags=allowed)
remove_tags = keep_lazy(remove_tags, six.text_type)
+def sanitize(html, tags=acceptable_elements, extra=[], remove=[], strip=True):
+ tags = list(set(tags) | set(extra) ^ set(remove))
+ return bleach.clean(html, tags=tags, strip=strip)
+
def clean_html(html):
return bleach.clean(html)