diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index 49edb784a..a8fc648d1 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import json import os import shutil @@ -1520,7 +1522,7 @@ class MaterialsTests(TestCase): q = PyQuery(r.content) self.assertTrue('Upload' in unicode(q("title"))) self.assertFalse(session.sessionpresentation_set.exists()) - test_file = StringIO('this is some text for a test') + test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test') test_file.name = "not_really.pdf" r = self.client.post(url,dict(file=test_file)) self.assertEqual(r.status_code, 302) @@ -1530,7 +1532,7 @@ class MaterialsTests(TestCase): self.assertEqual(r.status_code, 200) q = PyQuery(r.content) self.assertTrue('Revise' in unicode(q("title"))) - test_file = StringIO('this is some different text for a test') + test_file = StringIO('%PDF-1.4\n%âãÏÓ\nthis is some different text for a test') test_file.name = "also_not_really.pdf" r = self.client.post(url,dict(file=test_file)) self.assertEqual(r.status_code, 302) @@ -1555,7 +1557,7 @@ class MaterialsTests(TestCase): q = PyQuery(r.content) self.assertTrue('Upload' in unicode(q("title"))) self.assertFalse(session.sessionpresentation_set.exists()) - test_file = StringIO('this is some text for a test') + test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test') test_file.name = "not_really.pdf" r = self.client.post(url,dict(file=test_file)) self.assertEqual(r.status_code, 302) @@ -1610,6 +1612,13 @@ class MaterialsTests(TestCase): q = PyQuery(r.content) self.assertTrue(q('form .has-error')) + test_file = StringIO('') + test_file.name = "not_really.html" + r = self.client.post(url,dict(file=test_file)) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertTrue(q('form .has-error')) + test_file = StringIO('this is some text for a test') test_file.name = "not_really.txt" r = 
self.client.post(url,dict(file=test_file,apply_to_all=False)) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 3072ea486..75c45866f 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -19,11 +19,11 @@ import debug # pyflakes:ignore from django import forms from django.shortcuts import render, redirect, get_object_or_404 from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden, Http404 +from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required from django.urls import reverse,reverse_lazy from django.db.models import Min, Max, Q -from django.conf import settings from django.forms.models import modelform_factory, inlineformset_factory from django.forms import ModelForm from django.template import TemplateDoesNotExist @@ -33,7 +33,7 @@ from django.views.decorators.cache import cache_page from django.utils.text import slugify from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt from django.views.generic import RedirectView -from django.template.defaultfilters import filesizeformat + from ietf.doc.fields import SearchableDocumentsField from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent @@ -64,6 +64,8 @@ from ietf.utils.mail import send_mail_message from ietf.utils.pipe import pipe from ietf.utils.pdf import pdf_pages from ietf.utils.text import xslugify +from ietf.utils.textupload import ( validate_file_size, validate_mime_type, + validate_file_extension, validate_no_html_frame, ) from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm, InterimCancelForm, InterimSessionInlineFormSet) @@ -1132,6 +1134,12 @@ def add_session_drafts(request, session_id, num): class UploadBlueSheetForm(forms.Form): file = forms.FileField(label='Bluesheet scan to upload') + def clean_file(self): + file = self.cleaned_data['file'] + validate_mime_type(file.read(), 
settings.MEETING_VALID_BLUESHEET_MIME_TYPES) + validate_file_extension(file.name, settings.MEETING_VALID_BLUESHEET_EXTENSIONS) + return file + @role_required('Area Director', 'Secretariat', 'IRTF Chair', 'WG Chair') def upload_session_bluesheets(request, session_id, num): # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure @@ -1196,7 +1204,7 @@ def upload_session_bluesheets(request, session_id, num): 'form': form, }) -VALID_MINUTES_EXTENSIONS = ('.txt','.html','.htm','.pdf') + # FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions # It should look at the contents of the files instead. class UploadMinutesForm(forms.Form): @@ -1210,10 +1218,12 @@ class UploadMinutesForm(forms.Form): def clean_file(self): file = self.cleaned_data['file'] - if file._size > settings.SECR_MAX_UPLOAD_SIZE: - raise forms.ValidationError('Please keep filesize under %s. Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size))) - if os.path.splitext(file.name)[1].lower() not in VALID_MINUTES_EXTENSIONS: - raise forms.ValidationError('Only these file types supported for minutes: %s' % ','.join(VALID_MINUTES_EXTENSIONS)) + validate_file_size(file._size) + ext = validate_file_extension(file.name, settings.MEETING_VALID_MINUTES_EXTENSIONS) + content = file.read() + mime_type, encoding = validate_mime_type(content, settings.MEETING_VALID_MINUTES_MIME_TYPES) + if ext in ['.html', '.htm'] or mime_type in ['text/html', ]: + validate_no_html_frame(content) return file def upload_session_minutes(request, session_id, num): @@ -1292,7 +1302,7 @@ def upload_session_minutes(request, session_id, num): 'form': form, }) -VALID_AGENDA_EXTENSIONS = ('.txt','.html','.htm',) + # FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions # It should look at the contents of the files instead. 
class UploadAgendaForm(forms.Form): @@ -1306,10 +1316,12 @@ class UploadAgendaForm(forms.Form): def clean_file(self): file = self.cleaned_data['file'] - if file._size > settings.SECR_MAX_UPLOAD_SIZE: - raise forms.ValidationError('Please keep filesize under %s. Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size))) - if os.path.splitext(file.name)[1].lower() not in VALID_AGENDA_EXTENSIONS: - raise forms.ValidationError('Only these file types supported for agendas: %s' % ','.join(VALID_AGENDA_EXTENSIONS)) + validate_file_size(file._size) + ext = validate_file_extension(file.name, settings.MEETING_VALID_AGENDA_EXTENSIONS) + content = file.read() + mime_type, encoding = validate_mime_type(content, settings.MEETING_VALID_AGENDA_MIME_TYPES) + if ext in ['.html', '.htm'] or mime_type in ['text/html', ]: + validate_no_html_frame(content) return file def upload_session_agenda(request, session_id, num): @@ -1400,7 +1412,7 @@ def upload_session_agenda(request, session_id, num): 'form': form, }) -VALID_SLIDE_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip + # FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions # It should look at the contents of the files instead. class UploadSlidesForm(forms.Form): @@ -1415,10 +1427,8 @@ class UploadSlidesForm(forms.Form): def clean_file(self): file = self.cleaned_data['file'] - if file._size > settings.SECR_MAX_UPLOAD_SIZE: - raise forms.ValidationError('Please keep filesize under %s. 
Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size))) - if os.path.splitext(file.name)[1].lower() not in VALID_SLIDE_EXTENSIONS: - raise forms.ValidationError('Only these file types supported for slides: %s' % ','.join(VALID_SLIDE_EXTENSIONS)) + validate_file_size(file._size) + validate_file_extension(file.name, settings.MEETING_VALID_SLIDES_EXTENSIONS) return file def upload_session_slides(request, session_id, num, name): diff --git a/ietf/settings.py b/ietf/settings.py index 54e4fe2cc..65fd1252f 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -722,6 +722,17 @@ MEETING_MATERIALS_DEFAULT_SUBMISSION_START_DAYS = 90 MEETING_MATERIALS_DEFAULT_SUBMISSION_CUTOFF_DAYS = 26 MEETING_MATERIALS_DEFAULT_SUBMISSION_CORRECTION_DAYS = 50 +MEETING_VALID_AGENDA_EXTENSIONS = ['.txt','.html','.htm', '.md', ] +MEETING_VALID_AGENDA_MIME_TYPES = ['text/plain', 'text/html', ] +# +MEETING_VALID_MINUTES_EXTENSIONS = ['.txt','.html','.htm', '.md', '.pdf', ] +MEETING_VALID_MINUTES_MIME_TYPES = ['text/plain', 'text/html', 'application/pdf', ] +# +MEETING_VALID_SLIDES_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip +# +MEETING_VALID_BLUESHEET_EXTENSIONS = ['.pdf', ] +MEETING_VALID_BLUESHEET_MIME_TYPES = ['application/pdf', ] + INTERNET_DRAFT_DAYS_TO_EXPIRE = 185 FLOORPLAN_MEDIA_DIR = 'floor' diff --git a/ietf/utils/textupload.py b/ietf/utils/textupload.py index 7456825a1..c8b7b0614 100644 --- a/ietf/utils/textupload.py +++ b/ietf/utils/textupload.py @@ -1,6 +1,14 @@ import re +import os +import magic +from pyquery import PyQuery +from django import forms +from django.conf import settings from django.core.exceptions import ValidationError +from django.template.defaultfilters import filesizeformat + +import debug # pyflakes:ignore def get_cleaned_text_file_content(uploaded_file): """Read uploaded file, try to fix up encoding to UTF-8 and @@ -46,3 +54,39 @@ def 
# Unchanged hunk context carried at the start of this line (the tail of
# get_cleaned_text_file_content, whose ``def`` keyword sits on the previous
# line of the patch):
#     content = content.replace("\r\n", "\n").replace("\r", "\n")
#     return content.encode("utf-8")


def get_mime_type(content):
    """Detect the MIME type and encoding of *content* with the ``magic`` module.

    The ``magic`` module is driven through whichever of two APIs it exposes:
    a module-level ``magic.open()`` or the ``magic.Magic`` class together
    with ``magic.magic_open()`` / ``magic.magic_load()``.
    NOTE(review): presumably these are the two different Python bindings
    that both install a module called ``magic`` -- confirm.

    Returns a two-element list ``[mime_type, encoding]``.  The encoding part
    is passed through exactly as the library printed it after the ``'; '``
    separator; it is the empty string when the library output has no such
    separator, so callers can always unpack two values.
    """
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    # partition() always yields three parts, unlike split('; ', 1), which
    # returns a single-element list when the separator is absent and would
    # break the two-name unpacking done by validate_mime_type().
    mime_type, _, encoding = filetype.partition('; ')
    return [mime_type, encoding]


def validate_file_size(size):
    """Reject uploads larger than ``settings.SECR_MAX_UPLOAD_SIZE``.

    *size* is compared directly against the setting.  Raises
    ``forms.ValidationError`` when it is larger; returns ``None`` otherwise.
    """
    if size > settings.SECR_MAX_UPLOAD_SIZE:
        raise forms.ValidationError('Please keep filesize under %s. Requested upload size was %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE), filesizeformat(size)))


def validate_mime_type(content, valid):
    """Check that the detected MIME type of *content* is one of *valid*.

    *valid* is a sequence of MIME type strings.  Returns the tuple
    ``(mime_type, encoding)`` obtained from get_mime_type() on success and
    raises ``forms.ValidationError`` when the detected type is not listed.
    """
    mime_type, encoding = get_mime_type(content)
    if mime_type not in valid:
        raise forms.ValidationError('Found content with unexpected mime type: %s.  Expected one of %s.' %
                                    (mime_type, ', '.join(valid)))
    return mime_type, encoding


def validate_file_extension(name, valid):
    """Check that the extension of file name *name* is one of *valid*.

    The comparison is case-insensitive with respect to *name*; the entries
    of *valid* are expected to be lower case with a leading dot.  Returns
    the extension in lower case, so that callers comparing the result with
    lower-case literals such as ``'.html'`` also match ``'x.HTML'``.  Raises
    ``forms.ValidationError`` (quoting the extension as uploaded) when the
    extension is not accepted.
    """
    ext = os.path.splitext(name)[1]
    normalized = ext.lower()
    if normalized not in valid:
        raise forms.ValidationError('Found an unexpected extension: %s.  Expected one of %s' % (ext, ','.join(valid)))
    return normalized


def validate_no_html_frame(content):
    """Reject HTML *content* that contains frame elements.

    Raises ``forms.ValidationError`` if a ``frameset``, ``frame`` or
    ``iframe`` element is found; returns ``None`` otherwise.
    """
    # An empty (or whitespace-only) document cannot contain frames, and the
    # HTML parser is not guaranteed to accept an empty document, so skip it.
    if not content.strip():
        return
    q = PyQuery(content)
    if q("frameset") or q("frame") or q("iframe"):
        raise forms.ValidationError('Found content with html frames.  Please upload a file that does not use frames')