Added more validation of extension, MIME type, etc. for uploaded meeting agendas and minutes. Added '.md' (Markdown) as an accepted file type. HTML with frames is now rejected. Factored out validation code into separate functions.

- Legacy-Id: 13849
This commit is contained in:
Henrik Levkowetz 2017-07-12 22:45:57 +00:00
parent fee74d3357
commit 46fc7b77fd
4 changed files with 94 additions and 20 deletions

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import json
import os
import shutil
@ -1520,7 +1522,7 @@ class MaterialsTests(TestCase):
q = PyQuery(r.content)
self.assertTrue('Upload' in unicode(q("title")))
self.assertFalse(session.sessionpresentation_set.exists())
test_file = StringIO('this is some text for a test')
test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test')
test_file.name = "not_really.pdf"
r = self.client.post(url,dict(file=test_file))
self.assertEqual(r.status_code, 302)
@ -1530,7 +1532,7 @@ class MaterialsTests(TestCase):
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertTrue('Revise' in unicode(q("title")))
test_file = StringIO('this is some different text for a test')
test_file = StringIO('%PDF-1.4\n%âãÏÓ\nthis is some different text for a test')
test_file.name = "also_not_really.pdf"
r = self.client.post(url,dict(file=test_file))
self.assertEqual(r.status_code, 302)
@ -1555,7 +1557,7 @@ class MaterialsTests(TestCase):
q = PyQuery(r.content)
self.assertTrue('Upload' in unicode(q("title")))
self.assertFalse(session.sessionpresentation_set.exists())
test_file = StringIO('this is some text for a test')
test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test')
test_file.name = "not_really.pdf"
r = self.client.post(url,dict(file=test_file))
self.assertEqual(r.status_code, 302)
@ -1610,6 +1612,13 @@ class MaterialsTests(TestCase):
q = PyQuery(r.content)
self.assertTrue(q('form .has-error'))
test_file = StringIO('<html><frameset><frame src="foo.html"></frame><frame src="bar.html"></frame></frameset></html>')
test_file.name = "not_really.html"
r = self.client.post(url,dict(file=test_file))
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertTrue(q('form .has-error'))
test_file = StringIO('this is some text for a test')
test_file.name = "not_really.txt"
r = self.client.post(url,dict(file=test_file,apply_to_all=False))

View file

@ -19,11 +19,11 @@ import debug # pyflakes:ignore
from django import forms
from django.shortcuts import render, redirect, get_object_or_404
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden, Http404
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.urls import reverse,reverse_lazy
from django.db.models import Min, Max, Q
from django.conf import settings
from django.forms.models import modelform_factory, inlineformset_factory
from django.forms import ModelForm
from django.template import TemplateDoesNotExist
@ -33,7 +33,7 @@ from django.views.decorators.cache import cache_page
from django.utils.text import slugify
from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt
from django.views.generic import RedirectView
from django.template.defaultfilters import filesizeformat
from ietf.doc.fields import SearchableDocumentsField
from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent
@ -64,6 +64,8 @@ from ietf.utils.mail import send_mail_message
from ietf.utils.pipe import pipe
from ietf.utils.pdf import pdf_pages
from ietf.utils.text import xslugify
from ietf.utils.textupload import ( validate_file_size, validate_mime_type,
validate_file_extension, validate_no_html_frame, )
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
InterimCancelForm, InterimSessionInlineFormSet)
@ -1132,6 +1134,12 @@ def add_session_drafts(request, session_id, num):
class UploadBlueSheetForm(forms.Form):
    """Upload form for a scanned bluesheet.

    The uploaded file is accepted only if its content is detected as one of
    ``settings.MEETING_VALID_BLUESHEET_MIME_TYPES`` and its filename
    extension is one of ``settings.MEETING_VALID_BLUESHEET_EXTENSIONS``.
    """
    file = forms.FileField(label='Bluesheet scan to upload')

    def clean_file(self):
        """Validate the uploaded file's content type, then its extension."""
        uploaded = self.cleaned_data['file']
        # The MIME type is sniffed from the actual bytes, not from the
        # filename or the client-supplied content type.
        data = uploaded.read()
        validate_mime_type(data, settings.MEETING_VALID_BLUESHEET_MIME_TYPES)
        validate_file_extension(uploaded.name, settings.MEETING_VALID_BLUESHEET_EXTENSIONS)
        return uploaded
@role_required('Area Director', 'Secretariat', 'IRTF Chair', 'WG Chair')
def upload_session_bluesheets(request, session_id, num):
# num is redundant, but we're dragging it along as an artifact of where we are in the current URL structure
@ -1196,7 +1204,7 @@ def upload_session_bluesheets(request, session_id, num):
'form': form,
})
VALID_MINUTES_EXTENSIONS = ('.txt','.html','.htm','.pdf')
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
# It should look at the contents of the files instead.
class UploadMinutesForm(forms.Form):
@ -1210,10 +1218,12 @@ class UploadMinutesForm(forms.Form):
def clean_file(self):
file = self.cleaned_data['file']
if file._size > settings.SECR_MAX_UPLOAD_SIZE:
raise forms.ValidationError('Please keep filesize under %s. Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size)))
if os.path.splitext(file.name)[1].lower() not in VALID_MINUTES_EXTENSIONS:
raise forms.ValidationError('Only these file types supported for minutes: %s' % ','.join(VALID_MINUTES_EXTENSIONS))
validate_file_size(file._size)
ext = validate_file_extension(file.name, settings.MEETING_VALID_MINUTES_EXTENSIONS)
content = file.read()
mime_type, encoding = validate_mime_type(content, settings.MEETING_VALID_MINUTES_MIME_TYPES)
if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
validate_no_html_frame(content)
return file
def upload_session_minutes(request, session_id, num):
@ -1292,7 +1302,7 @@ def upload_session_minutes(request, session_id, num):
'form': form,
})
VALID_AGENDA_EXTENSIONS = ('.txt','.html','.htm',)
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
# It should look at the contents of the files instead.
class UploadAgendaForm(forms.Form):
@ -1306,10 +1316,12 @@ class UploadAgendaForm(forms.Form):
def clean_file(self):
file = self.cleaned_data['file']
if file._size > settings.SECR_MAX_UPLOAD_SIZE:
raise forms.ValidationError('Please keep filesize under %s. Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size)))
if os.path.splitext(file.name)[1].lower() not in VALID_AGENDA_EXTENSIONS:
raise forms.ValidationError('Only these file types supported for agendas: %s' % ','.join(VALID_AGENDA_EXTENSIONS))
validate_file_size(file._size)
ext = validate_file_extension(file.name, settings.MEETING_VALID_AGENDA_EXTENSIONS)
content = file.read()
mime_type, encoding = validate_mime_type(content, settings.MEETING_VALID_AGENDA_MIME_TYPES)
if ext in ['.html', '.htm'] or mime_type in ['text/html', ]:
validate_no_html_frame(content)
return file
def upload_session_agenda(request, session_id, num):
@ -1400,7 +1412,7 @@ def upload_session_agenda(request, session_id, num):
'form': form,
})
VALID_SLIDE_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
# It should look at the contents of the files instead.
class UploadSlidesForm(forms.Form):
@ -1415,10 +1427,8 @@ class UploadSlidesForm(forms.Form):
def clean_file(self):
file = self.cleaned_data['file']
if file._size > settings.SECR_MAX_UPLOAD_SIZE:
raise forms.ValidationError('Please keep filesize under %s. Requested upload size is %s' % (filesizeformat(settings.SECR_MAX_UPLOAD_SIZE),filesizeformat(file._size)))
if os.path.splitext(file.name)[1].lower() not in VALID_SLIDE_EXTENSIONS:
raise forms.ValidationError('Only these file types supported for slides: %s' % ','.join(VALID_SLIDE_EXTENSIONS))
validate_file_size(file._size)
validate_file_extension(file.name, settings.MEETING_VALID_SLIDES_EXTENSIONS)
return file
def upload_session_slides(request, session_id, num, name):

View file

@ -722,6 +722,17 @@ MEETING_MATERIALS_DEFAULT_SUBMISSION_START_DAYS = 90
MEETING_MATERIALS_DEFAULT_SUBMISSION_CUTOFF_DAYS = 26
MEETING_MATERIALS_DEFAULT_SUBMISSION_CORRECTION_DAYS = 50
# Upload validation for meeting materials.  The *_EXTENSIONS settings list the
# accepted filename extensions (lower case, with the leading dot); the
# *_MIME_TYPES settings list the accepted MIME types as detected from the
# uploaded content.
#
# Session agendas:
MEETING_VALID_AGENDA_EXTENSIONS = ['.txt','.html','.htm', '.md', ]
MEETING_VALID_AGENDA_MIME_TYPES = ['text/plain', 'text/html', ]
#
# Session minutes (same as agendas, plus PDF):
MEETING_VALID_MINUTES_EXTENSIONS = ['.txt','.html','.htm', '.md', '.pdf', ]
MEETING_VALID_MINUTES_MIME_TYPES = ['text/plain', 'text/html', 'application/pdf', ]
#
# Session slides (checked by extension only; there is no MIME type list):
MEETING_VALID_SLIDES_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
#
# Bluesheet scans (PDF only):
MEETING_VALID_BLUESHEET_EXTENSIONS = ['.pdf', ]
MEETING_VALID_BLUESHEET_MIME_TYPES = ['application/pdf', ]
INTERNET_DRAFT_DAYS_TO_EXPIRE = 185
FLOORPLAN_MEDIA_DIR = 'floor'

View file

@ -1,6 +1,14 @@
import re
import os
import magic
from pyquery import PyQuery
from django import forms
from django.conf import settings
from django.core.exceptions import ValidationError
from django.template.defaultfilters import filesizeformat
import debug # pyflakes:ignore
def get_cleaned_text_file_content(uploaded_file):
"""Read uploaded file, try to fix up encoding to UTF-8 and
@ -46,3 +54,39 @@ def get_cleaned_text_file_content(uploaded_file):
content = content.replace("\r\n", "\n").replace("\r", "\n")
return content.encode("utf-8")
def get_mime_type(content):
    """Detect the MIME type and encoding of *content* with libmagic.

    The installed ``magic`` module may expose either of two APIs.  If it has
    ``magic.open()`` that interface is used; otherwise a ``magic.Magic``
    instance is created and its cookie is replaced with one opened with the
    MIME type and MIME encoding flags.
    (Presumably these are the file-magic and python-magic bindings
    respectively -- confirm against the deployment requirements.)

    Returns a two-element list ``[mime_type, encoding]`` made by splitting
    the libmagic answer (typically of the form
    ``'text/plain; charset=us-ascii'``) at the first ``'; '``.  If the answer
    has no encoding part, ``encoding`` is the empty string, so callers can
    always unpack the result into two names.
    """
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)
    # partition() always yields three parts, unlike split('; ', 1), which
    # returns a single-element list when the separator is absent.
    mime_type, _, encoding = filetype.partition('; ')
    return [mime_type, encoding]
def validate_file_size(size):
    """Reject an upload whose *size* exceeds ``settings.SECR_MAX_UPLOAD_SIZE``.

    Raises ``forms.ValidationError`` with a message giving both the limit and
    the offending size in human-readable form.  Returns ``None`` otherwise.
    """
    limit = settings.SECR_MAX_UPLOAD_SIZE
    if size > limit:
        message = 'Please keep filesize under %s. Requested upload size was %s' % (
            filesizeformat(limit),
            filesizeformat(size),
        )
        raise forms.ValidationError(message)
def validate_mime_type(content, valid):
    """Check that the MIME type detected for *content* is one of *valid*.

    *valid* is a collection of acceptable MIME type strings.  Returns the
    detected ``(mime_type, encoding)`` pair on success; raises
    ``forms.ValidationError`` naming the detected and the expected types
    otherwise.
    """
    detected = get_mime_type(content)
    mime_type, encoding = detected
    if mime_type in valid:
        return mime_type, encoding
    raise forms.ValidationError('Found content with unexpected mime type: %s. Expected one of %s.' %
                                (mime_type, ', '.join(valid)))
def validate_file_extension(name, valid):
    """Check that the extension of the filename *name* is one of *valid*.

    *valid* is a collection of acceptable extensions, in lower case and
    including the leading dot (for example ``'.pdf'``).  The comparison is
    case-insensitive with respect to *name*.

    Returns the extension in lower case, so callers can compare the result
    directly against lower-case literals such as ``'.html'``.  Raises
    ``forms.ValidationError`` if the extension is not accepted; the message
    quotes the extension as it was given.
    """
    ext = os.path.splitext(name)[1]
    normalized = ext.lower()
    if normalized not in valid:
        raise forms.ValidationError('Found an unexpected extension: %s. Expected one of %s' % (ext, ','.join(valid)))
    # Return the normalised form: the membership test above is already
    # case-insensitive, and callers match the result against lower-case names.
    return normalized
def validate_no_html_frame(content):
    """Reject HTML *content* that uses frames.

    The content is parsed with PyQuery; if any ``frameset``, ``frame`` or
    ``iframe`` element is found, ``forms.ValidationError`` is raised.
    Returns ``None`` otherwise.
    """
    document = PyQuery(content)
    frame_selectors = ("frameset", "frame", "iframe")
    # A PyQuery result is truthy only when it matched at least one element.
    if any(document(selector) for selector in frame_selectors):
        raise forms.ValidationError('Found content with html frames. Please upload a file that does not use frames')