Added more validation of extension, MIME type, etc. for uploaded meeting agendas and minutes. Added '.md' (Markdown) as an accepted file type. HTML with frames is now rejected. Factored out validation code into separate functions.
- Legacy-Id: 13849
This commit is contained in:
parent
fee74d3357
commit
46fc7b77fd
|
@ -1,3 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
@ -1520,7 +1522,7 @@ class MaterialsTests(TestCase):
|
|||
q = PyQuery(r.content)
|
||||
self.assertTrue('Upload' in unicode(q("title")))
|
||||
self.assertFalse(session.sessionpresentation_set.exists())
|
||||
test_file = StringIO('this is some text for a test')
|
||||
test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test')
|
||||
test_file.name = "not_really.pdf"
|
||||
r = self.client.post(url,dict(file=test_file))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
|
@ -1530,7 +1532,7 @@ class MaterialsTests(TestCase):
|
|||
self.assertEqual(r.status_code, 200)
|
||||
q = PyQuery(r.content)
|
||||
self.assertTrue('Revise' in unicode(q("title")))
|
||||
test_file = StringIO('this is some different text for a test')
|
||||
test_file = StringIO('%PDF-1.4\n%âãÏÓ\nthis is some different text for a test')
|
||||
test_file.name = "also_not_really.pdf"
|
||||
r = self.client.post(url,dict(file=test_file))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
|
@ -1555,7 +1557,7 @@ class MaterialsTests(TestCase):
|
|||
q = PyQuery(r.content)
|
||||
self.assertTrue('Upload' in unicode(q("title")))
|
||||
self.assertFalse(session.sessionpresentation_set.exists())
|
||||
test_file = StringIO('this is some text for a test')
|
||||
test_file = StringIO(b'%PDF-1.4\n%âãÏÓ\nthis is some text for a test')
|
||||
test_file.name = "not_really.pdf"
|
||||
r = self.client.post(url,dict(file=test_file))
|
||||
self.assertEqual(r.status_code, 302)
|
||||
|
@ -1610,6 +1612,13 @@ class MaterialsTests(TestCase):
|
|||
q = PyQuery(r.content)
|
||||
self.assertTrue(q('form .has-error'))
|
||||
|
||||
test_file = StringIO('<html><frameset><frame src="foo.html"></frame><frame src="bar.html"></frame></frameset></html>')
|
||||
test_file.name = "not_really.html"
|
||||
r = self.client.post(url,dict(file=test_file))
|
||||
self.assertEqual(r.status_code, 200)
|
||||
q = PyQuery(r.content)
|
||||
self.assertTrue(q('form .has-error'))
|
||||
|
||||
test_file = StringIO('this is some text for a test')
|
||||
test_file.name = "not_really.txt"
|
||||
r = self.client.post(url,dict(file=test_file,apply_to_all=False))
|
||||
|
|
|
@ -19,11 +19,11 @@ import debug # pyflakes:ignore
|
|||
from django import forms
|
||||
from django.shortcuts import render, redirect, get_object_or_404
|
||||
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden, Http404
|
||||
from django.conf import settings
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.urls import reverse,reverse_lazy
|
||||
from django.db.models import Min, Max, Q
|
||||
from django.conf import settings
|
||||
from django.forms.models import modelform_factory, inlineformset_factory
|
||||
from django.forms import ModelForm
|
||||
from django.template import TemplateDoesNotExist
|
||||
|
@ -33,7 +33,7 @@ from django.views.decorators.cache import cache_page
|
|||
from django.utils.text import slugify
|
||||
from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from django.template.defaultfilters import filesizeformat
|
||||
|
||||
|
||||
from ietf.doc.fields import SearchableDocumentsField
|
||||
from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent
|
||||
|
@ -64,6 +64,8 @@ from ietf.utils.mail import send_mail_message
|
|||
from ietf.utils.pipe import pipe
|
||||
from ietf.utils.pdf import pdf_pages
|
||||
from ietf.utils.text import xslugify
|
||||
from ietf.utils.textupload import ( validate_file_size, validate_mime_type,
|
||||
validate_file_extension, validate_no_html_frame, )
|
||||
|
||||
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
|
||||
InterimCancelForm, InterimSessionInlineFormSet)
|
||||
|
@ -1132,6 +1134,12 @@ def add_session_drafts(request, session_id, num):
|
|||
class UploadBlueSheetForm(forms.Form):
    """Form used to upload a bluesheet scan for a session."""
    file = forms.FileField(label='Bluesheet scan to upload')

    def clean_file(self):
        """Check the upload's sniffed MIME type and its filename extension.

        Returns the uploaded file object.  The validate_* helpers raise
        forms.ValidationError when the MIME type or extension is not in the
        corresponding MEETING_VALID_BLUESHEET_* setting.
        """
        file = self.cleaned_data['file']
        content = file.read()
        # read() leaves the position at end-of-file; rewind so that whatever
        # consumes the upload after validation starts from the first byte.
        file.seek(0)
        validate_mime_type(content, settings.MEETING_VALID_BLUESHEET_MIME_TYPES)
        validate_file_extension(file.name, settings.MEETING_VALID_BLUESHEET_EXTENSIONS)
        return file
|
||||
|
||||
@role_required('Area Director', 'Secretariat', 'IRTF Chair', 'WG Chair')
|
||||
def upload_session_bluesheets(request, session_id, num):
|
||||
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
|
||||
|
@ -1196,7 +1204,7 @@ def upload_session_bluesheets(request, session_id, num):
|
|||
'form': form,
|
||||
})
|
||||
|
||||
VALID_MINUTES_EXTENSIONS = ('.txt','.html','.htm','.pdf')
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadMinutesForm(forms.Form):
|
||||
|
@ -1210,10 +1218,12 @@ class UploadMinutesForm(forms.Form):
|
|||
|
||||
def clean_file(self):
    """Validate an uploaded minutes file.

    Checks, in order: upload size, filename extension, sniffed MIME type
    and, for HTML uploads, that the document contains no frames.  Each
    validate_* helper raises forms.ValidationError on rejection.

    Returns the uploaded file object, rewound to its start.
    """
    file = self.cleaned_data['file']
    # The shared helpers are the only size/extension checks: the previous
    # inline checks against VALID_MINUTES_EXTENSIONS duplicated them and
    # rejected '.md', which MEETING_VALID_MINUTES_EXTENSIONS accepts.
    validate_file_size(file._size)
    ext = validate_file_extension(file.name, settings.MEETING_VALID_MINUTES_EXTENSIONS)
    content = file.read()
    # read() leaves the position at end-of-file; rewind for later consumers.
    file.seek(0)
    mime_type = validate_mime_type(content, settings.MEETING_VALID_MINUTES_MIME_TYPES)[0]
    # Compare the extension case-insensitively so that e.g. '.HTML' cannot
    # skip the frame check.
    if ext.lower() in ('.html', '.htm') or mime_type == 'text/html':
        validate_no_html_frame(content)
    return file
|
||||
|
||||
def upload_session_minutes(request, session_id, num):
|
||||
|
@ -1292,7 +1302,7 @@ def upload_session_minutes(request, session_id, num):
|
|||
'form': form,
|
||||
})
|
||||
|
||||
VALID_AGENDA_EXTENSIONS = ('.txt','.html','.htm',)
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadAgendaForm(forms.Form):
|
||||
|
@ -1306,10 +1316,12 @@ class UploadAgendaForm(forms.Form):
|
|||
|
||||
def clean_file(self):
    """Validate an uploaded agenda file.

    Checks, in order: upload size, filename extension, sniffed MIME type
    and, for HTML uploads, that the document contains no frames.  Each
    validate_* helper raises forms.ValidationError on rejection.

    Returns the uploaded file object, rewound to its start.
    """
    file = self.cleaned_data['file']
    # The shared helpers are the only size/extension checks: the previous
    # inline checks against VALID_AGENDA_EXTENSIONS duplicated them and
    # rejected '.md', which MEETING_VALID_AGENDA_EXTENSIONS accepts.
    validate_file_size(file._size)
    ext = validate_file_extension(file.name, settings.MEETING_VALID_AGENDA_EXTENSIONS)
    content = file.read()
    # read() leaves the position at end-of-file; rewind for later consumers.
    file.seek(0)
    mime_type = validate_mime_type(content, settings.MEETING_VALID_AGENDA_MIME_TYPES)[0]
    # Compare the extension case-insensitively so that e.g. '.HTML' cannot
    # skip the frame check.
    if ext.lower() in ('.html', '.htm') or mime_type == 'text/html':
        validate_no_html_frame(content)
    return file
|
||||
|
||||
def upload_session_agenda(request, session_id, num):
|
||||
|
@ -1400,7 +1412,7 @@ def upload_session_agenda(request, session_id, num):
|
|||
'form': form,
|
||||
})
|
||||
|
||||
VALID_SLIDE_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
|
||||
|
||||
# FIXME: This form validation code (based on the secretariat upload code) only looks at filename extensions
|
||||
# It should look at the contents of the files instead.
|
||||
class UploadSlidesForm(forms.Form):
|
||||
|
@ -1415,10 +1427,8 @@ class UploadSlidesForm(forms.Form):
|
|||
|
||||
def clean_file(self):
    """Validate an uploaded slides file by size and filename extension.

    The validate_* helpers raise forms.ValidationError on rejection.
    Returns the uploaded file object.
    """
    file = self.cleaned_data['file']
    # The shared helpers replace the earlier inline size/extension checks,
    # which tested the same conditions a second time.
    validate_file_size(file._size)
    validate_file_extension(file.name, settings.MEETING_VALID_SLIDES_EXTENSIONS)
    return file
|
||||
|
||||
def upload_session_slides(request, session_id, num, name):
|
||||
|
|
|
@ -722,6 +722,17 @@ MEETING_MATERIALS_DEFAULT_SUBMISSION_START_DAYS = 90
|
|||
MEETING_MATERIALS_DEFAULT_SUBMISSION_CUTOFF_DAYS = 26
|
||||
MEETING_MATERIALS_DEFAULT_SUBMISSION_CORRECTION_DAYS = 50
|
||||
|
||||
MEETING_VALID_AGENDA_EXTENSIONS = ['.txt','.html','.htm', '.md', ]
|
||||
MEETING_VALID_AGENDA_MIME_TYPES = ['text/plain', 'text/html', ]
|
||||
#
|
||||
MEETING_VALID_MINUTES_EXTENSIONS = ['.txt','.html','.htm', '.md', '.pdf', ]
|
||||
MEETING_VALID_MINUTES_MIME_TYPES = ['text/plain', 'text/html', 'application/pdf', ]
|
||||
#
|
||||
MEETING_VALID_SLIDES_EXTENSIONS = ('.doc','.docx','.pdf','.ppt','.pptx','.txt') # Note the removal of .zip
|
||||
#
|
||||
MEETING_VALID_BLUESHEET_EXTENSIONS = ['.pdf', ]
|
||||
MEETING_VALID_BLUESHEET_MIME_TYPES = ['application/pdf', ]
|
||||
|
||||
INTERNET_DRAFT_DAYS_TO_EXPIRE = 185
|
||||
|
||||
FLOORPLAN_MEDIA_DIR = 'floor'
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
import re
|
||||
import os
|
||||
import magic
|
||||
from pyquery import PyQuery
|
||||
|
||||
from django import forms
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.template.defaultfilters import filesizeformat
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
def get_cleaned_text_file_content(uploaded_file):
|
||||
"""Read uploaded file, try to fix up encoding to UTF-8 and
|
||||
|
@ -46,3 +54,39 @@ def get_cleaned_text_file_content(uploaded_file):
|
|||
content = content.replace("\r\n", "\n").replace("\r", "\n")
|
||||
|
||||
return content.encode("utf-8")
|
||||
|
||||
def get_mime_type(content):
    """Sniff the MIME type of *content* with libmagic.

    Two flavours of the ``magic`` module are supported: one exposing a
    module-level ``open()``, and one exposing a ``Magic`` class together
    with ``magic_open()`` / ``magic_load()``.

    Returns a two-item list ``[mime_type, encoding]``.  ``encoding`` is the
    text that follows ``'; '`` in libmagic's answer (presumably of the form
    ``charset=...`` -- confirm against the installed binding) and is the
    empty string when libmagic reports no such part.
    """
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    # partition() always yields both parts, so callers that unpack two
    # values keep working even when there is no '; <encoding>' suffix.
    mime_type, _, encoding = filetype.partition('; ')
    return [mime_type, encoding]
|
||||
|
||||
def validate_file_size(size):
    """Raise forms.ValidationError if *size* exceeds settings.SECR_MAX_UPLOAD_SIZE.

    Both the limit and the offending size are rendered with filesizeformat
    in the error message.  Returns None when the size is acceptable.
    """
    limit = settings.SECR_MAX_UPLOAD_SIZE
    if size > limit:
        message = 'Please keep filesize under %s. Requested upload size was %s' % (
            filesizeformat(limit),
            filesizeformat(size),
        )
        raise forms.ValidationError(message)
|
||||
|
||||
def validate_mime_type(content, valid):
    """Sniff *content* and require its MIME type to be one of *valid*.

    Returns the ``(mime_type, encoding)`` pair obtained from get_mime_type;
    raises forms.ValidationError naming the detected and the expected types
    otherwise.
    """
    mime_type, encoding = get_mime_type(content)
    if mime_type in valid:
        return mime_type, encoding
    expected = ', '.join(valid)
    raise forms.ValidationError(
        'Found content with unexpected mime type: %s. Expected one of %s.' %
        (mime_type, expected))
|
||||
|
||||
def validate_file_extension(name, valid):
    """Require the extension of filename *name* to be one of *valid*.

    The comparison is case-insensitive (*valid* is expected to hold
    lowercase extensions including the leading dot).

    Returns the extension lowercased, so callers can compare it directly
    against lowercase literals such as '.html'.  Raises
    forms.ValidationError, quoting the extension as it was supplied, when
    it is not accepted.
    """
    _, ext = os.path.splitext(name)
    normalized = ext.lower()
    if normalized not in valid:
        raise forms.ValidationError('Found an unexpected extension: %s. Expected one of %s' % (ext, ','.join(valid)))
    return normalized
|
||||
|
||||
def validate_no_html_frame(content):
    """Raise forms.ValidationError if *content* holds a frameset, frame or iframe.

    *content* is parsed with PyQuery and each of the three element names is
    looked up in turn; the first match triggers the error.
    """
    # NOTE(review): PyQuery appears to treat a leading 'http://' or
    # 'https://' in its first argument as a URL to fetch -- confirm that
    # uploaded content cannot trigger that code path.
    doc = PyQuery(content)
    for tag in ("frameset", "frame", "iframe"):
        if doc(tag):
            raise forms.ValidationError('Found content with html frames. Please upload a file that does not use frames')
|
||||
|
|
Loading…
Reference in a new issue