Treat application/octet-stream as text/markdown for '.md' materials uploads. Refactor FileUploadForm hierarchy to reduce boilerplate. Fixes #3163. Commit ready for merge.

- Legacy-Id: 19744
This commit is contained in:
Jennifer Richards 2021-12-02 19:50:19 +00:00
parent 50bdddb624
commit b04254a293
5 changed files with 197 additions and 56 deletions

View file

@ -6,6 +6,9 @@ import io
import os import os
import datetime import datetime
import json import json
import re
from pathlib import Path
from django import forms from django import forms
from django.conf import settings from django.conf import settings
@ -326,14 +329,19 @@ class InterimCancelForm(forms.Form):
self.fields['date'].widget.attrs['disabled'] = True self.fields['date'].widget.attrs['disabled'] = True
class FileUploadForm(forms.Form): class FileUploadForm(forms.Form):
"""Base class for FileUploadForms
Abstract base class - subclasses must fill in the doc_type value with
the type of document they handle.
"""
file = forms.FileField(label='File to upload') file = forms.FileField(label='File to upload')
doc_type = '' # subclasses must set this
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
doc_type = kwargs.pop('doc_type') assert self.doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[self.doc_type]
self.doc_type = doc_type self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[self.doc_type]
self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type]
self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type]
super(FileUploadForm, self).__init__(*args, **kwargs) super(FileUploadForm, self).__init__(*args, **kwargs)
label = '%s file to upload. ' % (self.doc_type.capitalize(), ) label = '%s file to upload. ' % (self.doc_type.capitalize(), )
if self.doc_type == "slides": if self.doc_type == "slides":
@ -346,6 +354,15 @@ class FileUploadForm(forms.Form):
file = self.cleaned_data['file'] file = self.cleaned_data['file']
validate_file_size(file) validate_file_size(file)
ext = validate_file_extension(file, self.extensions) ext = validate_file_extension(file, self.extensions)
# override the Content-Type if needed
if file.content_type in 'application/octet-stream':
content_type_map = settings.MEETING_APPLICATION_OCTET_STREAM_OVERRIDES
filename = Path(file.name)
if filename.suffix in content_type_map:
file.content_type = content_type_map[filename.suffix]
self.cleaned_data['file'] = file
mime_type, encoding = validate_mime_type(file, self.mime_types) mime_type, encoding = validate_mime_type(file, self.mime_types)
if not hasattr(self, 'file_encoding'): if not hasattr(self, 'file_encoding'):
self.file_encoding = {} self.file_encoding = {}
@ -353,15 +370,72 @@ class FileUploadForm(forms.Form):
if self.mime_types: if self.mime_types:
if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]: if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]:
raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type)) raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))
if mime_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS: # We just validated that file.content_type is safe to accept despite being identified
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[mime_type]: # as a different MIME type by the validator. Check extension based on file.content_type
# because that better reflects the intention of the upload client.
if file.content_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[file.content_type]:
raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext)) raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext))
if mime_type in ['text/html', ] or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']: if (file.content_type in ['text/html', ]
or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS.get('text/html', [])):
# We'll do html sanitization later, but for frames, we fail here, # We'll do html sanitization later, but for frames, we fail here,
# as the sanitized version will most likely be useless. # as the sanitized version will most likely be useless.
validate_no_html_frame(file) validate_no_html_frame(file)
return file return file
class UploadBlueSheetForm(FileUploadForm):
doc_type = 'bluesheets'
class ApplyToAllFileUploadForm(FileUploadForm):
"""FileUploadField that adds an apply_to_all checkbox
Checkbox can be disabled by passing show_apply_to_all_checkbox=False to the constructor.
This entirely removes the field from the form.
"""
# Note: subclasses must set doc_type for FileUploadForm
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
super().__init__(*args, **kwargs)
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
else:
self.order_fields(
sorted(
self.fields.keys(),
key=lambda f: 'zzzzzz' if f == 'apply_to_all' else f
)
)
class UploadMinutesForm(ApplyToAllFileUploadForm):
doc_type = 'minutes'
class UploadAgendaForm(ApplyToAllFileUploadForm):
doc_type = 'agenda'
class UploadSlidesForm(ApplyToAllFileUploadForm):
doc_type = 'slides'
title = forms.CharField(max_length=255)
def __init__(self, session, *args, **kwargs):
super().__init__(*args, **kwargs)
self.session = session
def clean_title(self):
title = self.cleaned_data['title']
# The current tables only handles Unicode BMP:
if ord(max(title)) > 0xffff:
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
if self.session.meeting.type_id=='interim':
if re.search(r'-\d{2}$', title):
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
return title
class RequestMinutesForm(forms.Form): class RequestMinutesForm(forms.Form):
to = MultiEmailField() to = MultiEmailField()
cc = MultiEmailField(required=False) cc = MultiEmailField(required=False)

104
ietf/meeting/tests_forms.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright The IETF Trust 2021, All Rights Reserved
# -*- coding: utf-8 -*-
"""Tests of forms in the Meeting application"""
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import override_settings
from ietf.meeting.forms import FileUploadForm, ApplyToAllFileUploadForm
from ietf.utils.test_utils import TestCase
@override_settings(
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES={'.md': 'text/markdown'}, # test relies on .txt not mapping
MEETING_VALID_UPLOAD_EXTENSIONS={'minutes': ['.txt', '.md']}, # test relies on .exe being absent
MEETING_VALID_UPLOAD_MIME_TYPES={'minutes': ['text/plain', 'text/markdown']},
MEETING_VALID_MIME_TYPE_EXTENSIONS={'text/plain': ['.txt'], 'text/markdown': ['.md']},
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME={'text/plain': ['text/plain', 'text/markdown']},
)
class FileUploadFormTests(TestCase):
class TestClass(FileUploadForm):
doc_type = 'minutes'
def test_accepts_valid_data(self):
test_file = SimpleUploadedFile(
name='file.txt',
content=b'plain text',
content_type='text/plain',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertTrue(form.is_valid(), 'Test data are valid input')
cleaned_file = form.cleaned_data['file']
self.assertEqual(cleaned_file.name, 'file.txt', 'Uploaded filename should not be changed')
with cleaned_file.open('rb') as f:
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
self.assertEqual(cleaned_file.content_type, 'text/plain', 'Content-Type should be overridden')
def test_overrides_content_type_application_octet_stream(self):
test_file = SimpleUploadedFile(
name='file.md',
content=b'plain text',
content_type='application/octet-stream',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertTrue(form.is_valid(), 'Test data are valid input')
cleaned_file = form.cleaned_data['file']
# Test that the test_file is what actually came out of the cleaning process.
# This is not technically required here, but the other tests check that test_file's
# content_type has not been changed. If cleaning does not modify the content_type
# when it succeeds, then those other tests are not actually testing anything.
self.assertEqual(cleaned_file, test_file, 'Cleaning should return the file object that was passed in')
self.assertEqual(cleaned_file.name, 'file.md', 'Uploaded filename should not be changed')
with cleaned_file.open('rb') as f:
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
self.assertEqual(cleaned_file.content_type, 'text/markdown', 'Content-Type should be overridden')
def test_overrides_only_application_octet_stream(self):
test_file = SimpleUploadedFile(
name='file.md',
content=b'plain text',
content_type='application/json'
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.md', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/json', 'Uploaded Content-Type should not be changed')
def test_overrides_only_requested_extensions_when_valid_ext(self):
test_file = SimpleUploadedFile(
name='file.txt',
content=b'plain text',
content_type='application/octet-stream',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.txt', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
def test_overrides_only_requested_extensions_when_invalid_ext(self):
test_file = SimpleUploadedFile(
name='file.exe',
content=b'plain text',
content_type='application/octet-stream'
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.exe', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
class ApplyToAllFileUploadFormTests(TestCase):
class TestClass(ApplyToAllFileUploadForm):
doc_type = 'minutes'
def test_has_apply_to_all_field_by_default(self):
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=True)
self.assertIn('apply_to_all', form.fields)
def test_no_show_apply_to_all_field(self):
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=False)
self.assertNotIn('apply_to_all', form.fields)

View file

@ -99,7 +99,8 @@ from ietf.utils.response import permission_denied
from ietf.utils.text import xslugify from ietf.utils.text import xslugify
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm, from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,) InterimCancelForm, InterimSessionInlineFormSet, RequestMinutesForm,
UploadAgendaForm, UploadBlueSheetForm, UploadMinutesForm, UploadSlidesForm)
def get_interim_menu_entries(request): def get_interim_menu_entries(request):
@ -2341,13 +2342,6 @@ def add_session_drafts(request, session_id, num):
}) })
class UploadBlueSheetForm(FileUploadForm):
def __init__(self, *args, **kwargs):
kwargs['doc_type'] = 'bluesheets'
super(UploadBlueSheetForm, self).__init__(*args, **kwargs )
def upload_session_bluesheets(request, session_id, num): def upload_session_bluesheets(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id) session = get_object_or_404(Session,pk=session_id)
@ -2435,15 +2429,6 @@ def save_bluesheet(request, session, file, encoding='utf-8'):
doc.save_with_history([e]) doc.save_with_history([e])
return save_error return save_error
class UploadMinutesForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
kwargs['doc_type'] = 'minutes'
super(UploadMinutesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def upload_session_minutes(request, session_id, num): def upload_session_minutes(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
@ -2536,15 +2521,6 @@ def upload_session_minutes(request, session_id, num):
}) })
class UploadAgendaForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
kwargs['doc_type'] = 'agenda'
super(UploadAgendaForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def upload_session_agenda(request, session_id, num): def upload_session_agenda(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id) session = get_object_or_404(Session,pk=session_id)
@ -2639,27 +2615,6 @@ def upload_session_agenda(request, session_id, num):
}) })
class UploadSlidesForm(FileUploadForm):
title = forms.CharField(max_length=255)
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False)
def __init__(self, session, show_apply_to_all_checkbox, *args, **kwargs):
self.session = session
kwargs['doc_type'] = 'slides'
super(UploadSlidesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def clean_title(self):
title = self.cleaned_data['title']
# The current tables only handles Unicode BMP:
if ord(max(title)) > 0xffff:
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
if self.session.meeting.type_id=='interim':
if re.search(r'-\d{2}$', title):
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
return title
def upload_session_slides(request, session_id, num, name): def upload_session_slides(request, session_id, num, name):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id) session = get_object_or_404(Session,pk=session_id)

View file

@ -18,6 +18,8 @@ from ietf.secr.proceedings.utils import handle_upload_file
from ietf.utils.text import xslugify from ietf.utils.text import xslugify
class UploadProceedingsMaterialForm(FileUploadForm): class UploadProceedingsMaterialForm(FileUploadForm):
doc_type = 'procmaterials'
use_url = forms.BooleanField( use_url = forms.BooleanField(
required=False, required=False,
label='Use an external URL instead of uploading a document', label='Use an external URL instead of uploading a document',
@ -34,7 +36,7 @@ class UploadProceedingsMaterialForm(FileUploadForm):
) )
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(doc_type='procmaterials', *args, **kwargs) super().__init__(*args, **kwargs)
self.fields['file'].label = 'Select a file to upload. Allowed format{}: {}'.format( self.fields['file'].label = 'Select a file to upload. Allowed format{}: {}'.format(
'' if len(self.mime_types) == 1 else 's', '' if len(self.mime_types) == 1 else 's',
', '.join(self.mime_types), ', '.join(self.mime_types),

View file

@ -949,6 +949,12 @@ MEETING_VALID_MIME_TYPE_EXTENSIONS = {
'application/pdf': ['.pdf'], 'application/pdf': ['.pdf'],
} }
# Files uploaded with Content-Type application/octet-stream and an extension in this map will
# be treated as if they had been uploaded with the mapped Content-Type value.
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES = {
'.md': 'text/markdown',
}
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME = { MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME = {
'text/plain': ['text/plain', 'text/markdown', 'text/x-markdown', ], 'text/plain': ['text/plain', 'text/markdown', 'text/x-markdown', ],
'text/html': ['text/html', ], 'text/html': ['text/html', ],