Treat application/octet-stream as text/markdown for '.md' materials uploads. Refactor FileUploadForm hierarchy to reduce boilerplate. Fixes #3163. Commit ready for merge.

- Legacy-Id: 19744
This commit is contained in:
Jennifer Richards 2021-12-02 19:50:19 +00:00
parent 50bdddb624
commit b04254a293
5 changed files with 197 additions and 56 deletions

View file

@ -6,6 +6,9 @@ import io
import os
import datetime
import json
import re
from pathlib import Path
from django import forms
from django.conf import settings
@ -326,14 +329,19 @@ class InterimCancelForm(forms.Form):
self.fields['date'].widget.attrs['disabled'] = True
class FileUploadForm(forms.Form):
"""Base class for FileUploadForms
Abstract base class - subclasses must fill in the doc_type value with
the type of document they handle.
"""
file = forms.FileField(label='File to upload')
doc_type = '' # subclasses must set this
def __init__(self, *args, **kwargs):
doc_type = kwargs.pop('doc_type')
assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
self.doc_type = doc_type
self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type]
self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type]
assert self.doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[self.doc_type]
self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[self.doc_type]
super(FileUploadForm, self).__init__(*args, **kwargs)
label = '%s file to upload. ' % (self.doc_type.capitalize(), )
if self.doc_type == "slides":
@ -346,6 +354,15 @@ class FileUploadForm(forms.Form):
file = self.cleaned_data['file']
validate_file_size(file)
ext = validate_file_extension(file, self.extensions)
# override the Content-Type if needed
if file.content_type in 'application/octet-stream':
content_type_map = settings.MEETING_APPLICATION_OCTET_STREAM_OVERRIDES
filename = Path(file.name)
if filename.suffix in content_type_map:
file.content_type = content_type_map[filename.suffix]
self.cleaned_data['file'] = file
mime_type, encoding = validate_mime_type(file, self.mime_types)
if not hasattr(self, 'file_encoding'):
self.file_encoding = {}
@ -353,15 +370,72 @@ class FileUploadForm(forms.Form):
if self.mime_types:
if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]:
raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))
if mime_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[mime_type]:
# We just validated that file.content_type is safe to accept despite being identified
# as a different MIME type by the validator. Check extension based on file.content_type
# because that better reflects the intention of the upload client.
if file.content_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[file.content_type]:
raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext))
if mime_type in ['text/html', ] or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
if (file.content_type in ['text/html', ]
or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS.get('text/html', [])):
# We'll do html sanitization later, but for frames, we fail here,
# as the sanitized version will most likely be useless.
validate_no_html_frame(file)
return file
class UploadBlueSheetForm(FileUploadForm):
doc_type = 'bluesheets'
class ApplyToAllFileUploadForm(FileUploadForm):
"""FileUploadField that adds an apply_to_all checkbox
Checkbox can be disabled by passing show_apply_to_all_checkbox=False to the constructor.
This entirely removes the field from the form.
"""
# Note: subclasses must set doc_type for FileUploadForm
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
super().__init__(*args, **kwargs)
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
else:
self.order_fields(
sorted(
self.fields.keys(),
key=lambda f: 'zzzzzz' if f == 'apply_to_all' else f
)
)
class UploadMinutesForm(ApplyToAllFileUploadForm):
doc_type = 'minutes'
class UploadAgendaForm(ApplyToAllFileUploadForm):
doc_type = 'agenda'
class UploadSlidesForm(ApplyToAllFileUploadForm):
doc_type = 'slides'
title = forms.CharField(max_length=255)
def __init__(self, session, *args, **kwargs):
super().__init__(*args, **kwargs)
self.session = session
def clean_title(self):
title = self.cleaned_data['title']
# The current tables only handles Unicode BMP:
if ord(max(title)) > 0xffff:
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
if self.session.meeting.type_id=='interim':
if re.search(r'-\d{2}$', title):
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
return title
class RequestMinutesForm(forms.Form):
to = MultiEmailField()
cc = MultiEmailField(required=False)

104
ietf/meeting/tests_forms.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright The IETF Trust 2021, All Rights Reserved
# -*- coding: utf-8 -*-
"""Tests of forms in the Meeting application"""
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import override_settings
from ietf.meeting.forms import FileUploadForm, ApplyToAllFileUploadForm
from ietf.utils.test_utils import TestCase
@override_settings(
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES={'.md': 'text/markdown'}, # test relies on .txt not mapping
MEETING_VALID_UPLOAD_EXTENSIONS={'minutes': ['.txt', '.md']}, # test relies on .exe being absent
MEETING_VALID_UPLOAD_MIME_TYPES={'minutes': ['text/plain', 'text/markdown']},
MEETING_VALID_MIME_TYPE_EXTENSIONS={'text/plain': ['.txt'], 'text/markdown': ['.md']},
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME={'text/plain': ['text/plain', 'text/markdown']},
)
class FileUploadFormTests(TestCase):
class TestClass(FileUploadForm):
doc_type = 'minutes'
def test_accepts_valid_data(self):
test_file = SimpleUploadedFile(
name='file.txt',
content=b'plain text',
content_type='text/plain',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertTrue(form.is_valid(), 'Test data are valid input')
cleaned_file = form.cleaned_data['file']
self.assertEqual(cleaned_file.name, 'file.txt', 'Uploaded filename should not be changed')
with cleaned_file.open('rb') as f:
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
self.assertEqual(cleaned_file.content_type, 'text/plain', 'Content-Type should be overridden')
def test_overrides_content_type_application_octet_stream(self):
test_file = SimpleUploadedFile(
name='file.md',
content=b'plain text',
content_type='application/octet-stream',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertTrue(form.is_valid(), 'Test data are valid input')
cleaned_file = form.cleaned_data['file']
# Test that the test_file is what actually came out of the cleaning process.
# This is not technically required here, but the other tests check that test_file's
# content_type has not been changed. If cleaning does not modify the content_type
# when it succeeds, then those other tests are not actually testing anything.
self.assertEqual(cleaned_file, test_file, 'Cleaning should return the file object that was passed in')
self.assertEqual(cleaned_file.name, 'file.md', 'Uploaded filename should not be changed')
with cleaned_file.open('rb') as f:
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
self.assertEqual(cleaned_file.content_type, 'text/markdown', 'Content-Type should be overridden')
def test_overrides_only_application_octet_stream(self):
test_file = SimpleUploadedFile(
name='file.md',
content=b'plain text',
content_type='application/json'
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.md', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/json', 'Uploaded Content-Type should not be changed')
def test_overrides_only_requested_extensions_when_valid_ext(self):
test_file = SimpleUploadedFile(
name='file.txt',
content=b'plain text',
content_type='application/octet-stream',
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.txt', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
def test_overrides_only_requested_extensions_when_invalid_ext(self):
test_file = SimpleUploadedFile(
name='file.exe',
content=b'plain text',
content_type='application/octet-stream'
)
form = FileUploadFormTests.TestClass(files={'file': test_file})
self.assertFalse(form.is_valid(), 'Test data are invalid input')
self.assertEqual(test_file.name, 'file.exe', 'Uploaded filename should not be changed')
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
class ApplyToAllFileUploadFormTests(TestCase):
class TestClass(ApplyToAllFileUploadForm):
doc_type = 'minutes'
def test_has_apply_to_all_field_by_default(self):
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=True)
self.assertIn('apply_to_all', form.fields)
def test_no_show_apply_to_all_field(self):
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=False)
self.assertNotIn('apply_to_all', form.fields)

View file

@ -99,7 +99,8 @@ from ietf.utils.response import permission_denied
from ietf.utils.text import xslugify
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,)
InterimCancelForm, InterimSessionInlineFormSet, RequestMinutesForm,
UploadAgendaForm, UploadBlueSheetForm, UploadMinutesForm, UploadSlidesForm)
def get_interim_menu_entries(request):
@ -2341,13 +2342,6 @@ def add_session_drafts(request, session_id, num):
})
class UploadBlueSheetForm(FileUploadForm):
def __init__(self, *args, **kwargs):
kwargs['doc_type'] = 'bluesheets'
super(UploadBlueSheetForm, self).__init__(*args, **kwargs )
def upload_session_bluesheets(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id)
@ -2435,15 +2429,6 @@ def save_bluesheet(request, session, file, encoding='utf-8'):
doc.save_with_history([e])
return save_error
class UploadMinutesForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
kwargs['doc_type'] = 'minutes'
super(UploadMinutesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def upload_session_minutes(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
@ -2536,15 +2521,6 @@ def upload_session_minutes(request, session_id, num):
})
class UploadAgendaForm(FileUploadForm):
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
kwargs['doc_type'] = 'agenda'
super(UploadAgendaForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def upload_session_agenda(request, session_id, num):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id)
@ -2639,27 +2615,6 @@ def upload_session_agenda(request, session_id, num):
})
class UploadSlidesForm(FileUploadForm):
title = forms.CharField(max_length=255)
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False)
def __init__(self, session, show_apply_to_all_checkbox, *args, **kwargs):
self.session = session
kwargs['doc_type'] = 'slides'
super(UploadSlidesForm, self).__init__(*args, **kwargs )
if not show_apply_to_all_checkbox:
self.fields.pop('apply_to_all')
def clean_title(self):
title = self.cleaned_data['title']
# The current tables only handles Unicode BMP:
if ord(max(title)) > 0xffff:
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
if self.session.meeting.type_id=='interim':
if re.search(r'-\d{2}$', title):
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
return title
def upload_session_slides(request, session_id, num, name):
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
session = get_object_or_404(Session,pk=session_id)

View file

@ -18,6 +18,8 @@ from ietf.secr.proceedings.utils import handle_upload_file
from ietf.utils.text import xslugify
class UploadProceedingsMaterialForm(FileUploadForm):
doc_type = 'procmaterials'
use_url = forms.BooleanField(
required=False,
label='Use an external URL instead of uploading a document',
@ -34,7 +36,7 @@ class UploadProceedingsMaterialForm(FileUploadForm):
)
def __init__(self, *args, **kwargs):
super().__init__(doc_type='procmaterials', *args, **kwargs)
super().__init__(*args, **kwargs)
self.fields['file'].label = 'Select a file to upload. Allowed format{}: {}'.format(
'' if len(self.mime_types) == 1 else 's',
', '.join(self.mime_types),

View file

@ -949,6 +949,12 @@ MEETING_VALID_MIME_TYPE_EXTENSIONS = {
'application/pdf': ['.pdf'],
}
# Files uploaded with Content-Type application/octet-stream and an extension in this map will
# be treated as if they had been uploaded with the mapped Content-Type value.
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES = {
'.md': 'text/markdown',
}
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME = {
'text/plain': ['text/plain', 'text/markdown', 'text/x-markdown', ],
'text/html': ['text/html', ],