From b04254a2937dadd39209d5921bd6add12375b197 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 2 Dec 2021 19:50:19 +0000 Subject: [PATCH] Treat application/octet-stream as text/markdown for '.md' materials uploads. Refactor FileUploadForm hierarchy to reduce boilerplate. Fixes #3163. Commit ready for merge. - Legacy-Id: 19744 --- ietf/meeting/forms.py | 90 +++++++++++++++++++++++--- ietf/meeting/tests_forms.py | 104 ++++++++++++++++++++++++++++++ ietf/meeting/views.py | 49 +------------- ietf/meeting/views_proceedings.py | 4 +- ietf/settings.py | 6 ++ 5 files changed, 197 insertions(+), 56 deletions(-) create mode 100644 ietf/meeting/tests_forms.py diff --git a/ietf/meeting/forms.py b/ietf/meeting/forms.py index 9305e7c0b..156b11d22 100644 --- a/ietf/meeting/forms.py +++ b/ietf/meeting/forms.py @@ -6,6 +6,9 @@ import io import os import datetime import json +import re + +from pathlib import Path from django import forms from django.conf import settings @@ -326,14 +329,19 @@ class InterimCancelForm(forms.Form): self.fields['date'].widget.attrs['disabled'] = True class FileUploadForm(forms.Form): + """Base class for FileUploadForms + + Abstract base class - subclasses must fill in the doc_type value with + the type of document they handle. + """ file = forms.FileField(label='File to upload') + doc_type = '' # subclasses must set this + def __init__(self, *args, **kwargs): - doc_type = kwargs.pop('doc_type') - assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS - self.doc_type = doc_type - self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type] - self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type] + assert self.doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS + self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[self.doc_type] + self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[self.doc_type] super(FileUploadForm, self).__init__(*args, **kwargs) label = '%s file to upload. ' % (self.doc_type.capitalize(), ) if self.doc_type == "slides": @@ -346,6 +354,15 @@ class FileUploadForm(forms.Form): file = self.cleaned_data['file'] validate_file_size(file) ext = validate_file_extension(file, self.extensions) + + # override the Content-Type if needed + if file.content_type in 'application/octet-stream': + content_type_map = settings.MEETING_APPLICATION_OCTET_STREAM_OVERRIDES + filename = Path(file.name) + if filename.suffix in content_type_map: + file.content_type = content_type_map[filename.suffix] + self.cleaned_data['file'] = file + mime_type, encoding = validate_mime_type(file, self.mime_types) if not hasattr(self, 'file_encoding'): self.file_encoding = {} @@ -353,15 +370,72 @@ class FileUploadForm(forms.Form): if self.mime_types: if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]: raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type)) - if mime_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS: - if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[mime_type]: + # We just validated that file.content_type is safe to accept despite being identified + # as a different MIME type by the validator. Check extension based on file.content_type + # because that better reflects the intention of the upload client. + if file.content_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS: + if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[file.content_type]: raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext)) - if mime_type in ['text/html', ] or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']: + if (file.content_type in ['text/html', ] + or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS.get('text/html', [])): # We'll do html sanitization later, but for frames, we fail here, # as the sanitized version will most likely be useless. validate_no_html_frame(file) return file + +class UploadBlueSheetForm(FileUploadForm): + doc_type = 'bluesheets' + + +class ApplyToAllFileUploadForm(FileUploadForm): + """FileUploadField that adds an apply_to_all checkbox + + Checkbox can be disabled by passing show_apply_to_all_checkbox=False to the constructor. + This entirely removes the field from the form. + """ + # Note: subclasses must set doc_type for FileUploadForm + apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False) + + def __init__(self, show_apply_to_all_checkbox, *args, **kwargs): + super().__init__(*args, **kwargs) + if not show_apply_to_all_checkbox: + self.fields.pop('apply_to_all') + else: + self.order_fields( + sorted( + self.fields.keys(), + key=lambda f: 'zzzzzz' if f == 'apply_to_all' else f + ) + ) + +class UploadMinutesForm(ApplyToAllFileUploadForm): + doc_type = 'minutes' + + +class UploadAgendaForm(ApplyToAllFileUploadForm): + doc_type = 'agenda' + + +class UploadSlidesForm(ApplyToAllFileUploadForm): + doc_type = 'slides' + title = forms.CharField(max_length=255) + + def __init__(self, session, *args, **kwargs): + super().__init__(*args, **kwargs) + self.session = session + + def clean_title(self): + title = self.cleaned_data['title'] + # The current tables only handles Unicode BMP: + if ord(max(title)) > 0xffff: + raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported") + if self.session.meeting.type_id=='interim': + if re.search(r'-\d{2}$', title): + raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)") + return title + + class RequestMinutesForm(forms.Form): to = MultiEmailField() cc = MultiEmailField(required=False) diff --git a/ietf/meeting/tests_forms.py b/ietf/meeting/tests_forms.py new file mode 100644 index 000000000..871ad1ea3 --- /dev/null +++ b/ietf/meeting/tests_forms.py @@ -0,0 +1,104 @@ +# Copyright The IETF Trust 2021, All Rights Reserved +# -*- coding: utf-8 -*- +"""Tests of forms in the Meeting application""" +from django.core.files.uploadedfile import SimpleUploadedFile +from django.test import override_settings + +from ietf.meeting.forms import FileUploadForm, ApplyToAllFileUploadForm +from ietf.utils.test_utils import TestCase + + +@override_settings( + MEETING_APPLICATION_OCTET_STREAM_OVERRIDES={'.md': 'text/markdown'}, # test relies on .txt not mapping + MEETING_VALID_UPLOAD_EXTENSIONS={'minutes': ['.txt', '.md']}, # test relies on .exe being absent + MEETING_VALID_UPLOAD_MIME_TYPES={'minutes': ['text/plain', 'text/markdown']}, + MEETING_VALID_MIME_TYPE_EXTENSIONS={'text/plain': ['.txt'], 'text/markdown': ['.md']}, + MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME={'text/plain': ['text/plain', 'text/markdown']}, +) +class FileUploadFormTests(TestCase): + class TestClass(FileUploadForm): + doc_type = 'minutes' + + def test_accepts_valid_data(self): + test_file = SimpleUploadedFile( + name='file.txt', + content=b'plain text', + content_type='text/plain', + ) + + form = FileUploadFormTests.TestClass(files={'file': test_file}) + self.assertTrue(form.is_valid(), 'Test data are valid input') + cleaned_file = form.cleaned_data['file'] + self.assertEqual(cleaned_file.name, 'file.txt', 'Uploaded filename should not be changed') + with cleaned_file.open('rb') as f: + self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed') + self.assertEqual(cleaned_file.content_type, 'text/plain', 'Content-Type should be overridden') + + def test_overrides_content_type_application_octet_stream(self): + test_file = SimpleUploadedFile( + name='file.md', + content=b'plain text', + content_type='application/octet-stream', + ) + + form = FileUploadFormTests.TestClass(files={'file': test_file}) + self.assertTrue(form.is_valid(), 'Test data are valid input') + cleaned_file = form.cleaned_data['file'] + # Test that the test_file is what actually came out of the cleaning process. + # This is not technically required here, but the other tests check that test_file's + # content_type has not been changed. If cleaning does not modify the content_type + # when it succeeds, then those other tests are not actually testing anything. + self.assertEqual(cleaned_file, test_file, 'Cleaning should return the file object that was passed in') + self.assertEqual(cleaned_file.name, 'file.md', 'Uploaded filename should not be changed') + with cleaned_file.open('rb') as f: + self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed') + self.assertEqual(cleaned_file.content_type, 'text/markdown', 'Content-Type should be overridden') + + def test_overrides_only_application_octet_stream(self): + test_file = SimpleUploadedFile( + name='file.md', + content=b'plain text', + content_type='application/json' + ) + + form = FileUploadFormTests.TestClass(files={'file': test_file}) + self.assertFalse(form.is_valid(), 'Test data are invalid input') + self.assertEqual(test_file.name, 'file.md', 'Uploaded filename should not be changed') + self.assertEqual(test_file.content_type, 'application/json', 'Uploaded Content-Type should not be changed') + + def test_overrides_only_requested_extensions_when_valid_ext(self): + test_file = SimpleUploadedFile( + name='file.txt', + content=b'plain text', + content_type='application/octet-stream', + ) + + form = FileUploadFormTests.TestClass(files={'file': test_file}) + self.assertFalse(form.is_valid(), 'Test data are invalid input') + self.assertEqual(test_file.name, 'file.txt', 'Uploaded filename should not be changed') + self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed') + + def test_overrides_only_requested_extensions_when_invalid_ext(self): + test_file = SimpleUploadedFile( + name='file.exe', + content=b'plain text', + content_type='application/octet-stream' + ) + + form = FileUploadFormTests.TestClass(files={'file': test_file}) + self.assertFalse(form.is_valid(), 'Test data are invalid input') + self.assertEqual(test_file.name, 'file.exe', 'Uploaded filename should not be changed') + self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed') + + +class ApplyToAllFileUploadFormTests(TestCase): + class TestClass(ApplyToAllFileUploadForm): + doc_type = 'minutes' + + def test_has_apply_to_all_field_by_default(self): + form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=True) + self.assertIn('apply_to_all', form.fields) + + def test_no_show_apply_to_all_field(self): + form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=False) + self.assertNotIn('apply_to_all', form.fields) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 35a09be2d..6ef70c096 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -99,7 +99,8 @@ from ietf.utils.response import permission_denied from ietf.utils.text import xslugify from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm, - InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,) + InterimCancelForm, InterimSessionInlineFormSet, RequestMinutesForm, + UploadAgendaForm, UploadBlueSheetForm, UploadMinutesForm, UploadSlidesForm) def get_interim_menu_entries(request): @@ -2341,13 +2342,6 @@ def add_session_drafts(request, session_id, num): }) -class UploadBlueSheetForm(FileUploadForm): - - def __init__(self, *args, **kwargs): - kwargs['doc_type'] = 'bluesheets' - super(UploadBlueSheetForm, self).__init__(*args, **kwargs ) - - def upload_session_bluesheets(request, session_id, num): # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure session = get_object_or_404(Session,pk=session_id) @@ -2435,15 +2429,6 @@ def save_bluesheet(request, session, file, encoding='utf-8'): doc.save_with_history([e]) return save_error -class UploadMinutesForm(FileUploadForm): - apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False) - - def __init__(self, show_apply_to_all_checkbox, *args, **kwargs): - kwargs['doc_type'] = 'minutes' - super(UploadMinutesForm, self).__init__(*args, **kwargs ) - if not show_apply_to_all_checkbox: - self.fields.pop('apply_to_all') - def upload_session_minutes(request, session_id, num): # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure @@ -2536,15 +2521,6 @@ def upload_session_minutes(request, session_id, num): }) -class UploadAgendaForm(FileUploadForm): - apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False) - - def __init__(self, show_apply_to_all_checkbox, *args, **kwargs): - kwargs['doc_type'] = 'agenda' - super(UploadAgendaForm, self).__init__(*args, **kwargs ) - if not show_apply_to_all_checkbox: - self.fields.pop('apply_to_all') - def upload_session_agenda(request, session_id, num): # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure session = get_object_or_404(Session,pk=session_id) @@ -2639,27 +2615,6 @@ def upload_session_agenda(request, session_id, num): }) -class UploadSlidesForm(FileUploadForm): - title = forms.CharField(max_length=255) - apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False) - - def __init__(self, session, show_apply_to_all_checkbox, *args, **kwargs): - self.session = session - kwargs['doc_type'] = 'slides' - super(UploadSlidesForm, self).__init__(*args, **kwargs ) - if not show_apply_to_all_checkbox: - self.fields.pop('apply_to_all') - - def clean_title(self): - title = self.cleaned_data['title'] - # The current tables only handles Unicode BMP: - if ord(max(title)) > 0xffff: - raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported") - if self.session.meeting.type_id=='interim': - if re.search(r'-\d{2}$', title): - raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)") - return title - def upload_session_slides(request, session_id, num, name): # num is redundant, but we're dragging it along an artifact of where we are in the current URL structure session = get_object_or_404(Session,pk=session_id) diff --git a/ietf/meeting/views_proceedings.py b/ietf/meeting/views_proceedings.py index 9b2bd276b..cfb38e5ab 100644 --- a/ietf/meeting/views_proceedings.py +++ b/ietf/meeting/views_proceedings.py @@ -18,6 +18,8 @@ from ietf.secr.proceedings.utils import handle_upload_file from ietf.utils.text import xslugify class UploadProceedingsMaterialForm(FileUploadForm): + doc_type = 'procmaterials' + use_url = forms.BooleanField( required=False, label='Use an external URL instead of uploading a document', @@ -34,7 +36,7 @@ class UploadProceedingsMaterialForm(FileUploadForm): ) def __init__(self, *args, **kwargs): - super().__init__(doc_type='procmaterials', *args, **kwargs) + super().__init__(*args, **kwargs) self.fields['file'].label = 'Select a file to upload. Allowed format{}: {}'.format( '' if len(self.mime_types) == 1 else 's', ', '.join(self.mime_types), diff --git a/ietf/settings.py b/ietf/settings.py index f836ceb59..a6c4c8188 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -949,6 +949,12 @@ MEETING_VALID_MIME_TYPE_EXTENSIONS = { 'application/pdf': ['.pdf'], } +# Files uploaded with Content-Type application/octet-stream and an extension in this map will +# be treated as if they had been uploaded with the mapped Content-Type value. +MEETING_APPLICATION_OCTET_STREAM_OVERRIDES = { + '.md': 'text/markdown', +} + MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME = { 'text/plain': ['text/plain', 'text/markdown', 'text/x-markdown', ], 'text/html': ['text/html', ],