From fd53f988545eb5b7b704b2001c306c58ad1193ba Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Wed, 23 Oct 2019 20:15:30 +0000 Subject: [PATCH] Fix #2584 - Add additional content validation for uploaded texts. Permitted MIME types are now text/plain, text/markdown and text/x-rst. This applies to all usages of get_cleaned_text_file_content(), including reviews, but also other similar places where text can either be written into a textarea or uploaded. Commit ready for merge. - Legacy-Id: 16922 --- ietf/settings.py | 11 +++++++++++ ietf/utils/test_textupload.py | 37 +++++++++++++++++++++++++++++++++++ ietf/utils/textupload.py | 8 ++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 ietf/utils/test_textupload.py diff --git a/ietf/settings.py b/ietf/settings.py index 03dee382c..a30d69ba4 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -672,6 +672,10 @@ MEETING_DOC_GREFS = { "bluesheets": "https://www.ietf.org/proceedings/{meeting.number}/bluesheets/{doc.uploaded_filename}", } +# Valid MIME types for cases where text is uploaded and immediately extracted, +# e.g. a charter or a review. Must be a tuple, not a list. 
+DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES = ('text/plain', 'text/markdown', 'text/x-rst') + # Override this in settings_local.py if needed CACHE_MIDDLEWARE_SECONDS = 300 CACHE_MIDDLEWARE_KEY_PREFIX = '' @@ -859,6 +863,13 @@ BIBXML_BASE_PATH = '/a/www/ietf-ftp/xml2rfc' TZDATA_ICS_PATH = BASE_DIR + '/../vzic/zoneinfo/' CHANGELOG_PATH = BASE_DIR + '/../changelog' +MEETING_VALID_UPLOAD_MIME_TYPES = { + 'agenda': ['text/plain', 'text/html', 'text/markdown', ], + 'minutes': ['text/plain', 'text/html', 'application/pdf', 'text/markdown', ], + 'slides': [], + 'bluesheets': ['application/pdf', 'text/plain', ], +} + SECR_BLUE_SHEET_PATH = '/a/www/ietf-datatracker/documents/blue_sheet.rtf' SECR_BLUE_SHEET_URL = '//datatracker.ietf.org/documents/blue_sheet.rtf' SECR_INTERIM_LISTING_DIR = '/a/www/www6/meeting/interim' diff --git a/ietf/utils/test_textupload.py b/ietf/utils/test_textupload.py new file mode 100644 index 000000000..a79fb73f8 --- /dev/null +++ b/ietf/utils/test_textupload.py @@ -0,0 +1,37 @@ +# Copyright The IETF Trust 2015-2019, All Rights Reserved +# -*- coding: utf-8 -*- + + +from __future__ import absolute_import, print_function, unicode_literals + +from django.core.exceptions import ValidationError +from django.core.files.uploadedfile import SimpleUploadedFile + +from .textupload import get_cleaned_text_file_content +from ietf.utils.test_utils import TestCase + + +class GetCleanedTextFileContentTest(TestCase): + def test_no_file(self): + self.assertEqual(get_cleaned_text_file_content(None), "") + + def test_valid_file(self): + data = 'testing 👾' + uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8')) + self.assertEqual(get_cleaned_text_file_content(uploaded_file), data) + + def test_invalid_mime_type_gif(self): + data = 'GIF89a;' + uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8')) + with self.assertRaises(ValidationError) as context: + get_cleaned_text_file_content(uploaded_file) + self.assertIn('does not appear to be a text 
file', context.exception.message) + self.assertIn('image/gif', context.exception.message) + + def test_invalid_mime_type_rst(self): + data = r'{\rtf1}' + uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8')) + with self.assertRaises(ValidationError) as context: + get_cleaned_text_file_content(uploaded_file) + self.assertIn('does not appear to be a text file', context.exception.message) + self.assertIn('text/rtf', context.exception.message) diff --git a/ietf/utils/textupload.py b/ietf/utils/textupload.py index 6dc010d56..3e827ba91 100644 --- a/ietf/utils/textupload.py +++ b/ietf/utils/textupload.py @@ -9,6 +9,8 @@ import re from django.core.exceptions import ValidationError import debug # pyflakes:ignore +from ietf.settings import DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES + def get_cleaned_text_file_content(uploaded_file): """Read uploaded file, try to fix up encoding to UTF-8 and @@ -36,8 +38,10 @@ def get_cleaned_text_file_content(uploaded_file): magic.magic_load(m.cookie, None) filetype = m.from_buffer(content) - if not filetype.startswith("text"): - raise ValidationError("Uploaded file does not appear to be a text file.") + if not filetype.startswith(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES): + raise ValidationError("Uploaded file does not appear to be a text file. " + "Permitted MIME types are {}, this file is {}" + .format(', '.join(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES), filetype)) match = re.search(r"charset=([\w-]+)", filetype) if not match: