Fix #2584 - Add additional content validation for uploaded texts.

Permitted MIME types are now text/plain, text/markdown and text/x-rst.
This applies to all usages of get_cleaned_text_file_content(),
including reviews, but also other similar places where text can
either be typed into a textarea or uploaded.

Commit ready for merge.
 - Legacy-Id: 16922
This commit is contained in:
Sasha Romijn 2019-10-23 20:15:30 +00:00
parent 9df5839874
commit fd53f98854
3 changed files with 54 additions and 2 deletions

View file

@ -672,6 +672,10 @@ MEETING_DOC_GREFS = {
"bluesheets": "https://www.ietf.org/proceedings/{meeting.number}/bluesheets/{doc.uploaded_filename}",
}
# MIME types accepted when text is uploaded and its content extracted right
# away, e.g. for a charter or a review.
# Keep this a tuple, never a list: the upload validation passes it straight
# to str.startswith(), which accepts a tuple of prefixes but rejects a list.
DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES = (
    'text/plain',
    'text/markdown',
    'text/x-rst',
)
# Cache middleware defaults; override them in settings_local.py if needed.
CACHE_MIDDLEWARE_SECONDS = 5 * 60  # 300 seconds
CACHE_MIDDLEWARE_KEY_PREFIX = ""
@ -859,6 +863,13 @@ BIBXML_BASE_PATH = '/a/www/ietf-ftp/xml2rfc'
# Filesystem locations built by appending a relative suffix to BASE_DIR
# (BASE_DIR is defined outside this excerpt).
# NOTE(review): presumably the vzic-generated zoneinfo tree and the project
# changelog file -- confirm against the code that reads these settings.
TZDATA_ICS_PATH = BASE_DIR + '/../vzic/zoneinfo/'
CHANGELOG_PATH = BASE_DIR + '/../changelog'
# MIME types accepted for meeting-related uploads, keyed by document type.
# NOTE(review): 'slides' maps to an empty list -- confirm with the upload
# handling code whether that means "no restriction" or "nothing accepted".
MEETING_VALID_UPLOAD_MIME_TYPES = {
    'agenda': [
        'text/plain',
        'text/html',
        'text/markdown',
    ],
    'minutes': [
        'text/plain',
        'text/html',
        'application/pdf',
        'text/markdown',
    ],
    'slides': [],
    'bluesheets': [
        'application/pdf',
        'text/plain',
    ],
}

# Blue sheet RTF document: filesystem path and scheme-relative URL.
SECR_BLUE_SHEET_PATH = "/a/www/ietf-datatracker/documents/blue_sheet.rtf"
SECR_BLUE_SHEET_URL = "//datatracker.ietf.org/documents/blue_sheet.rtf"
# NOTE(review): presumably the directory holding the interim meeting
# listing -- confirm against the secretariat code that uses it.
SECR_INTERIM_LISTING_DIR = "/a/www/www6/meeting/interim"

View file

@ -0,0 +1,37 @@
# Copyright The IETF Trust 2015-2019, All Rights Reserved
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
from django.core.exceptions import ValidationError
from django.core.files.uploadedfile import SimpleUploadedFile
from .textupload import get_cleaned_text_file_content
from ietf.utils.test_utils import TestCase
class GetCleanedTextFileContentTest(TestCase):
    """Tests for get_cleaned_text_file_content().

    Covers the three visible outcomes: no upload at all, a valid UTF-8 text
    upload, and uploads whose detected MIME type is not permitted.  Every
    upload is named ``data.txt``, so the rejections below depend on the
    content of the file rather than on its file name.
    """

    def _assert_upload_rejected(self, payload, detected_mime_type):
        """Upload *payload* as ``data.txt`` and check that it is refused.

        payload: text content of the upload; it is encoded as UTF-8.
        detected_mime_type: MIME type expected to be named in the error.

        The raised ValidationError must both state that the upload is not a
        text file and mention the MIME type that was detected.
        """
        uploaded_file = SimpleUploadedFile('data.txt', payload.encode('utf-8'))
        with self.assertRaises(ValidationError) as context:
            get_cleaned_text_file_content(uploaded_file)
        # The message checks sit after the ``with`` block on purpose: the
        # exception ends the block, so statements inside it after the call
        # would never execute.
        message = context.exception.message
        self.assertIn('does not appear to be a text file', message)
        self.assertIn(detected_mime_type, message)

    def test_no_file(self):
        """Passing None instead of an upload yields an empty string."""
        self.assertEqual(get_cleaned_text_file_content(None), "")

    def test_valid_file(self):
        """UTF-8 text, including a non-BMP character, is returned unchanged."""
        data = 'testing 👾'
        uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8'))
        self.assertEqual(get_cleaned_text_file_content(uploaded_file), data)

    def test_invalid_mime_type_gif(self):
        """Content starting with a GIF signature is rejected as image/gif."""
        self._assert_upload_rejected('GIF89a;', 'image/gif')

    def test_invalid_mime_type_rtf(self):
        """Content starting with an RTF header is rejected as text/rtf.

        RTF is a ``text/*`` type that is not in the permitted list; it is
        unrelated to reStructuredText (text/x-rst), which is permitted.
        """
        self._assert_upload_rejected(r'{\rtf1}', 'text/rtf')

View file

@ -9,6 +9,8 @@ import re
from django.core.exceptions import ValidationError
import debug # pyflakes:ignore
from ietf.settings import DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES
def get_cleaned_text_file_content(uploaded_file):
"""Read uploaded file, try to fix up encoding to UTF-8 and
@ -36,8 +38,10 @@ def get_cleaned_text_file_content(uploaded_file):
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
if not filetype.startswith("text"):
raise ValidationError("Uploaded file does not appear to be a text file.")
if not filetype.startswith(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES):
raise ValidationError("Uploaded file does not appear to be a text file. "
"Permitted MIME types are {}, this file is {}"
.format(', '.join(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES), filetype))
match = re.search(r"charset=([\w-]+)", filetype)
if not match: