From 3ec4dffd243dd73f7a28d51abfd2d198f025b124 Mon Sep 17 00:00:00 2001
From: Ole Laursen
Date: Fri, 4 May 2012 11:37:16 +0000
Subject: [PATCH] Check and sanitize text file upload (code is factored out in a new helper so it can be reused elsewhere in the future).

 - Legacy-Id: 4380
---
Reviewer notes (text between the "---" line and the diffstat is ignored
by git am; nothing below the diffstat has been altered in content):
 * ietf/utils/textupload.py: the handle returned by magic.open() is never
   closed in get_cleaned_text_file_content(); a follow-up could call
   m.close() once m.buffer() has returned.
 * ietf/utils/textupload.py: the "charset=([\w-]+)" pattern relies on "\w"
   not being a string escape; a raw string would state the intent.
 * NOTE(review): line breaks, blank context lines and indentation in this
   copy were restored from the hunk line counts and the diffstat; confirm
   against the repository before applying.

 ietf/utils/textupload.py | 43 ++++++++++++++++++++++++++++++++++++++++
 ietf/wgcharter/tests.py  | 17 +++++++++++++++-
 ietf/wgcharter/views.py  | 15 ++++++++------
 3 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100644 ietf/utils/textupload.py

diff --git a/ietf/utils/textupload.py b/ietf/utils/textupload.py
new file mode 100644
index 000000000..eeb6d141d
--- /dev/null
+++ b/ietf/utils/textupload.py
@@ -0,0 +1,43 @@
+import re
+
+import django.forms
+
+def get_cleaned_text_file_content(uploaded_file):
+    """Read uploaded file, try to fix up encoding to UTF-8 and
+    transform line endings into Unix style, then return the content as
+    a UTF-8 string. Errors are reported as
+    django.forms.ValidationError exceptions."""
+
+    if not uploaded_file:
+        return u""
+
+    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
+        raise django.forms.ValidationError("Text file too large (size %s)." % uploaded_file.size)
+
+    content = "".join(uploaded_file.chunks())
+
+    # try to fixup encoding
+    import magic
+    m = magic.open(magic.MAGIC_MIME)
+    m.load()
+
+    filetype = m.buffer(content) # should look like "text/plain; charset=us-ascii"
+
+    if not filetype.startswith("text"):
+        raise django.forms.ValidationError("Uploaded file does not appear to be a text file.")
+
+    match = re.search("charset=([\w-]+)", filetype)
+    if not match:
+        raise django.forms.ValidationError("File has unknown encoding.")
+
+    encoding = match.group(1)
+    if "ascii" not in encoding:
+        try:
+            content = content.decode(encoding)
+        except Exception as e:
+            raise django.forms.ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))
+
+    # turn line-endings into Unix style
+    content = content.replace("\r\n", "\n").replace("\r", "\n")
+
+    return content.encode("utf-8")
diff --git a/ietf/wgcharter/tests.py b/ietf/wgcharter/tests.py
index d3f570a30..e2a10e3d4 100644
--- a/ietf/wgcharter/tests.py
+++ b/ietf/wgcharter/tests.py
@@ -131,9 +131,20 @@ class EditCharterTestCase(django.test.TestCase):
         q = PyQuery(r.content)
         self.assertEquals(len(q('form input[name=txt]')), 1)
 
+        # faulty post
+        test_file = StringIO("\x10\x11\x12") # post binary file
+        test_file.name = "unnamed"
+
+        r = self.client.post(url, dict(txt=test_file))
+        self.assertEquals(r.status_code, 200)
+        self.assertTrue("does not appear to be a text file" in r.content)
+
+        # post
         prev_rev = charter.rev
 
-        test_file = StringIO("hello world")
+        latin_1_snippet = '\xe5' * 10
+        utf_8_snippet = '\xc3\xa5' * 10
+        test_file = StringIO("Windows line\r\nMac line\rUnix line\n" + latin_1_snippet)
         test_file.name = "unnamed"
 
         r = self.client.post(url, dict(txt=test_file))
@@ -143,6 +154,10 @@ class EditCharterTestCase(django.test.TestCase):
         self.assertEquals(charter.rev, next_revision(prev_rev))
         self.assertTrue("new_revision" in charter.latest_event().type)
 
+        with open(os.path.join(self.charter_dir, charter.canonical_name() + "-" + charter.rev + ".txt")) as f:
+            self.assertEquals(f.read(),
+                              "Windows line\nMac line\nUnix line\n" + utf_8_snippet)
+
 class CharterApproveBallotTestCase(django.test.TestCase):
     fixtures = ['names']
 
diff --git a/ietf/wgcharter/views.py b/ietf/wgcharter/views.py
index 7662f5b44..6d469357a 100644
--- a/ietf/wgcharter/views.py
+++ b/ietf/wgcharter/views.py
@@ -14,6 +14,7 @@ from django.utils.safestring import mark_safe
 from django.conf import settings
 
 from ietf.utils.mail import send_mail_text, send_mail_preformatted
+from ietf.utils.textupload import get_cleaned_text_file_content
 from ietf.ietfauth.decorators import has_role, role_required
 from ietf.iesg.models import TelechatDate
 from ietf.doc.models import *
@@ -229,13 +230,14 @@ class UploadForm(forms.Form):
     def clean_content(self):
         return self.cleaned_data["content"].replace("\r", "")
 
+    def clean_txt(self):
+        return get_cleaned_text_file_content(self.cleaned_data["txt"])
+
     def save(self, wg, rev):
-        fd = self.cleaned_data['txt']
         filename = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (wg.charter.canonical_name(), rev))
-        with open(filename, 'wb+') as destination:
-            if fd:
-                for chunk in fd.chunks():
-                    destination.write(chunk)
+        with open(filename, 'wb') as destination:
+            if self.cleaned_data['txt']:
+                destination.write(self.cleaned_data['txt'])
             else:
                 destination.write(self.cleaned_data['content'])
 
@@ -246,7 +248,8 @@ def submit(request, name):
 
     login = request.user.get_profile()
 
-    not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev)))
+    path = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev))
+    not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(path)
 
     if not_uploaded_yet:
         # this case is special - we recently chartered or rechartered and have no file yet