Check and sanitize text file upload (code is factored out into a new
helper so it can be reused elsewhere in the future).
 - Legacy-Id: 4380
This commit is contained in:
Ole Laursen 2012-05-04 11:37:16 +00:00
parent eaf09d9dc5
commit 3ec4dffd24
3 changed files with 68 additions and 7 deletions

43
ietf/utils/textupload.py Normal file
View file

@ -0,0 +1,43 @@
import re
import django.forms
def get_cleaned_text_file_content(uploaded_file, max_size=10 * 1000 * 1000):
    """Read an uploaded text file and return its content encoded as UTF-8.

    The content is sniffed with libmagic to check that it is text and to
    find its character set, decoded unless the character set is ASCII,
    has its line endings turned into Unix style and is finally encoded
    as UTF-8.

    Arguments:
    uploaded_file -- object providing ``size`` and ``chunks()`` (presumably
        a Django uploaded file); a false value means nothing was uploaded.
    max_size -- largest accepted ``uploaded_file.size``; defaults to
        10 * 1000 * 1000, the limit that used to be hard-coded.

    Returns u"" when ``uploaded_file`` is false, otherwise the cleaned
    content encoded as UTF-8.

    Raises django.forms.ValidationError when the file is too large, is not
    recognised as text, has no detectable character set or cannot be
    decoded.
    """
    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > max_size:
        raise django.forms.ValidationError("Text file too large (size %s)." % uploaded_file.size)

    content = "".join(uploaded_file.chunks())

    # try to fixup encoding
    import magic
    m = magic.open(magic.MAGIC_MIME)
    try:
        m.load()
        filetype = m.buffer(content)  # should look like "text/plain; charset=us-ascii"
    finally:
        # release the libmagic handle even if detection blows up
        m.close()

    # an empty/None answer from libmagic is treated like a non-text file
    # instead of failing on filetype.startswith()
    if not filetype or not filetype.startswith("text"):
        raise django.forms.ValidationError("Uploaded file does not appear to be a text file.")

    # raw string: "\w" is a regex class, not a string escape
    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise django.forms.ValidationError("File has unknown encoding.")

    encoding = match.group(1)
    if "ascii" not in encoding:
        try:
            content = content.decode(encoding)
        except Exception as e:
            # deliberately broad: any decoding problem (e.g. a charset name
            # the codec registry does not know, or bytes invalid for the
            # charset) is reported to the user as a validation error
            raise django.forms.ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))

    # turn line-endings into Unix style
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")

View file

@ -131,9 +131,20 @@ class EditCharterTestCase(django.test.TestCase):
q = PyQuery(r.content)
self.assertEquals(len(q('form input[name=txt]')), 1)
# faulty post
test_file = StringIO("\x10\x11\x12") # post binary file
test_file.name = "unnamed"
r = self.client.post(url, dict(txt=test_file))
self.assertEquals(r.status_code, 200)
self.assertTrue("does not appear to be a text file" in r.content)
# post
prev_rev = charter.rev
test_file = StringIO("hello world")
latin_1_snippet = '\xe5' * 10
utf_8_snippet = '\xc3\xa5' * 10
test_file = StringIO("Windows line\r\nMac line\rUnix line\n" + latin_1_snippet)
test_file.name = "unnamed"
r = self.client.post(url, dict(txt=test_file))
@ -143,6 +154,10 @@ class EditCharterTestCase(django.test.TestCase):
self.assertEquals(charter.rev, next_revision(prev_rev))
self.assertTrue("new_revision" in charter.latest_event().type)
with open(os.path.join(self.charter_dir, charter.canonical_name() + "-" + charter.rev + ".txt")) as f:
self.assertEquals(f.read(),
"Windows line\nMac line\nUnix line\n" + utf_8_snippet)
class CharterApproveBallotTestCase(django.test.TestCase):
fixtures = ['names']

View file

@ -14,6 +14,7 @@ from django.utils.safestring import mark_safe
from django.conf import settings
from ietf.utils.mail import send_mail_text, send_mail_preformatted
from ietf.utils.textupload import get_cleaned_text_file_content
from ietf.ietfauth.decorators import has_role, role_required
from ietf.iesg.models import TelechatDate
from ietf.doc.models import *
@ -229,13 +230,14 @@ class UploadForm(forms.Form):
def clean_content(self):
    """Return the cleaned "content" field with all carriage returns removed."""
    submitted_text = self.cleaned_data["content"]
    without_carriage_returns = submitted_text.replace("\r", "")
    return without_carriage_returns
def clean_txt(self):
    """Return the "txt" upload after passing it through get_cleaned_text_file_content()."""
    uploaded = self.cleaned_data["txt"]
    return get_cleaned_text_file_content(uploaded)
# Write the charter text for `wg` at revision `rev` to
# settings.CHARTER_PATH/<charter canonical name>-<rev>.txt.
# NOTE(review): this span is taken from a diff rendering without +/- markers,
# so it appears to interleave the pre- and post-commit bodies of save();
# confirm against the repository before treating it as one runnable body.
def save(self, wg, rev):
# presumably the pre-commit version: raw upload object, copied chunk by chunk below
fd = self.cleaned_data['txt']
filename = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (wg.charter.canonical_name(), rev))
# presumably the pre-commit version: copy the upload's chunks to the file unmodified
with open(filename, 'wb+') as destination:
if fd:
for chunk in fd.chunks():
destination.write(chunk)
# presumably the post-commit version: cleaned_data['txt'] is what clean_txt()
# returned, so it is written directly instead of being read in chunks
with open(filename, 'wb') as destination:
if self.cleaned_data['txt']:
destination.write(self.cleaned_data['txt'])
else:
# cleaned_data['txt'] is empty: write the "content" field instead
destination.write(self.cleaned_data['content'])
@ -246,7 +248,8 @@ def submit(request, name):
login = request.user.get_profile()
not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev)))
path = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev))
not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(path)
if not_uploaded_yet:
# this case is special - we recently chartered or rechartered and have no file yet