Check and sanitize text file upload (code is factored out in a new
helper so it can be reused elsewhere in the future). - Legacy-Id: 4380
This commit is contained in:
parent
eaf09d9dc5
commit
3ec4dffd24
43
ietf/utils/textupload.py
Normal file
43
ietf/utils/textupload.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
import re
|
||||
|
||||
import django.forms
|
||||
|
||||
def get_cleaned_text_file_content(uploaded_file):
    """Read uploaded file, try to fix up encoding to UTF-8 and
    transform line endings into Unix style, then return the content as
    a UTF-8 string. Errors are reported as
    django.forms.ValidationError exceptions.

    uploaded_file is expected to provide a ``size`` attribute and a
    ``chunks()`` method yielding byte strings; a false value (nothing
    uploaded) gives an empty string back.

    A ValidationError is raised when the file is larger than
    10 000 000 bytes, when libmagic does not classify it as text, when
    no charset can be found in the libmagic answer, or when the bytes
    cannot be decoded with the detected charset."""

    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
        raise django.forms.ValidationError("Text file too large (size %s)." % uploaded_file.size)

    # the chunks are raw bytes, so join them with a bytes separator; this
    # is the same thing as "" where str is bytes and still works where it
    # is not
    content = b"".join(uploaded_file.chunks())

    # try to fixup encoding
    import magic  # imported lazily, only needed once there is something to inspect
    m = magic.open(magic.MAGIC_MIME)
    try:
        m.load()
        filetype = m.buffer(content)  # should look like "text/plain; charset=us-ascii"
    finally:
        # release the libmagic handle explicitly instead of leaking one
        # per upload
        m.close()

    # guard against an empty answer as well as a non-text MIME type
    if not filetype or not filetype.startswith("text"):
        raise django.forms.ValidationError("Uploaded file does not appear to be a text file.")

    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise django.forms.ValidationError("File has unknown encoding.")

    encoding = match.group(1)

    # Always decode, even when the charset is reported as ASCII: calling
    # .encode() on an undecoded byte string relies on an implicit ASCII
    # decode, and a stray non-ASCII byte would then escape as an
    # unhandled UnicodeDecodeError instead of a validation message.
    try:
        content = content.decode(encoding)
    except Exception as e:
        # covers both undecodable bytes and charset names Python has no
        # codec for; either way the uploader has to fix the file
        raise django.forms.ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))

    # turn line-endings into Unix style (Windows pairs first, so that the
    # remaining lone carriage returns are old Mac line endings)
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")
|
|
@ -131,9 +131,20 @@ class EditCharterTestCase(django.test.TestCase):
|
|||
q = PyQuery(r.content)
|
||||
self.assertEquals(len(q('form input[name=txt]')), 1)
|
||||
|
||||
# faulty post
|
||||
test_file = StringIO("\x10\x11\x12") # post binary file
|
||||
test_file.name = "unnamed"
|
||||
|
||||
r = self.client.post(url, dict(txt=test_file))
|
||||
self.assertEquals(r.status_code, 200)
|
||||
self.assertTrue("does not appear to be a text file" in r.content)
|
||||
|
||||
# post
|
||||
prev_rev = charter.rev
|
||||
|
||||
test_file = StringIO("hello world")
|
||||
latin_1_snippet = '\xe5' * 10
|
||||
utf_8_snippet = '\xc3\xa5' * 10
|
||||
test_file = StringIO("Windows line\r\nMac line\rUnix line\n" + latin_1_snippet)
|
||||
test_file.name = "unnamed"
|
||||
|
||||
r = self.client.post(url, dict(txt=test_file))
|
||||
|
@ -143,6 +154,10 @@ class EditCharterTestCase(django.test.TestCase):
|
|||
self.assertEquals(charter.rev, next_revision(prev_rev))
|
||||
self.assertTrue("new_revision" in charter.latest_event().type)
|
||||
|
||||
with open(os.path.join(self.charter_dir, charter.canonical_name() + "-" + charter.rev + ".txt")) as f:
|
||||
self.assertEquals(f.read(),
|
||||
"Windows line\nMac line\nUnix line\n" + utf_8_snippet)
|
||||
|
||||
class CharterApproveBallotTestCase(django.test.TestCase):
|
||||
fixtures = ['names']
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ from django.utils.safestring import mark_safe
|
|||
from django.conf import settings
|
||||
|
||||
from ietf.utils.mail import send_mail_text, send_mail_preformatted
|
||||
from ietf.utils.textupload import get_cleaned_text_file_content
|
||||
from ietf.ietfauth.decorators import has_role, role_required
|
||||
from ietf.iesg.models import TelechatDate
|
||||
from ietf.doc.models import *
|
||||
|
@ -229,13 +230,14 @@ class UploadForm(forms.Form):
|
|||
def clean_content(self):
    """Return the cleaned "content" field with all carriage-return
    characters removed."""
    text = self.cleaned_data["content"]
    # dropping every "\r" leaves plain "\n" line endings for Windows-style input
    return text.replace("\r", "")
|
||||
|
||||
def clean_txt(self):
    """Pass the uploaded "txt" file through
    get_cleaned_text_file_content and return the result."""
    uploaded = self.cleaned_data["txt"]
    return get_cleaned_text_file_content(uploaded)
|
||||
|
||||
def save(self, wg, rev):
|
||||
fd = self.cleaned_data['txt']
|
||||
filename = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (wg.charter.canonical_name(), rev))
|
||||
with open(filename, 'wb+') as destination:
|
||||
if fd:
|
||||
for chunk in fd.chunks():
|
||||
destination.write(chunk)
|
||||
with open(filename, 'wb') as destination:
|
||||
if self.cleaned_data['txt']:
|
||||
destination.write(self.cleaned_data['txt'])
|
||||
else:
|
||||
destination.write(self.cleaned_data['content'])
|
||||
|
||||
|
@ -246,7 +248,8 @@ def submit(request, name):
|
|||
|
||||
login = request.user.get_profile()
|
||||
|
||||
not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev)))
|
||||
path = os.path.join(settings.CHARTER_PATH, '%s-%s.txt' % (charter.canonical_name(), charter.rev))
|
||||
not_uploaded_yet = charter.rev.endswith("-00") and not os.path.exists(path)
|
||||
|
||||
if not_uploaded_yet:
|
||||
# this case is special - we recently chartered or rechartered and have no file yet
|
||||
|
|
Loading…
Reference in a new issue