Added handling for when file magic doesn't return a definitive encoding for a file. Added a test case to exercise error cases.

- Legacy-Id: 14782
This commit is contained in:
Henrik Levkowetz 2018-03-13 16:45:50 +00:00
parent 8cc61e0cf7
commit 278d868d28
2 changed files with 11 additions and 4 deletions

View file

@ -1745,6 +1745,12 @@ class MaterialsTests(TestCase):
self.assertEqual(doc.rev,'02')
self.assertTrue(session2.sessionpresentation_set.filter(document__type_id=doctype))
# Test bad encoding
test_file = StringIO(u'<html><h1>Title</h1><section>Some\x93text</section></html>'.encode('latin1'))
test_file.name = "some.html"
r = self.client.post(url,dict(file=test_file))
self.assertContains(r, 'Could not identify the file encoding')
def test_upload_minutes_agenda_unscheduled(self):
for doctype in ('minutes','agenda'):
session = SessionFactory(meeting__type_id='ietf', add_to_schedule=False)

View file

@ -39,14 +39,15 @@ def handle_upload_file(file,filename,meeting,subdir, request=None, encoding=None
file.open()
text = file.read()
if encoding:
text = text.decode(encoding)
try:
text = text.decode(encoding)
except LookupError as e:
return "Failure trying to save '%s': Could not identify the file encoding, got '%s'. Hint: Try to upload as UTF-8." % (filename, str(e)[:120])
else:
try:
text = smart_text(text)
except UnicodeDecodeError as e:
msg = "Failure trying to save '%s': %s..." % (filename, str(e)[:120])
return msg
return "Failure trying to save '%s'. Hint: Try to upload as UTF-8: %s..." % (filename, str(e)[:120])
# Whole file sanitization; add back what's missing from a complete
# document (sanitize will remove these).
clean = sanitize_document(text)