From 278d868d288b8955b8ba81292560b76ff8363376 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Tue, 13 Mar 2018 16:45:50 +0000 Subject: [PATCH] Added handling for when file magic doesn't return a definitive encoding for a file. Added a test case to exercise error cases. - Legacy-Id: 14782 --- ietf/meeting/tests_views.py | 6 ++++++ ietf/secr/proceedings/utils.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index bcef50ab0..a1994201b 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -1745,6 +1745,12 @@ class MaterialsTests(TestCase): self.assertEqual(doc.rev,'02') self.assertTrue(session2.sessionpresentation_set.filter(document__type_id=doctype)) + # Test bad encoding + test_file = StringIO(u'

Title

Some\x93text
'.encode('latin1')) + test_file.name = "some.html" + r = self.client.post(url,dict(file=test_file)) + self.assertContains(r, 'Could not identify the file encoding') + def test_upload_minutes_agenda_unscheduled(self): for doctype in ('minutes','agenda'): session = SessionFactory(meeting__type_id='ietf', add_to_schedule=False) diff --git a/ietf/secr/proceedings/utils.py b/ietf/secr/proceedings/utils.py index 707bcb075..2ae0ead9f 100644 --- a/ietf/secr/proceedings/utils.py +++ b/ietf/secr/proceedings/utils.py @@ -39,14 +39,15 @@ def handle_upload_file(file,filename,meeting,subdir, request=None, encoding=None file.open() text = file.read() if encoding: - text = text.decode(encoding) + try: + text = text.decode(encoding) + except LookupError as e: + return "Failure trying to save '%s': Could not identify the file encoding, got '%s'. Hint: Try to upload as UTF-8." % (filename, str(e)[:120]) else: try: text = smart_text(text) except UnicodeDecodeError as e: - msg = "Failure trying to save '%s': %s..." % (filename, str(e)[:120]) - return msg - + return "Failure trying to save '%s'. Hint: Try to upload as UTF-8: %s..." % (filename, str(e)[:120]) # Whole file sanitization; add back what's missing from a complete # document (sanitize will remove these). clean = sanitize_document(text)