From 274548f1b1e9efa0f776817efa1fb66de494d351 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Sun, 2 Jul 2017 21:32:44 +0000 Subject: [PATCH] Check the whole draft submission text when checking mime type, and catch decoding errors and raise ValidationError when converting submission form txt file to unicode for Draft(). - Legacy-Id: 13759 --- ietf/submit/forms.py | 11 +++++++++-- ietf/submit/parsers/base.py | 2 +- ietf/submit/parsers/plain_parser.py | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 8c7d0eeae..6ea057454 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -203,11 +203,18 @@ class SubmissionUploadForm(forms.Form): # try to parse it txt_file = self.cleaned_data['txt'] txt_file.seek(0) - self.parsed_draft = Draft(txt_file.read().decode('utf8'), txt_file.name) + bytes = txt_file.read() + txt_file.seek(0) + try: + text = bytes.decode('utf8') + except UnicodeDecodeError as e: + raise forms.ValidationError('Failed decoding the uploaded file: "%s"' % str(e)) + # + self.parsed_draft = Draft(text, txt_file.name) self.filename = self.parsed_draft.filename self.revision = self.parsed_draft.revision self.title = self.parsed_draft.get_title() - txt_file.seek(0) + if not self.filename: raise forms.ValidationError("Could not extract a valid draft name from the upload" diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 2c4707b3a..94c928a87 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -76,7 +76,7 @@ class FileParser(object): def parse_file_type(self): self.fd.file.seek(0) - content = self.fd.file.read(4096) + content = self.fd.file.read() mimetype = magic.from_buffer(content, mime=True) if not mimetype in self.mimetypes: self.parsed_info.add_error(u'Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype)) diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index daf948840..4de4ff0b6 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -21,7 +21,7 @@ class PlainParser(FileParser): def parse_file_charset(self): import magic self.fd.file.seek(0) - content = self.fd.file.read(4096) + content = self.fd.file.read() if hasattr(magic, "open"): m = magic.open(magic.MAGIC_MIME) m.load() @@ -31,8 +31,8 @@ class PlainParser(FileParser): m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING) magic.magic_load(m.cookie, None) filetype = m.from_buffer(content) - if not 'ascii' in filetype: - self.parsed_info.add_error('A plain text ASCII document must be submitted.') + if not 'ascii' in filetype and not 'utf-8' in filetype: + self.parsed_info.add_error('A plain text ASCII document is required. Found an unexpected encoding: "%s"' % filetype) def parse_name(self): self.fd.file.seek(0)