Check the whole draft submission text when checking the MIME type. Also catch decoding errors and raise a ValidationError when converting the submission form's txt file to unicode for Draft().

- Legacy-Id: 13759
This commit is contained in:
Henrik Levkowetz 2017-07-02 21:32:44 +00:00
parent 4b28605774
commit 274548f1b1
3 changed files with 13 additions and 6 deletions

View file

@ -203,11 +203,18 @@ class SubmissionUploadForm(forms.Form):
# try to parse it
txt_file = self.cleaned_data['txt']
txt_file.seek(0)
self.parsed_draft = Draft(txt_file.read().decode('utf8'), txt_file.name)
bytes = txt_file.read()
txt_file.seek(0)
try:
text = bytes.decode('utf8')
except UnicodeDecodeError as e:
raise forms.ValidationError('Failed decoding the uploaded file: "%s"' % str(e))
#
self.parsed_draft = Draft(text, txt_file.name)
self.filename = self.parsed_draft.filename
self.revision = self.parsed_draft.revision
self.title = self.parsed_draft.get_title()
txt_file.seek(0)
if not self.filename:
raise forms.ValidationError("Could not extract a valid draft name from the upload"

View file

@ -76,7 +76,7 @@ class FileParser(object):
def parse_file_type(self):
self.fd.file.seek(0)
content = self.fd.file.read(4096)
content = self.fd.file.read()
mimetype = magic.from_buffer(content, mime=True)
if not mimetype in self.mimetypes:
self.parsed_info.add_error(u'Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype))

View file

@ -21,7 +21,7 @@ class PlainParser(FileParser):
def parse_file_charset(self):
import magic
self.fd.file.seek(0)
content = self.fd.file.read(4096)
content = self.fd.file.read()
if hasattr(magic, "open"):
m = magic.open(magic.MAGIC_MIME)
m.load()
@ -31,8 +31,8 @@ class PlainParser(FileParser):
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
if not 'ascii' in filetype:
self.parsed_info.add_error('A plain text ASCII document must be submitted.')
if not 'ascii' in filetype and not 'utf-8' in filetype:
self.parsed_info.add_error('A plain text ASCII document is required. Found an unexpected encoding: "%s"' % filetype)
def parse_name(self):
self.fd.file.seek(0)