From 0df0a87a480c2689a04589233e1be073f9ad5ac9 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Tue, 7 Apr 2020 18:33:24 +0000 Subject: [PATCH] Added a workaround for the current libmagic which quite easily can mischaracterise text/plain documents as text/x-Algol68. Fixes issues #2941 and #2956. - Legacy-Id: 17594 --- ietf/meeting/forms.py | 2 +- ietf/meeting/views.py | 4 ++-- ietf/submit/parsers/base.py | 18 +++--------------- ietf/utils/mime.py | 11 +++++++++-- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/ietf/meeting/forms.py b/ietf/meeting/forms.py index 2c145816f..30b6910a8 100644 --- a/ietf/meeting/forms.py +++ b/ietf/meeting/forms.py @@ -341,7 +341,7 @@ class FileUploadForm(forms.Form): mime_type, encoding = validate_mime_type(file, self.mime_types) if not hasattr(self, 'file_encoding'): self.file_encoding = {} - self.file_encoding[file.name] = encoding.replace('charset=','') if encoding else None + self.file_encoding[file.name] = encoding or None if self.mime_types: if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]: raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type)) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 51a124523..d5bfae822 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -86,7 +86,7 @@ from ietf.utils.mail import send_mail_message, send_mail_text from ietf.utils.pipe import pipe from ietf.utils.pdf import pdf_pages from ietf.utils.text import xslugify -from ietf.utils.validators import get_mime_type +from ietf.utils.mime import get_mime_type from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm, InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,) @@ -221,7 +221,7 @@ def materials_document(request, document, num=None, ext=None): bytes = file.read() mtype, chset = get_mime_type(bytes) - content_type = "%s; %s" % (mtype, chset) + content_type = "%s; charset=%s" % (mtype, chset) file_ext = os.path.splitext(filename) if len(file_ext) == 2 and file_ext[1] == '.md' and mtype == 'text/plain': diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 05674420f..4eb3d3b8f 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -5,7 +5,6 @@ from __future__ import absolute_import, print_function, unicode_literals import re -import magic import datetime import debug # pyflakes:ignore import six @@ -15,6 +14,8 @@ if six.PY3: from django.conf import settings from django.template.defaultfilters import filesizeformat +from ietf.utils.mime import get_mime_type + class MetaData(object): rev = None name = None @@ -85,20 +86,7 @@ class FileParser(object): def parse_file_type(self): self.fd.file.seek(0) content = self.fd.file.read(64*1024) - if hasattr(magic, "open"): - m = magic.open(magic.MAGIC_MIME) - m.load() - filetype = m.buffer(content) - else: - m = magic.Magic() - m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING) - magic.magic_load(m.cookie, None) - filetype = m.from_buffer(content) - if ';' in filetype and 'charset=' in filetype: - mimetype, charset = re.split('; *charset=', filetype) - else: - mimetype = re.split(';', filetype)[0] - charset = 'utf-8' + mimetype, charset = get_mime_type(content) if not mimetype in self.mimetypes: self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype)) self.parsed_info.mimetype = mimetype diff --git a/ietf/utils/mime.py b/ietf/utils/mime.py index 4c58d6629..84d197fc7 100644 --- a/ietf/utils/mime.py +++ b/ietf/utils/mime.py @@ -4,6 +4,7 @@ from __future__ import absolute_import, print_function, unicode_literals import magic +import re def get_mime_type(content): # try to fixup encoding @@ -16,6 +17,12 @@ def get_mime_type(content): m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING) magic.magic_load(m.cookie, None) filetype = m.from_buffer(content) - - return filetype.split('; ', 1) + # Work around silliness in libmagic on OpenSUSE 15.1 + filetype = filetype.replace('text/x-Algol68;', 'text/plain;') + if ';' in filetype and 'charset=' in filetype: + mimetype, charset = re.split('; *charset=', filetype) + else: + mimetype = re.split(';', filetype)[0] + charset = 'utf-8' + return mimetype, charset