Added a workaround for the current libmagic, which can quite easily mischaracterise text/plain documents as text/x-Algol68. Fixes issues #2941 and #2956.

- Legacy-Id: 17594
This commit is contained in:
Henrik Levkowetz 2020-04-07 18:33:24 +00:00
parent 1561120c27
commit 0df0a87a48
4 changed files with 15 additions and 20 deletions

View file

@ -341,7 +341,7 @@ class FileUploadForm(forms.Form):
mime_type, encoding = validate_mime_type(file, self.mime_types)
if not hasattr(self, 'file_encoding'):
self.file_encoding = {}
self.file_encoding[file.name] = encoding.replace('charset=','') if encoding else None
self.file_encoding[file.name] = encoding or None
if self.mime_types:
if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]:
raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))

View file

@ -86,7 +86,7 @@ from ietf.utils.mail import send_mail_message, send_mail_text
from ietf.utils.pipe import pipe
from ietf.utils.pdf import pdf_pages
from ietf.utils.text import xslugify
from ietf.utils.validators import get_mime_type
from ietf.utils.mime import get_mime_type
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,)
@ -221,7 +221,7 @@ def materials_document(request, document, num=None, ext=None):
bytes = file.read()
mtype, chset = get_mime_type(bytes)
content_type = "%s; %s" % (mtype, chset)
content_type = "%s; charset=%s" % (mtype, chset)
file_ext = os.path.splitext(filename)
if len(file_ext) == 2 and file_ext[1] == '.md' and mtype == 'text/plain':

View file

@ -5,7 +5,6 @@
from __future__ import absolute_import, print_function, unicode_literals
import re
import magic
import datetime
import debug # pyflakes:ignore
import six
@ -15,6 +14,8 @@ if six.PY3:
from django.conf import settings
from django.template.defaultfilters import filesizeformat
from ietf.utils.mime import get_mime_type
class MetaData(object):
rev = None
name = None
@ -85,20 +86,7 @@ class FileParser(object):
def parse_file_type(self):
self.fd.file.seek(0)
content = self.fd.file.read(64*1024)
if hasattr(magic, "open"):
m = magic.open(magic.MAGIC_MIME)
m.load()
filetype = m.buffer(content)
else:
m = magic.Magic()
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
if ';' in filetype and 'charset=' in filetype:
mimetype, charset = re.split('; *charset=', filetype)
else:
mimetype = re.split(';', filetype)[0]
charset = 'utf-8'
mimetype, charset = get_mime_type(content)
if not mimetype in self.mimetypes:
self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype))
self.parsed_info.mimetype = mimetype

View file

@ -4,6 +4,7 @@
from __future__ import absolute_import, print_function, unicode_literals
import magic
import re
def get_mime_type(content):
# try to fixup encoding
@ -16,6 +17,12 @@ def get_mime_type(content):
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
return filetype.split('; ', 1)
# Work around silliness in libmagic on OpenSUSE 15.1
filetype = filetype.replace('text/x-Algol68;', 'text/plain;')
if ';' in filetype and 'charset=' in filetype:
mimetype, charset = re.split('; *charset=', filetype)
else:
mimetype = re.split(';', filetype)[0]
charset = 'utf-8'
return mimetype, charset