Refactored submission code to be clearer and only do mimetype extraction in one place, made the point where files are saved less obscure, fixed bytes/str issues for file read and write, fixed regex strings, fixed variable name visibility due to scope changes in py3.

- Legacy-Id: 16375
This commit is contained in:
Henrik Levkowetz 2019-07-04 15:49:42 +00:00
parent 2e92652eaa
commit 1225f8af6b
7 changed files with 112 additions and 88 deletions

View file

@ -108,9 +108,9 @@ class SubmissionBaseUploadForm(forms.Form):
if not f:
return f
parsed_info = parser_class(f).critical_parse()
if parsed_info.errors:
raise forms.ValidationError(parsed_info.errors)
self.parsed_info = parser_class(f).critical_parse()
if self.parsed_info.errors:
raise forms.ValidationError(self.parsed_info.errors)
return f
@ -215,7 +215,7 @@ class SubmissionBaseUploadForm(forms.Form):
bytes = txt_file.read()
txt_file.seek(0)
try:
text = bytes.decode('utf8')
text = bytes.decode(self.parsed_info.charset)
except UnicodeDecodeError as e:
raise forms.ValidationError('Failed decoding the uploaded file: "%s"' % str(e))
#

View file

@ -12,6 +12,7 @@ from django.urls import reverse as urlreverse
from django.core.validators import ValidationError
from django.contrib.sites.models import Site
from django.template.loader import render_to_string
from django.utils.encoding import force_text
import debug # pyflakes:ignore
@ -31,13 +32,13 @@ def send_submission_confirmation(request, submission, chair_notice=False):
from_email = settings.IDSUBMIT_FROM_EMAIL
(to_email, cc) = gather_address_lists('sub_confirmation_requested',submission=submission)
confirm_url = settings.IDTRACKER_BASE_URL + urlreverse('ietf.submit.views.confirm_submission', kwargs=dict(submission_id=submission.pk, auth_token=generate_access_token(submission.auth_key)))
confirmation_url = settings.IDTRACKER_BASE_URL + urlreverse('ietf.submit.views.confirm_submission', kwargs=dict(submission_id=submission.pk, auth_token=generate_access_token(submission.auth_key)))
status_url = settings.IDTRACKER_BASE_URL + urlreverse('ietf.submit.views.submission_status', kwargs=dict(submission_id=submission.pk, access_token=submission.access_token()))
send_mail(request, to_email, from_email, subject, 'submit/confirm_submission.txt',
{
'submission': submission,
'confirm_url': confirm_url,
'confirmation_url': confirmation_url,
'status_url': status_url,
'chair_notice': chair_notice,
},
@ -172,7 +173,7 @@ def get_reply_to():
address with "plus addressing" using a random string. Guaranteed to be unique"""
local,domain = get_base_submission_message_address().split('@')
while True:
rand = base64.urlsafe_b64encode(os.urandom(12))
rand = force_text(base64.urlsafe_b64encode(os.urandom(12)))
address = "{}+{}@{}".format(local,rand,domain)
q = Message.objects.filter(reply_to=address)
if not q:

View file

@ -77,7 +77,23 @@ class FileParser(object):
def parse_file_type(self):
self.fd.file.seek(0)
content = self.fd.file.read()
mimetype = magic.from_buffer(content, mime=True)
content = self.fd.file.read(64*1024)
if hasattr(magic, "open"):
m = magic.open(magic.MAGIC_MIME)
m.load()
filetype = m.buffer(content)
else:
m = magic.Magic()
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
if ';' in filetype and 'charset=' in filetype:
mimetype, charset = re.split('; *charset=', filetype)
else:
mimetype = re.split(';', filetype)[0]
charset = 'utf-8'
if not mimetype in self.mimetypes:
self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype))
self.parsed_info.mimetype = mimetype
self.parsed_info.charset = charset

View file

@ -1,6 +1,9 @@
# Copyright The IETF Trust 2011-2019, All Rights Reserved
import re
import debug # pyflakes:ignore
from ietf.submit.parsers.base import FileParser
@ -15,58 +18,51 @@ class PlainParser(FileParser):
# no other file parsing is recommended
def critical_parse(self):
super(PlainParser, self).critical_parse()
self.parse_file_charset()
self.check_file_charset()
self.parse_name()
return self.parsed_info
def parse_file_charset(self):
import magic
self.fd.file.seek(0)
content = self.fd.file.read()
if hasattr(magic, "open"):
m = magic.open(magic.MAGIC_MIME)
m.load()
filetype = m.buffer(content)
else:
m = magic.Magic()
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
magic.magic_load(m.cookie, None)
filetype = m.from_buffer(content)
if not 'ascii' in filetype and not 'utf-8' in filetype:
def check_file_charset(self):
charset = self.parsed_info.charset
if not charset in ['us-ascii', 'utf-8',]:
self.parsed_info.add_error('A plain text ASCII document is required. '
'Found an unexpected encoding: "%s". '
'You probably have one or more non-ascii characters in your file.' % filetype
'You probably have one or more non-ascii characters in your file.' % charset
)
if self.fd.charset and charset != self.fd.charset:
self.parsed_info.add_error("Unexpected charset mismatch: upload: %s, libmagic: %s" % (self.fd.charset, charset))
def parse_name(self):
self.fd.file.seek(0)
draftre = re.compile('(draft-\S+)')
revisionre = re.compile('.*-(\d+)$')
draftre = re.compile(r'(draft-\S+)')
revisionre = re.compile(r'.*-(\d+)$')
limit = 80
while limit:
limit -= 1
line = self.fd.readline()
match = draftre.search(line)
if not match:
continue
name = match.group(1)
name = re.sub('^[^\w]+', '', name)
name = re.sub('[^\w]+$', '', name)
name = re.sub('\.txt$', '', name)
extra_chars = re.sub('[0-9a-z\-]', '', name)
if extra_chars:
if len(extra_chars) == 1:
self.parsed_info.add_error(('The document name on the first page, "%s", contains a disallowed character with byte code: %s ' % (name.decode('utf-8','replace'), ord(extra_chars[0]))) +
'(see https://www.ietf.org/id-info/guidelines.html#naming for details).')
if self.parsed_info.charset in ['us-ascii', 'utf-8']:
while limit:
limit -= 1
line = self.fd.readline().decode(self.parsed_info.charset)
match = draftre.search(line)
if not match:
continue
name = match.group(1)
name = re.sub(r'^[^\w]+', '', name)
name = re.sub(r'[^\w]+$', '', name)
name = re.sub(r'\.txt$', '', name)
extra_chars = re.sub(r'[0-9a-z\-]', '', name)
if extra_chars:
if len(extra_chars) == 1:
self.parsed_info.add_error(('The document name on the first page, "%s", contains a disallowed character with byte code: %s ' % (name.decode('utf-8','replace'), ord(extra_chars[0]))) +
'(see https://www.ietf.org/id-info/guidelines.html#naming for details).')
else:
self.parsed_info.add_error(('The document name on the first page, "%s", contains disallowed characters with byte codes: %s ' % (name.decode('utf-8','replace'), (', '.join([ str(ord(c)) for c in extra_chars] )))) +
'(see https://www.ietf.org/id-info/guidelines.html#naming for details).')
match_revision = revisionre.match(name)
if match_revision:
self.parsed_info.metadata.rev = match_revision.group(1)
else:
self.parsed_info.add_error(('The document name on the first page, "%s", contains disallowed characters with byte codes: %s ' % (name.decode('utf-8','replace'), (', '.join([ str(ord(c)) for c in extra_chars] )))) +
'(see https://www.ietf.org/id-info/guidelines.html#naming for details).')
match_revision = revisionre.match(name)
if match_revision:
self.parsed_info.metadata.rev = match_revision.group(1)
else:
self.parsed_info.add_error('The name found on the first page of the document does not contain a revision: "%s"' % (name.decode('utf-8','replace'),))
name = re.sub('-\d+$', '', name)
self.parsed_info.metadata.name = name
return
self.parsed_info.add_error('The first page of the document does not contain a legitimate name that start with draft-*')
self.parsed_info.add_error('The name found on the first page of the document does not contain a revision: "%s"' % (name.decode('utf-8','replace'),))
name = re.sub(r'-\d+$', '', name)
self.parsed_info.metadata.name = name
return
self.parsed_info.add_error('The first page of the document does not contain a legitimate name that starts with draft-*')

View file

@ -1,7 +1,6 @@
# Copyright The IETF Trust 2011-2019, All Rights Reserved
# -*- coding: utf-8 -*-
import datetime
import email
import os
@ -35,7 +34,7 @@ from ietf.submit.models import Submission, Preapproval
from ietf.submit.mail import add_submission_email, process_response_email
from ietf.utils.mail import outbox, empty_outbox
from ietf.utils.models import VersionInfo
from ietf.utils.test_utils import login_testing_unauthorized, unicontent, TestCase
from ietf.utils.test_utils import login_testing_unauthorized, TestCase
from ietf.utils.draft import Draft
@ -185,17 +184,18 @@ class SubmitTests(TestCase):
return r
def extract_confirm_url(self, confirm_email):
# dig out confirm_email link
msg = confirm_email.get_payload(decode=True)
def extract_confirmation_url(self, confirmation_email):
# dig out confirmation_email link
charset = confirmation_email.get_content_charset()
msg = confirmation_email.get_payload(decode=True).decode(charset)
line_start = "http"
confirm_url = None
confirmation_url = None
for line in msg.split("\n"):
if line.strip().startswith(line_start):
confirm_url = line.strip()
self.assertTrue(confirm_url)
confirmation_url = line.strip()
self.assertTrue(confirmation_url)
return confirm_url
return confirmation_url
def submit_new_wg(self, formats):
# submit new -> supply submitter info -> approve
@ -420,16 +420,16 @@ class SubmitTests(TestCase):
self.assertNotIn("chairs have been copied", str(confirm_email))
self.assertNotIn("mars-chairs@", confirm_email["To"].lower())
confirm_url = self.extract_confirm_url(confirm_email)
confirmation_url = self.extract_confirmation_url(confirm_email)
# go to confirm page
r = self.client.get(confirm_url)
r = self.client.get(confirmation_url)
q = PyQuery(r.content)
self.assertEqual(len(q('[type=submit]:contains("Confirm")')), 1)
# confirm
mailbox_before = len(outbox)
r = self.client.post(confirm_url, {'action':'confirm'})
r = self.client.post(confirmation_url, {'action':'confirm'})
self.assertEqual(r.status_code, 302)
new_docevents = draft.docevent_set.exclude(pk__in=[event.pk for event in old_docevents])
@ -558,16 +558,16 @@ class SubmitTests(TestCase):
self.assertTrue("submitter@example.com" in confirm_email["To"])
self.assertFalse("chairs have been copied" in str(confirm_email))
confirm_url = self.extract_confirm_url(outbox[-1])
confirmation_url = self.extract_confirmation_url(outbox[-1])
# go to confirm page
r = self.client.get(confirm_url)
r = self.client.get(confirmation_url)
q = PyQuery(r.content)
self.assertEqual(len(q('[type=submit]:contains("Confirm")')), 1)
# confirm
mailbox_before = len(outbox)
r = self.client.post(confirm_url, {'action':'confirm'})
r = self.client.post(confirmation_url, {'action':'confirm'})
self.assertEqual(r.status_code, 302)
draft = Document.objects.get(docalias__name=name)
@ -613,10 +613,10 @@ class SubmitTests(TestCase):
status_url = r["Location"]
r = self.client.get(status_url)
self.assertEqual(len(outbox), mailbox_before + 1)
confirm_url = self.extract_confirm_url(outbox[-1])
confirmation_url = self.extract_confirmation_url(outbox[-1])
self.assertFalse("chairs have been copied" in str(outbox[-1]))
mailbox_before = len(outbox)
r = self.client.post(confirm_url, {'action':'confirm'})
r = self.client.post(confirmation_url, {'action':'confirm'})
self.assertEqual(r.status_code, 302)
self.assertEqual(len(outbox), mailbox_before+3)
draft = Document.objects.get(docalias__name=name)
@ -641,9 +641,9 @@ class SubmitTests(TestCase):
status_url = r["Location"]
r = self.client.get(status_url)
self.assertEqual(len(outbox), mailbox_before + 1)
confirm_url = self.extract_confirm_url(outbox[-1])
confirmation_url = self.extract_confirmation_url(outbox[-1])
mailbox_before = len(outbox)
r = self.client.post(confirm_url, {'action':'cancel'})
r = self.client.post(confirmation_url, {'action':'cancel'})
self.assertEqual(r.status_code, 302)
self.assertEqual(len(outbox), mailbox_before)
draft = Document.objects.get(docalias__name=name)
@ -818,7 +818,8 @@ class SubmitTests(TestCase):
# status page as unpriviliged => no edit button
r = self.client.get(unprivileged_status_url)
self.assertContains(r, "submission status of %s" % name)
print(r.content)
self.assertContains(r, "Submission status of %s" % name)
q = PyQuery(r.content)
adjust_button = q('[type=submit]:contains("Adjust")')
self.assertEqual(len(adjust_button), 0)
@ -1688,7 +1689,7 @@ class RefsTests(TestCase):
group = None
file, __ = submission_file('draft-some-subject', '00', group, 'txt', "test_submission.txt", )
draft = Draft(file.read().decode('utf-8'), file.name)
draft = Draft(file.read(), file.name)
refs = draft.get_refs()
self.assertEqual(refs['rfc2119'], 'norm')
self.assertEqual(refs['rfc8174'], 'norm')

View file

@ -458,7 +458,7 @@ def ensure_person_email_info_exists(name, email, docname):
person.name = name
person.name_from_draft = name
log.assertion('isinstance(person.name, six.text_type)')
person.ascii = unidecode_name(person.name).decode('ascii')
person.ascii = unidecode_name(person.name)
person.save()
else:
person.name_from_draft = name
@ -595,11 +595,8 @@ def expire_submission(submission, by):
SubmissionEvent.objects.create(submission=submission, by=by, desc="Cancelled expired submission")
def get_draft_meta(form):
authors = []
def save_files(form):
file_name = {}
abstract = None
file_size = None
for ext in list(form.fields.keys()):
if not ext in form.formats:
continue
@ -612,7 +609,13 @@ def get_draft_meta(form):
with open(name, 'wb+') as destination:
for chunk in f.chunks():
destination.write(chunk)
return file_name
def get_draft_meta(form, saved_files):
authors = []
file_name = saved_files
abstract = None
file_size = None
if form.cleaned_data['xml']:
if not ('txt' in form.cleaned_data and form.cleaned_data['txt']):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
@ -640,7 +643,7 @@ def get_draft_meta(form):
# be retrieved from the generated text file. Provide a
# parsed draft object to get at that kind of information.
with open(file_name['txt']) as txt_file:
form.parsed_draft = Draft(txt_file.read().decode('utf8'), txt_file.name)
form.parsed_draft = Draft(txt_file.read(), txt_file.name)
else:
file_size = form.cleaned_data['txt'].size

View file

@ -31,18 +31,19 @@ from ietf.submit.models import (Submission, Preapproval,
from ietf.submit.utils import ( approvable_submissions_for_user, preapprovals_for_user,
recently_approved_by_user, validate_submission, create_submission_event, docevent_from_submission,
post_submission, cancel_submission, rename_submission_files, remove_submission_files, get_draft_meta,
get_submission, fill_in_submission, apply_checkers, send_confirmation_emails )
get_submission, fill_in_submission, apply_checkers, send_confirmation_emails, save_files )
from ietf.stats.utils import clean_country_name
from ietf.utils.accesstoken import generate_access_token
from ietf.utils.log import log
from ietf.utils.mail import send_mail_message
from ietf.utils.mail import parseaddr, send_mail_message
def upload_submission(request):
if request.method == 'POST':
try:
form = SubmissionManualUploadForm(request, data=request.POST, files=request.FILES)
if form.is_valid():
authors, abstract, file_name, file_size = get_draft_meta(form)
saved_files = save_files(form)
authors, abstract, file_name, file_size = get_draft_meta(form, saved_files)
submission = get_submission(form)
try:
@ -87,7 +88,7 @@ def api_submit(request):
if request.method == 'GET':
return render(request, 'submit/api_submit_info.html')
elif request.method == 'POST':
e = None
exception = None
try:
form = SubmissionAutoUploadForm(request, data=request.POST, files=request.FILES)
if form.is_valid():
@ -101,7 +102,8 @@ def api_submit(request):
if not hasattr(user, 'person'):
return err(400, "No person with username %s" % username)
authors, abstract, file_name, file_size = get_draft_meta(form)
saved_files = save_files(form)
authors, abstract, file_name, file_size = get_draft_meta(form, saved_files)
for a in authors:
if not a['email']:
raise ValidationError("Missing email address for author %s" % a)
@ -142,14 +144,17 @@ def api_submit(request):
else:
raise ValidationError(form.errors)
except IOError as e:
exception = e
return err(500, "IO Error: %s" % str(e))
except ValidationError as e:
exception = e
return err(400, "Validation Error: %s" % str(e))
except Exception as e:
exception = e
raise
return err(500, "Exception: %s" % str(e))
finally:
if e and submission:
if exception and submission:
remove_submission_files(submission)
submission.delete()
else:
@ -213,8 +218,10 @@ def submission_status(request, submission_id, access_token=None):
# Begin common code chunk
addrs = gather_address_lists('sub_confirmation_requested',submission=submission)
confirmation_list = addrs.to
confirmation_list.extend(addrs.cc)
addresses = addrs.to
addresses.extend(addrs.cc)
# Convert from RFC 2822 format if needed
confirmation_list = [ "%s <%s>" % parseaddr(a) for a in addresses ]
requires_group_approval = (submission.rev == '00'
and submission.group and submission.group.features.req_subm_approval