Moved the code that generates .txt and .html draft files from .xml to a place where potential errors can be caught and displayed better. Related to ticket #2814.

- Legacy-Id: 16862
This commit is contained in:
Henrik Levkowetz 2019-10-15 14:33:35 +00:00
parent 2d9ff6ca70
commit 994e0ba6cc
4 changed files with 106 additions and 75 deletions

View file

@ -11,15 +11,22 @@ import datetime
import email
import pytz
import six
import sys
import tempfile
import xml2rfc
from email.utils import formataddr
from unidecode import unidecode
if six.PY2:
from StringIO import StringIO
else:
from io import StringIO
from django import forms
from django.conf import settings
from django.utils.html import mark_safe
from django.utils.html import mark_safe # type:ignore
from django.urls import reverse as urlreverse
from django.utils.encoding import force_str
@ -39,6 +46,7 @@ from ietf.submit.parsers.pdf_parser import PDFParser
from ietf.submit.parsers.plain_parser import PlainParser
from ietf.submit.parsers.ps_parser import PSParser
from ietf.submit.parsers.xml_parser import XMLParser
from ietf.utils import log
from ietf.utils.draft import Draft
class SubmissionBaseUploadForm(forms.Form):
@ -128,6 +136,20 @@ class SubmissionBaseUploadForm(forms.Form):
return self.clean_file("xml", XMLParser)
def clean(self):
def format_messages(where, e, log):
    """Build the list of message lines reported for a failed xml2rfc step.

    Parameters:
        where: short label for the step that failed; it is interpolated
            into the header line "Error from xml2rfc (<where>):".
        e:     the exception that was raised.
        log:   object whose ``write_out`` and ``write_err`` attributes
            provide ``getvalue()`` (e.g. StringIO buffers); their captured
            text is appended line by line.

    Returns:
        A list of non-empty strings: the header, then either ``str(e)``
        or (when the exception has no message) its formatted traceback,
        then the captured output lines, then the captured error lines.
    """
    import traceback

    out = log.write_out.getvalue().splitlines()
    err = log.write_err.getvalue().splitlines()
    text = str(e)
    if text:
        details = [text]
    else:
        # The exception carries no message of its own, so show where it
        # came from instead.  Format the exception that was actually
        # passed in rather than whatever sys.exc_info() reports, so the
        # result is right even when no exception is currently being handled.
        tb = getattr(e, '__traceback__', None)
        if tb is None:
            # No traceback attached to the exception object (always the
            # case on Python 2): fall back to the one being handled, if any.
            tb = sys.exc_info()[2]
        details = traceback.format_exception(type(e), e, tb)
        details = [line.replace('\n ', ':\n ') for line in details]
    header = "Error from xml2rfc (%s):" % (where,)
    # Drop empty strings, e.g. blank lines in the captured output.
    return [s for s in ([header] + details + out + err) if s]
if self.shutdown and not has_role(self.request.user, "Secretariat"):
raise forms.ValidationError('The submission tool is currently shut down')
@ -145,6 +167,9 @@ class SubmissionBaseUploadForm(forms.Form):
xml_file = self.cleaned_data.get('xml')
name, ext = os.path.splitext(os.path.basename(xml_file.name))
tfh, tfn = tempfile.mkstemp(prefix=name+'-', suffix='.xml')
file_name = {}
xml2rfc.log.write_out = StringIO() # open(os.devnull, "w")
xml2rfc.log.write_err = StringIO() # open(os.devnull, "w")
try:
# We need to write the xml file to disk in order to hand it
# over to the xml parser. XXX FIXME: investigate updating
@ -154,33 +179,15 @@ class SubmissionBaseUploadForm(forms.Form):
for chunk in xml_file.chunks():
tf.write(chunk)
os.environ["XML_LIBRARY"] = settings.XML_LIBRARY
# --- Parse the xml ---
try:
parser = xml2rfc.XmlRfcParser(str(tfn), quiet=True)
self.xmltree = parser.parse(normalize=True)
root = self.xmltree.getroot()
ver = root.get('version', '2')
if ver == '2':
ok, errors = self.xmltree.validate()
else:
# XXX TODO: Add v3 validation
ok, errors = True, ''
except Exception as exc:
raise forms.ValidationError("An exception occurred when trying to process the XML file: %s" % exc)
if not ok:
# Each error has properties:
#
# message: the message text
# domain: the domain ID (see lxml.etree.ErrorDomains)
# type: the message type ID (see lxml.etree.ErrorTypes)
# level: the log level ID (see lxml.etree.ErrorLevels)
# line: the line at which the message originated (if applicable)
# column: the character column at which the message originated (if applicable)
# filename: the name of the file in which the message originated (if applicable)
raise forms.ValidationError(
[ forms.ValidationError("One or more XML validation errors occurred when processing the XML file:") ] +
[ forms.ValidationError("%s: Line %s: %s" % (xml_file.name, e.line, e.message), code="%s"%e.type) for e in errors ]
)
self.xmlroot = self.xmltree.getroot()
self.xmlroot = self.xmltree.getroot()
xml_version = self.xmlroot.get('version', '2')
except Exception as e:
raise forms.ValidationError("An exception occurred when trying to parse the XML file: %s" % e)
draftname = self.xmlroot.attrib.get('docName')
if draftname is None:
raise forms.ValidationError("No docName attribute found in the xml root element")
@ -212,8 +219,76 @@ class SubmissionBaseUploadForm(forms.Form):
if info[item]:
info[item] = info[item].strip()
self.authors.append(info)
except forms.ValidationError:
raise
# --- Prep the xml ---
file_name['xml'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.%s' % (self.filename, self.revision, ext))
try:
if xml_version == '3':
prep = xml2rfc.PrepToolWriter(self.xmltree, quiet=True)
self.xmltree.tree = prep.prep()
if self.xmltree.tree == None:
raise forms.ValidationError("Error from xml2rfc (prep): %s" % prep.errors)
except Exception as e:
msgs = format_messages('prep', e, xml2rfc.log)
raise forms.ValidationError(msgs)
# --- Convert to txt ---
if not ('txt' in self.cleaned_data and self.cleaned_data['txt']):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (self.filename, self.revision))
try:
if xml_version != '3':
pagedwriter = xml2rfc.PaginatedTextRfcWriter(self.xmltree, quiet=True)
pagedwriter.write(file_name['txt'])
else:
writer = xml2rfc.TextWriter(self.xmltree, quiet=True)
writer.write(file_name['txt'])
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['txt']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
msgs = format_messages('txt', e, xml2rfc.log)
raise forms.ValidationError(msgs)
# --- Convert to html ---
if xml_version == '3':
try:
file_name['html'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.html' % (self.filename, self.revision))
writer = xml2rfc.HtmlWriter(self.xmltree, quiet=True)
writer.write(file_name['html'])
self.file_types.append('.html')
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['html']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
msgs = format_messages('html', e, xml2rfc.log)
raise forms.ValidationError(msgs)
if xml_version == '2':
ok, errors = self.xmltree.validate()
else:
ok, errors = True, ''
if not ok:
# Each error has properties:
#
# message: the message text
# domain: the domain ID (see lxml.etree.ErrorDomains)
# type: the message type ID (see lxml.etree.ErrorTypes)
# level: the log level ID (see lxml.etree.ErrorLevels)
# line: the line at which the message originated (if applicable)
# column: the character column at which the message originated (if applicable)
# filename: the name of the file in which the message originated (if applicable)
raise forms.ValidationError(
[ forms.ValidationError("One or more XML validation errors occurred when processing the XML file:") ] +
[ forms.ValidationError("%s: Line %s: %s" % (xml_file.name, r.line, r.message), code="%s"%r.type) for r in errors ]
)
finally:
os.close(tfh)
os.unlink(tfn)

View file

@ -8,7 +8,7 @@ import os
from django import template
from django.conf import settings
from django.utils.html import mark_safe, escape
from django.utils.html import mark_safe, escape # type:ignore
register = template.Library()

View file

@ -1774,7 +1774,7 @@ class ApiSubmitTests(TestCase):
self.assertContains(r, expected, status_code=400)
def test_api_submit_no_title(self):
r, author, name = self.post_submission('00', title="")
r, author, name = self.post_submission('00', title=" ")
expected = "Could not extract a valid title from the upload"
self.assertContains(r, expected, status_code=400)

View file

@ -9,7 +9,6 @@ import io
import os
import re
import six # pyflakes:ignore
import xml2rfc
if six.PY3:
from typing import Callable, Optional # pyflakes:ignore
@ -649,57 +648,14 @@ def get_draft_meta(form, saved_files):
file_name = saved_files
abstract = None
file_size = None
xml2rfc.log.write_out = open(os.devnull, "w")
xml2rfc.log.write_err = open(os.devnull, "w")
if form.cleaned_data['xml']:
try:
xmlroot = form.xmltree.getroot()
xml_version = xmlroot.get('version', '2')
if xml_version == '3':
prep = xml2rfc.PrepToolWriter(form.xmltree, quiet=True)
form.xmltree.tree = prep.prep()
if form.xmltree.tree == None:
raise ValidationError("Error from xml2rfc (prep): %s" % prep.errors)
except Exception as e:
raise ValidationError("Error from xml2rfc (prep): %s" % e)
if not ('txt' in form.cleaned_data and form.cleaned_data['txt']):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
try:
if xml_version != '3':
pagedwriter = xml2rfc.PaginatedTextRfcWriter(form.xmltree, quiet=True)
pagedwriter.write(file_name['txt'])
else:
writer = xml2rfc.TextWriter(form.xmltree, quiet=True)
writer.write(file_name['txt'])
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['txt']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
raise ValidationError("Error from xml2rfc (text): %s" % e)
file_size = os.stat(file_name['txt']).st_size
if xml_version == '3':
try:
file_name['html'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.html' % (form.filename, form.revision))
writer = xml2rfc.HtmlWriter(form.xmltree, quiet=True)
writer.write(file_name['html'])
form.file_types.append('.html')
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['html']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
raise ValidationError("Error from xml2rfc (html): %s" % e)
# Some meta-information, such as the page-count, can only
# be retrieved from the generated text file. Provide a
# parsed draft object to get at that kind of information.
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
file_size = os.stat(file_name['txt']).st_size
with io.open(file_name['txt']) as txt_file:
form.parsed_draft = Draft(txt_file.read(), txt_file.name)
else:
file_size = form.cleaned_data['txt'].size