Moved the code that generates .txt and .html draft files from .xml to a place where potential errors can be caught and displayed better. Related to ticket #2814.

- Legacy-Id: 16862
This commit is contained in:
Henrik Levkowetz 2019-10-15 14:33:35 +00:00
parent 2d9ff6ca70
commit 994e0ba6cc
4 changed files with 106 additions and 75 deletions

View file

@ -11,15 +11,22 @@ import datetime
import email import email
import pytz import pytz
import six import six
import sys
import tempfile import tempfile
import xml2rfc import xml2rfc
from email.utils import formataddr from email.utils import formataddr
from unidecode import unidecode from unidecode import unidecode
if six.PY2:
from StringIO import StringIO
else:
from io import StringIO
from django import forms from django import forms
from django.conf import settings from django.conf import settings
from django.utils.html import mark_safe from django.utils.html import mark_safe # type:ignore
from django.urls import reverse as urlreverse from django.urls import reverse as urlreverse
from django.utils.encoding import force_str from django.utils.encoding import force_str
@ -39,6 +46,7 @@ from ietf.submit.parsers.pdf_parser import PDFParser
from ietf.submit.parsers.plain_parser import PlainParser from ietf.submit.parsers.plain_parser import PlainParser
from ietf.submit.parsers.ps_parser import PSParser from ietf.submit.parsers.ps_parser import PSParser
from ietf.submit.parsers.xml_parser import XMLParser from ietf.submit.parsers.xml_parser import XMLParser
from ietf.utils import log
from ietf.utils.draft import Draft from ietf.utils.draft import Draft
class SubmissionBaseUploadForm(forms.Form): class SubmissionBaseUploadForm(forms.Form):
@ -128,6 +136,20 @@ class SubmissionBaseUploadForm(forms.Form):
return self.clean_file("xml", XMLParser) return self.clean_file("xml", XMLParser)
def clean(self): def clean(self):
def format_messages(where, e, log):
    """Build the list of message lines reported for a failed xml2rfc step.

    where -- short label for the step that failed; it is interpolated into
             the leading "Error from xml2rfc (...)" line.
    e     -- the exception that was caught.
    log   -- object whose ``write_out`` and ``write_err`` attributes both
             provide ``getvalue()`` (e.g. StringIO buffers holding captured
             output).

    Returns a list of non-empty strings: the header line, then the
    exception text (or a formatted traceback when the exception has no
    text), then the captured ``write_out`` lines, then the captured
    ``write_err`` lines.
    """
    import traceback

    out_lines = log.write_out.getvalue().splitlines()
    err_lines = log.write_err.getvalue().splitlines()

    text = str(e)
    if text:
        details = [text]
    else:
        # The exception carries no message, so fall back to a traceback.
        # Format the exception that was actually passed in rather than
        # whatever sys.exc_info() reports, so the helper also gives a
        # meaningful result when no exception is currently being handled.
        tb = getattr(e, '__traceback__', None)
        if tb is None and sys.exc_info()[1] is e:
            # No __traceback__ attribute (e.g. Python 2): use the traceback
            # of the exception being handled, but only if it is this one.
            tb = sys.exc_info()[2]
        formatted = traceback.format_exception(type(e), e, tb)
        # Put a colon in front of each newline-plus-space sequence.
        details = [part.replace('\n ', ':\n ') for part in formatted]

    header = "Error from xml2rfc (%s):" % (where,)
    # Drop empty strings (e.g. blank lines in the captured output).
    return [s for s in ([header] + details + out_lines + err_lines) if s]
if self.shutdown and not has_role(self.request.user, "Secretariat"): if self.shutdown and not has_role(self.request.user, "Secretariat"):
raise forms.ValidationError('The submission tool is currently shut down') raise forms.ValidationError('The submission tool is currently shut down')
@ -145,6 +167,9 @@ class SubmissionBaseUploadForm(forms.Form):
xml_file = self.cleaned_data.get('xml') xml_file = self.cleaned_data.get('xml')
name, ext = os.path.splitext(os.path.basename(xml_file.name)) name, ext = os.path.splitext(os.path.basename(xml_file.name))
tfh, tfn = tempfile.mkstemp(prefix=name+'-', suffix='.xml') tfh, tfn = tempfile.mkstemp(prefix=name+'-', suffix='.xml')
file_name = {}
xml2rfc.log.write_out = StringIO() # open(os.devnull, "w")
xml2rfc.log.write_err = StringIO() # open(os.devnull, "w")
try: try:
# We need to write the xml file to disk in order to hand it # We need to write the xml file to disk in order to hand it
# over to the xml parser. XXX FIXME: investigate updating # over to the xml parser. XXX FIXME: investigate updating
@ -154,33 +179,15 @@ class SubmissionBaseUploadForm(forms.Form):
for chunk in xml_file.chunks(): for chunk in xml_file.chunks():
tf.write(chunk) tf.write(chunk)
os.environ["XML_LIBRARY"] = settings.XML_LIBRARY os.environ["XML_LIBRARY"] = settings.XML_LIBRARY
# --- Parse the xml ---
try: try:
parser = xml2rfc.XmlRfcParser(str(tfn), quiet=True) parser = xml2rfc.XmlRfcParser(str(tfn), quiet=True)
self.xmltree = parser.parse(normalize=True) self.xmltree = parser.parse(normalize=True)
root = self.xmltree.getroot() self.xmlroot = self.xmltree.getroot()
ver = root.get('version', '2') xml_version = self.xmlroot.get('version', '2')
if ver == '2': except Exception as e:
ok, errors = self.xmltree.validate() raise forms.ValidationError("An exception occurred when trying to parse the XML file: %s" % e)
else:
# XXX TODO: Add v3 validation
ok, errors = True, ''
except Exception as exc:
raise forms.ValidationError("An exception occurred when trying to process the XML file: %s" % exc)
if not ok:
# Each error has properties:
#
# message: the message text
# domain: the domain ID (see lxml.etree.ErrorDomains)
# type: the message type ID (see lxml.etree.ErrorTypes)
# level: the log level ID (see lxml.etree.ErrorLevels)
# line: the line at which the message originated (if applicable)
# column: the character column at which the message originated (if applicable)
# filename: the name of the file in which the message originated (if applicable)
raise forms.ValidationError(
[ forms.ValidationError("One or more XML validation errors occurred when processing the XML file:") ] +
[ forms.ValidationError("%s: Line %s: %s" % (xml_file.name, e.line, e.message), code="%s"%e.type) for e in errors ]
)
self.xmlroot = self.xmltree.getroot()
draftname = self.xmlroot.attrib.get('docName') draftname = self.xmlroot.attrib.get('docName')
if draftname is None: if draftname is None:
raise forms.ValidationError("No docName attribute found in the xml root element") raise forms.ValidationError("No docName attribute found in the xml root element")
@ -212,8 +219,76 @@ class SubmissionBaseUploadForm(forms.Form):
if info[item]: if info[item]:
info[item] = info[item].strip() info[item] = info[item].strip()
self.authors.append(info) self.authors.append(info)
except forms.ValidationError:
raise # --- Prep the xml ---
file_name['xml'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.%s' % (self.filename, self.revision, ext))
try:
if xml_version == '3':
prep = xml2rfc.PrepToolWriter(self.xmltree, quiet=True)
self.xmltree.tree = prep.prep()
if self.xmltree.tree == None:
raise forms.ValidationError("Error from xml2rfc (prep): %s" % prep.errors)
except Exception as e:
msgs = format_messages('prep', e, xml2rfc.log)
raise forms.ValidationError(msgs)
# --- Convert to txt ---
if not ('txt' in self.cleaned_data and self.cleaned_data['txt']):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (self.filename, self.revision))
try:
if xml_version != '3':
pagedwriter = xml2rfc.PaginatedTextRfcWriter(self.xmltree, quiet=True)
pagedwriter.write(file_name['txt'])
else:
writer = xml2rfc.TextWriter(self.xmltree, quiet=True)
writer.write(file_name['txt'])
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['txt']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
msgs = format_messages('txt', e, xml2rfc.log)
raise forms.ValidationError(msgs)
# --- Convert to html ---
if xml_version == '3':
try:
file_name['html'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.html' % (self.filename, self.revision))
writer = xml2rfc.HtmlWriter(self.xmltree, quiet=True)
writer.write(file_name['html'])
self.file_types.append('.html')
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['html']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
msgs = format_messages('html', e, xml2rfc.log)
raise forms.ValidationError(msgs)
if xml_version == '2':
ok, errors = self.xmltree.validate()
else:
ok, errors = True, ''
if not ok:
# Each error has properties:
#
# message: the message text
# domain: the domain ID (see lxml.etree.ErrorDomains)
# type: the message type ID (see lxml.etree.ErrorTypes)
# level: the log level ID (see lxml.etree.ErrorLevels)
# line: the line at which the message originated (if applicable)
# column: the character column at which the message originated (if applicable)
# filename: the name of the file in which the message originated (if applicable)
raise forms.ValidationError(
[ forms.ValidationError("One or more XML validation errors occurred when processing the XML file:") ] +
[ forms.ValidationError("%s: Line %s: %s" % (xml_file.name, r.line, r.message), code="%s"%r.type) for r in errors ]
)
finally: finally:
os.close(tfh) os.close(tfh)
os.unlink(tfn) os.unlink(tfn)

View file

@ -8,7 +8,7 @@ import os
from django import template from django import template
from django.conf import settings from django.conf import settings
from django.utils.html import mark_safe, escape from django.utils.html import mark_safe, escape # type:ignore
register = template.Library() register = template.Library()

View file

@ -1774,7 +1774,7 @@ class ApiSubmitTests(TestCase):
self.assertContains(r, expected, status_code=400) self.assertContains(r, expected, status_code=400)
def test_api_submit_no_title(self): def test_api_submit_no_title(self):
r, author, name = self.post_submission('00', title="") r, author, name = self.post_submission('00', title=" ")
expected = "Could not extract a valid title from the upload" expected = "Could not extract a valid title from the upload"
self.assertContains(r, expected, status_code=400) self.assertContains(r, expected, status_code=400)

View file

@ -9,7 +9,6 @@ import io
import os import os
import re import re
import six # pyflakes:ignore import six # pyflakes:ignore
import xml2rfc
if six.PY3: if six.PY3:
from typing import Callable, Optional # pyflakes:ignore from typing import Callable, Optional # pyflakes:ignore
@ -649,57 +648,14 @@ def get_draft_meta(form, saved_files):
file_name = saved_files file_name = saved_files
abstract = None abstract = None
file_size = None file_size = None
xml2rfc.log.write_out = open(os.devnull, "w")
xml2rfc.log.write_err = open(os.devnull, "w")
if form.cleaned_data['xml']: if form.cleaned_data['xml']:
try:
xmlroot = form.xmltree.getroot()
xml_version = xmlroot.get('version', '2')
if xml_version == '3':
prep = xml2rfc.PrepToolWriter(form.xmltree, quiet=True)
form.xmltree.tree = prep.prep()
if form.xmltree.tree == None:
raise ValidationError("Error from xml2rfc (prep): %s" % prep.errors)
except Exception as e:
raise ValidationError("Error from xml2rfc (prep): %s" % e)
if not ('txt' in form.cleaned_data and form.cleaned_data['txt']):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
try:
if xml_version != '3':
pagedwriter = xml2rfc.PaginatedTextRfcWriter(form.xmltree, quiet=True)
pagedwriter.write(file_name['txt'])
else:
writer = xml2rfc.TextWriter(form.xmltree, quiet=True)
writer.write(file_name['txt'])
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['txt']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
raise ValidationError("Error from xml2rfc (text): %s" % e)
file_size = os.stat(file_name['txt']).st_size
if xml_version == '3':
try:
file_name['html'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.html' % (form.filename, form.revision))
writer = xml2rfc.HtmlWriter(form.xmltree, quiet=True)
writer.write(file_name['html'])
form.file_types.append('.html')
log.log("In %s: xml2rfc %s generated %s from %s (version %s)" %
( os.path.dirname(file_name['xml']),
xml2rfc.__version__,
os.path.basename(file_name['html']),
os.path.basename(file_name['xml']),
xml_version))
except Exception as e:
raise ValidationError("Error from xml2rfc (html): %s" % e)
# Some meta-information, such as the page-count, can only # Some meta-information, such as the page-count, can only
# be retrieved from the generated text file. Provide a # be retrieved from the generated text file. Provide a
# parsed draft object to get at that kind of information. # parsed draft object to get at that kind of information.
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
file_size = os.stat(file_name['txt']).st_size
with io.open(file_name['txt']) as txt_file: with io.open(file_name['txt']) as txt_file:
form.parsed_draft = Draft(txt_file.read(), txt_file.name) form.parsed_draft = Draft(txt_file.read(), txt_file.name)
else: else:
file_size = form.cleaned_data['txt'].size file_size = form.cleaned_data['txt'].size