fix: more submission date feedback; refactor xml2rfc log capture (#8621)

* feat: catch and report any <date> parsing error

* refactor: error handling in a more testable way

* fix: no bare `except`

* test: exception cases for test_parse_creation_date

* fix: explicitly reject non-numeric day/year

* test: suppress xml2rfc output in test

* refactor: context manager to capture xml2rfc output

* refactor: more capture_xml2rfc_output usage

* fix: capture_xml2rfc_output exception handling
This commit is contained in:
Jennifer Richards 2025-03-03 11:21:39 -04:00 committed by GitHub
parent a9a8f9ba01
commit cb8ef96f36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 232 additions and 103 deletions

View file

@ -58,7 +58,7 @@ from ietf.utils.draft import PlaintextDraft
from ietf.utils.mail import is_valid_email from ietf.utils.mail import is_valid_email
from ietf.utils.text import parse_unicode, normalize_text from ietf.utils.text import parse_unicode, normalize_text
from ietf.utils.timezone import date_today from ietf.utils.timezone import date_today
from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft, capture_xml2rfc_output
from ietf.person.name import unidecode_name from ietf.person.name import unidecode_name
@ -926,105 +926,101 @@ def render_missing_formats(submission):
If a txt file already exists, leaves it in place. Overwrites an existing html file If a txt file already exists, leaves it in place. Overwrites an existing html file
if there is one. if there is one.
""" """
# Capture stdio/stdout from xml2rfc with capture_xml2rfc_output() as xml2rfc_logs:
xml2rfc_stdout = io.StringIO() xml_path = staging_path(submission.name, submission.rev, '.xml')
xml2rfc_stderr = io.StringIO() parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True)
xml2rfc.log.write_out = xml2rfc_stdout
xml2rfc.log.write_err = xml2rfc_stderr
xml_path = staging_path(submission.name, submission.rev, '.xml')
parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True)
try:
# --- Parse the xml ---
xmltree = parser.parse(remove_comments=False)
except Exception as err:
raise XmlRfcError(
"Error parsing XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# If we have v2, run it through v2v3. Keep track of the submitted version, though.
xmlroot = xmltree.getroot()
xml_version = xmlroot.get('version', '2')
if xml_version == '2':
v2v3 = xml2rfc.V2v3XmlWriter(xmltree)
try: try:
xmltree.tree = v2v3.convert2to3() # --- Parse the xml ---
xmltree = parser.parse(remove_comments=False)
except Exception as err: except Exception as err:
raise XmlRfcError( raise XmlRfcError(
"Error converting v2 XML to v3", "Error parsing XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(), xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(), xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err ) from err
# If we have v2, run it through v2v3. Keep track of the submitted version, though.
# --- Prep the xml --- xmlroot = xmltree.getroot()
today = date_today() xml_version = xmlroot.get('version', '2')
prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET]) if xml_version == '2':
prep.options.accept_prepped = True v2v3 = xml2rfc.V2v3XmlWriter(xmltree)
prep.options.date = today try:
try: xmltree.tree = v2v3.convert2to3()
xmltree.tree = prep.prep() except Exception as err:
except RfcWriterError: raise XmlRfcError(
raise XmlRfcError( "Error converting v2 XML to v3",
f"Error during xml2rfc prep: {prep.errors}", xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stdout=xml2rfc_stdout.getvalue(), xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(), ) from err
)
except Exception as err: # --- Prep the xml ---
raise XmlRfcError( today = date_today()
"Unexpected error during xml2rfc prep", prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
xml2rfc_stdout=xml2rfc_stdout.getvalue(), prep.options.accept_prepped = True
xml2rfc_stderr=xml2rfc_stderr.getvalue(), prep.options.date = today
) from err try:
xmltree.tree = prep.prep()
# --- Convert to txt --- except RfcWriterError:
txt_path = staging_path(submission.name, submission.rev, '.txt') raise XmlRfcError(
if not txt_path.exists(): f"Error during xml2rfc prep: {prep.errors}",
writer = xml2rfc.TextWriter(xmltree, quiet=True) xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
writer.options.accept_prepped = True xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
)
except Exception as err:
raise XmlRfcError(
"Unexpected error during xml2rfc prep",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
# --- Convert to txt ---
txt_path = staging_path(submission.name, submission.rev, '.txt')
if not txt_path.exists():
writer = xml2rfc.TextWriter(xmltree, quiet=True)
writer.options.accept_prepped = True
writer.options.date = today
try:
writer.write(txt_path)
except Exception as err:
raise XmlRfcError(
"Error generating text format from XML",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent),
xml2rfc.__version__,
txt_path.name,
xml_path.name,
xml_version,
)
)
# When the blobstores become autoritative - the guard at the
# containing if statement needs to be based on the store
with Path(txt_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.txt", f)
# --- Convert to html ---
html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today writer.options.date = today
try: try:
writer.write(txt_path) writer.write(str(html_path))
except Exception as err: except Exception as err:
raise XmlRfcError( raise XmlRfcError(
"Error generating text format from XML", "Error generating HTML format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(), xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(), xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err ) from err
log.log( log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % ( 'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent), str(xml_path.parent),
xml2rfc.__version__, xml2rfc.__version__,
txt_path.name, html_path.name,
xml_path.name, xml_path.name,
xml_version, xml_version,
) )
) )
# When the blobstores become autoritative - the guard at the
# containing if statement needs to be based on the store
with Path(txt_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.txt", f)
# --- Convert to html ---
html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today
try:
writer.write(str(html_path))
except Exception as err:
raise XmlRfcError(
"Error generating HTML format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent),
xml2rfc.__version__,
html_path.name,
xml_path.name,
xml_version,
)
)
with Path(html_path).open("rb") as f: with Path(html_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.html", f) store_file("staging", f"{submission.name}-{submission.rev}.html", f)

View file

@ -23,6 +23,8 @@ from fnmatch import fnmatch
from importlib import import_module from importlib import import_module
from textwrap import dedent from textwrap import dedent
from tempfile import mkdtemp from tempfile import mkdtemp
from xml2rfc import log as xml2rfc_log
from xml2rfc.util.date import extract_date as xml2rfc_extract_date
from django.apps import apps from django.apps import apps
from django.contrib.auth.models import User from django.contrib.auth.models import User
@ -57,7 +59,7 @@ from ietf.utils.test_runner import get_template_paths, set_coverage_checking
from ietf.utils.test_utils import TestCase, unicontent from ietf.utils.test_utils import TestCase, unicontent
from ietf.utils.text import parse_unicode from ietf.utils.text import parse_unicode
from ietf.utils.timezone import timezone_not_near_midnight from ietf.utils.timezone import timezone_not_near_midnight
from ietf.utils.xmldraft import XMLDraft from ietf.utils.xmldraft import XMLDraft, InvalidMetadataError, capture_xml2rfc_output
class SendingMail(TestCase): class SendingMail(TestCase):
@ -544,7 +546,7 @@ class XMLDraftTests(TestCase):
def test_parse_creation_date(self): def test_parse_creation_date(self):
# override date_today to avoid skew when test runs around midnight # override date_today to avoid skew when test runs around midnight
today = datetime.date.today() today = datetime.date.today()
with patch("ietf.utils.xmldraft.date_today", return_value=today): with capture_xml2rfc_output(), patch("ietf.utils.xmldraft.date_today", return_value=today):
# Note: using a dict as a stand-in for XML elements, which rely on the get() method # Note: using a dict as a stand-in for XML elements, which rely on the get() method
self.assertEqual( self.assertEqual(
XMLDraft.parse_creation_date({"year": "2022", "month": "11", "day": "24"}), XMLDraft.parse_creation_date({"year": "2022", "month": "11", "day": "24"}),
@ -590,6 +592,74 @@ class XMLDraftTests(TestCase):
), ),
datetime.date(today.year, 1 if today.month != 1 else 2, 15), datetime.date(today.year, 1 if today.month != 1 else 2, 15),
) )
# Some exeception-inducing conditions
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError if a year-only date is not current",
):
XMLDraft.parse_creation_date(
{
"year": str(today.year - 1),
"month": "",
"day": "",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a non-numeric year"
):
XMLDraft.parse_creation_date(
{
"year": "two thousand twenty-five",
"month": "2",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for an invalid month"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "13",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a misspelled month"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "Oktobur",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for an invalid day"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "feb",
"day": "31",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a non-numeric day"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "feb",
"day": "twenty-four",
}
)
def test_parse_docname(self): def test_parse_docname(self):
with self.assertRaises(ValueError) as cm: with self.assertRaises(ValueError) as cm:
@ -671,6 +741,39 @@ class XMLDraftTests(TestCase):
"J. Q.", "J. Q.",
) )
def test_capture_xml2rfc_output(self):
"""capture_xml2rfc_output reroutes and captures xml2rfc logs"""
orig_write_out = xml2rfc_log.write_out
orig_write_err = xml2rfc_log.write_err
with capture_xml2rfc_output() as outer_log_streams: # ensure no output
# such meta! very Inception!
with capture_xml2rfc_output() as inner_log_streams:
# arbitrary xml2rfc method that triggers a log, nothing special otherwise
xml2rfc_extract_date({"year": "fish"}, datetime.date(2025,3,1))
self.assertNotEqual(inner_log_streams, outer_log_streams)
self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored")
self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored")
# don't happen to get any output on stdout and not paranoid enough to force some, just test stderr
self.assertGreater(len(inner_log_streams["stderr"].getvalue()), 0, "want output on inner streams")
self.assertEqual(len(outer_log_streams["stdout"].getvalue()), 0, "no output on outer streams")
self.assertEqual(len(outer_log_streams["stderr"].getvalue()), 0, "no output on outer streams")
def test_capture_xml2rfc_output_exception_handling(self):
"""capture_xml2rfc_output restores streams after an exception"""
orig_write_out = xml2rfc_log.write_out
orig_write_err = xml2rfc_log.write_err
with capture_xml2rfc_output() as outer_log_streams: # ensure no output
with self.assertRaises(RuntimeError), capture_xml2rfc_output() as inner_log_streams:
raise RuntimeError("nooo")
self.assertNotEqual(inner_log_streams, outer_log_streams)
self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored")
self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored")
class NameTests(TestCase): class NameTests(TestCase):

View file

@ -7,7 +7,7 @@ import xml2rfc
import debug # pyflakes: ignore import debug # pyflakes: ignore
from contextlib import ExitStack from contextlib import contextmanager
from lxml.etree import XMLSyntaxError from lxml.etree import XMLSyntaxError
from xml2rfc.util.date import augment_date, extract_date from xml2rfc.util.date import augment_date, extract_date
from ietf.utils.timezone import date_today from ietf.utils.timezone import date_today
@ -15,6 +15,21 @@ from ietf.utils.timezone import date_today
from .draft import Draft from .draft import Draft
@contextmanager
def capture_xml2rfc_output():
orig_write_out = xml2rfc.log.write_out
orig_write_err = xml2rfc.log.write_err
parser_out = io.StringIO()
parser_err = io.StringIO()
xml2rfc.log.write_out = parser_out
xml2rfc.log.write_err = parser_err
try:
yield {"stdout": parser_out, "stderr": parser_err}
finally:
xml2rfc.log.write_out = orig_write_out
xml2rfc.log.write_err = orig_write_err
class XMLDraft(Draft): class XMLDraft(Draft):
"""Draft from XML source """Draft from XML source
@ -38,27 +53,18 @@ class XMLDraft(Draft):
Converts to xml2rfc v3 schema, then returns the root of the v3 tree and the original Converts to xml2rfc v3 schema, then returns the root of the v3 tree and the original
xml version. xml version.
""" """
orig_write_out = xml2rfc.log.write_out
orig_write_err = xml2rfc.log.write_err
parser_out = io.StringIO()
parser_err = io.StringIO()
with ExitStack() as stack:
@stack.callback
def cleanup(): # called when context exited, even if there's an exception
xml2rfc.log.write_out = orig_write_out
xml2rfc.log.write_err = orig_write_err
xml2rfc.log.write_out = parser_out
xml2rfc.log.write_err = parser_err
with capture_xml2rfc_output() as parser_logs:
parser = xml2rfc.XmlRfcParser(filename, quiet=True) parser = xml2rfc.XmlRfcParser(filename, quiet=True)
try: try:
tree = parser.parse() tree = parser.parse()
except XMLSyntaxError: except XMLSyntaxError:
raise InvalidXMLError() raise InvalidXMLError()
except Exception as e: except Exception as e:
raise XMLParseError(parser_out.getvalue(), parser_err.getvalue()) from e raise XMLParseError(
parser_logs["stdout"].getvalue(),
parser_logs["stderr"].getvalue(),
) from e
xml_version = tree.getroot().get('version', '2') xml_version = tree.getroot().get('version', '2')
if xml_version == '2': if xml_version == '2':
@ -147,10 +153,31 @@ class XMLDraft(Draft):
def parse_creation_date(date_elt): def parse_creation_date(date_elt):
if date_elt is None: if date_elt is None:
return None return None
today = date_today() today = date_today()
# ths mimics handling of date elements in the xml2rfc text/html writers
year, month, day = extract_date(date_elt, today) # Outright reject non-numeric year / day (xml2rfc's extract_date does not do this)
year, month, day = augment_date(year, month, day, today) # (n.b., "year" can be non-numeric in a <reference> section per RFC 7991)
year = date_elt.get("year")
day = date_elt.get("day")
non_numeric_year = year and not year.isdigit()
non_numeric_day = day and not day.isdigit()
if non_numeric_day or non_numeric_year:
raise InvalidMetadataError(
"Unable to parse the <date> element in the <front> section: "
"year and day must be numeric values if specified."
)
try:
# ths mimics handling of date elements in the xml2rfc text/html writers
year, month, day = extract_date(date_elt, today)
year, month, day = augment_date(year, month, day, today)
except Exception as err:
# Give a generic error if anything goes wrong so far...
raise InvalidMetadataError(
"Unable to parse the <date> element in the <front> section."
) from err
if not day: if not day:
# Must choose a day for a datetime.date. Per RFC 7991 sect 2.17, we use # Must choose a day for a datetime.date. Per RFC 7991 sect 2.17, we use
# today's date if it is consistent with the rest of the date. Otherwise, # today's date if it is consistent with the rest of the date. Otherwise,
@ -159,9 +186,12 @@ class XMLDraft(Draft):
day = today.day day = today.day
else: else:
day = 15 day = 15
try: try:
creation_date = datetime.date(year, month, day) creation_date = datetime.date(year, month, day)
except Exception: except Exception:
# If everything went well, we should have had a valid datetime, but we didn't.
# The parsing _worked_ but not in a way that we can go forward with.
raise InvalidMetadataError( raise InvalidMetadataError(
"The <date> element in the <front> section specified an incomplete date " "The <date> element in the <front> section specified an incomplete date "
"that was not consistent with today's date. If you specify only a year, " "that was not consistent with today's date. If you specify only a year, "