fix: more submission date feedback; refactor xml2rfc log capture (#8621)

* feat: catch and report any <date> parsing error

* refactor: error handling in a more testable way

* fix: no bare `except`

* test: exception cases for test_parse_creation_date

* fix: explicitly reject non-numeric day/year

* test: suppress xml2rfc output in test

* refactor: context manager to capture xml2rfc output

* refactor: more capture_xml2rfc_output usage

* fix: capture_xml2rfc_output exception handling
This commit is contained in:
Jennifer Richards 2025-03-03 11:21:39 -04:00 committed by GitHub
parent a9a8f9ba01
commit cb8ef96f36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 232 additions and 103 deletions

View file

@ -58,7 +58,7 @@ from ietf.utils.draft import PlaintextDraft
from ietf.utils.mail import is_valid_email
from ietf.utils.text import parse_unicode, normalize_text
from ietf.utils.timezone import date_today
from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft
from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft, capture_xml2rfc_output
from ietf.person.name import unidecode_name
@ -926,105 +926,101 @@ def render_missing_formats(submission):
If a txt file already exists, leaves it in place. Overwrites an existing html file
if there is one.
"""
# Capture stdio/stdout from xml2rfc
xml2rfc_stdout = io.StringIO()
xml2rfc_stderr = io.StringIO()
xml2rfc.log.write_out = xml2rfc_stdout
xml2rfc.log.write_err = xml2rfc_stderr
xml_path = staging_path(submission.name, submission.rev, '.xml')
parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True)
try:
# --- Parse the xml ---
xmltree = parser.parse(remove_comments=False)
except Exception as err:
raise XmlRfcError(
"Error parsing XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# If we have v2, run it through v2v3. Keep track of the submitted version, though.
xmlroot = xmltree.getroot()
xml_version = xmlroot.get('version', '2')
if xml_version == '2':
v2v3 = xml2rfc.V2v3XmlWriter(xmltree)
with capture_xml2rfc_output() as xml2rfc_logs:
xml_path = staging_path(submission.name, submission.rev, '.xml')
parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True)
try:
xmltree.tree = v2v3.convert2to3()
# --- Parse the xml ---
xmltree = parser.parse(remove_comments=False)
except Exception as err:
raise XmlRfcError(
"Error converting v2 XML to v3",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
"Error parsing XML",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
# --- Prep the xml ---
today = date_today()
prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
prep.options.accept_prepped = True
prep.options.date = today
try:
xmltree.tree = prep.prep()
except RfcWriterError:
raise XmlRfcError(
f"Error during xml2rfc prep: {prep.errors}",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
)
except Exception as err:
raise XmlRfcError(
"Unexpected error during xml2rfc prep",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# --- Convert to txt ---
txt_path = staging_path(submission.name, submission.rev, '.txt')
if not txt_path.exists():
writer = xml2rfc.TextWriter(xmltree, quiet=True)
writer.options.accept_prepped = True
# If we have v2, run it through v2v3. Keep track of the submitted version, though.
xmlroot = xmltree.getroot()
xml_version = xmlroot.get('version', '2')
if xml_version == '2':
v2v3 = xml2rfc.V2v3XmlWriter(xmltree)
try:
xmltree.tree = v2v3.convert2to3()
except Exception as err:
raise XmlRfcError(
"Error converting v2 XML to v3",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
# --- Prep the xml ---
today = date_today()
prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
prep.options.accept_prepped = True
prep.options.date = today
try:
xmltree.tree = prep.prep()
except RfcWriterError:
raise XmlRfcError(
f"Error during xml2rfc prep: {prep.errors}",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
)
except Exception as err:
raise XmlRfcError(
"Unexpected error during xml2rfc prep",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
# --- Convert to txt ---
txt_path = staging_path(submission.name, submission.rev, '.txt')
if not txt_path.exists():
writer = xml2rfc.TextWriter(xmltree, quiet=True)
writer.options.accept_prepped = True
writer.options.date = today
try:
writer.write(txt_path)
except Exception as err:
raise XmlRfcError(
"Error generating text format from XML",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent),
xml2rfc.__version__,
txt_path.name,
xml_path.name,
xml_version,
)
)
# When the blobstores become autoritative - the guard at the
# containing if statement needs to be based on the store
with Path(txt_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.txt", f)
# --- Convert to html ---
html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today
try:
writer.write(txt_path)
writer.write(str(html_path))
except Exception as err:
raise XmlRfcError(
"Error generating text format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
"Error generating HTML format from XML",
xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(),
xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(),
) from err
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent),
xml2rfc.__version__,
txt_path.name,
html_path.name,
xml_path.name,
xml_version,
)
)
# When the blobstores become autoritative - the guard at the
# containing if statement needs to be based on the store
with Path(txt_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.txt", f)
# --- Convert to html ---
html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today
try:
writer.write(str(html_path))
except Exception as err:
raise XmlRfcError(
"Error generating HTML format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent),
xml2rfc.__version__,
html_path.name,
xml_path.name,
xml_version,
)
)
with Path(html_path).open("rb") as f:
store_file("staging", f"{submission.name}-{submission.rev}.html", f)

View file

@ -23,6 +23,8 @@ from fnmatch import fnmatch
from importlib import import_module
from textwrap import dedent
from tempfile import mkdtemp
from xml2rfc import log as xml2rfc_log
from xml2rfc.util.date import extract_date as xml2rfc_extract_date
from django.apps import apps
from django.contrib.auth.models import User
@ -57,7 +59,7 @@ from ietf.utils.test_runner import get_template_paths, set_coverage_checking
from ietf.utils.test_utils import TestCase, unicontent
from ietf.utils.text import parse_unicode
from ietf.utils.timezone import timezone_not_near_midnight
from ietf.utils.xmldraft import XMLDraft
from ietf.utils.xmldraft import XMLDraft, InvalidMetadataError, capture_xml2rfc_output
class SendingMail(TestCase):
@ -544,7 +546,7 @@ class XMLDraftTests(TestCase):
def test_parse_creation_date(self):
# override date_today to avoid skew when test runs around midnight
today = datetime.date.today()
with patch("ietf.utils.xmldraft.date_today", return_value=today):
with capture_xml2rfc_output(), patch("ietf.utils.xmldraft.date_today", return_value=today):
# Note: using a dict as a stand-in for XML elements, which rely on the get() method
self.assertEqual(
XMLDraft.parse_creation_date({"year": "2022", "month": "11", "day": "24"}),
@ -590,6 +592,74 @@ class XMLDraftTests(TestCase):
),
datetime.date(today.year, 1 if today.month != 1 else 2, 15),
)
# Some exeception-inducing conditions
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError if a year-only date is not current",
):
XMLDraft.parse_creation_date(
{
"year": str(today.year - 1),
"month": "",
"day": "",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a non-numeric year"
):
XMLDraft.parse_creation_date(
{
"year": "two thousand twenty-five",
"month": "2",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for an invalid month"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "13",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a misspelled month"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "Oktobur",
"day": "28",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for an invalid day"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "feb",
"day": "31",
}
)
with self.assertRaises(
InvalidMetadataError,
msg="raise an InvalidMetadataError for a non-numeric day"
):
XMLDraft.parse_creation_date(
{
"year": "2024",
"month": "feb",
"day": "twenty-four",
}
)
def test_parse_docname(self):
with self.assertRaises(ValueError) as cm:
@ -671,6 +741,39 @@ class XMLDraftTests(TestCase):
"J. Q.",
)
def test_capture_xml2rfc_output(self):
"""capture_xml2rfc_output reroutes and captures xml2rfc logs"""
orig_write_out = xml2rfc_log.write_out
orig_write_err = xml2rfc_log.write_err
with capture_xml2rfc_output() as outer_log_streams: # ensure no output
# such meta! very Inception!
with capture_xml2rfc_output() as inner_log_streams:
# arbitrary xml2rfc method that triggers a log, nothing special otherwise
xml2rfc_extract_date({"year": "fish"}, datetime.date(2025,3,1))
self.assertNotEqual(inner_log_streams, outer_log_streams)
self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored")
self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored")
# don't happen to get any output on stdout and not paranoid enough to force some, just test stderr
self.assertGreater(len(inner_log_streams["stderr"].getvalue()), 0, "want output on inner streams")
self.assertEqual(len(outer_log_streams["stdout"].getvalue()), 0, "no output on outer streams")
self.assertEqual(len(outer_log_streams["stderr"].getvalue()), 0, "no output on outer streams")
def test_capture_xml2rfc_output_exception_handling(self):
"""capture_xml2rfc_output restores streams after an exception"""
orig_write_out = xml2rfc_log.write_out
orig_write_err = xml2rfc_log.write_err
with capture_xml2rfc_output() as outer_log_streams: # ensure no output
with self.assertRaises(RuntimeError), capture_xml2rfc_output() as inner_log_streams:
raise RuntimeError("nooo")
self.assertNotEqual(inner_log_streams, outer_log_streams)
self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored")
self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored")
self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored")
class NameTests(TestCase):

View file

@ -7,7 +7,7 @@ import xml2rfc
import debug # pyflakes: ignore
from contextlib import ExitStack
from contextlib import contextmanager
from lxml.etree import XMLSyntaxError
from xml2rfc.util.date import augment_date, extract_date
from ietf.utils.timezone import date_today
@ -15,6 +15,21 @@ from ietf.utils.timezone import date_today
from .draft import Draft
@contextmanager
def capture_xml2rfc_output():
orig_write_out = xml2rfc.log.write_out
orig_write_err = xml2rfc.log.write_err
parser_out = io.StringIO()
parser_err = io.StringIO()
xml2rfc.log.write_out = parser_out
xml2rfc.log.write_err = parser_err
try:
yield {"stdout": parser_out, "stderr": parser_err}
finally:
xml2rfc.log.write_out = orig_write_out
xml2rfc.log.write_err = orig_write_err
class XMLDraft(Draft):
"""Draft from XML source
@ -38,27 +53,18 @@ class XMLDraft(Draft):
Converts to xml2rfc v3 schema, then returns the root of the v3 tree and the original
xml version.
"""
orig_write_out = xml2rfc.log.write_out
orig_write_err = xml2rfc.log.write_err
parser_out = io.StringIO()
parser_err = io.StringIO()
with ExitStack() as stack:
@stack.callback
def cleanup(): # called when context exited, even if there's an exception
xml2rfc.log.write_out = orig_write_out
xml2rfc.log.write_err = orig_write_err
xml2rfc.log.write_out = parser_out
xml2rfc.log.write_err = parser_err
with capture_xml2rfc_output() as parser_logs:
parser = xml2rfc.XmlRfcParser(filename, quiet=True)
try:
tree = parser.parse()
except XMLSyntaxError:
raise InvalidXMLError()
except Exception as e:
raise XMLParseError(parser_out.getvalue(), parser_err.getvalue()) from e
raise XMLParseError(
parser_logs["stdout"].getvalue(),
parser_logs["stderr"].getvalue(),
) from e
xml_version = tree.getroot().get('version', '2')
if xml_version == '2':
@ -147,10 +153,31 @@ class XMLDraft(Draft):
def parse_creation_date(date_elt):
if date_elt is None:
return None
today = date_today()
# ths mimics handling of date elements in the xml2rfc text/html writers
year, month, day = extract_date(date_elt, today)
year, month, day = augment_date(year, month, day, today)
# Outright reject non-numeric year / day (xml2rfc's extract_date does not do this)
# (n.b., "year" can be non-numeric in a <reference> section per RFC 7991)
year = date_elt.get("year")
day = date_elt.get("day")
non_numeric_year = year and not year.isdigit()
non_numeric_day = day and not day.isdigit()
if non_numeric_day or non_numeric_year:
raise InvalidMetadataError(
"Unable to parse the <date> element in the <front> section: "
"year and day must be numeric values if specified."
)
try:
# ths mimics handling of date elements in the xml2rfc text/html writers
year, month, day = extract_date(date_elt, today)
year, month, day = augment_date(year, month, day, today)
except Exception as err:
# Give a generic error if anything goes wrong so far...
raise InvalidMetadataError(
"Unable to parse the <date> element in the <front> section."
) from err
if not day:
# Must choose a day for a datetime.date. Per RFC 7991 sect 2.17, we use
# today's date if it is consistent with the rest of the date. Otherwise,
@ -159,9 +186,12 @@ class XMLDraft(Draft):
day = today.day
else:
day = 15
try:
creation_date = datetime.date(year, month, day)
except Exception:
# If everything went well, we should have had a valid datetime, but we didn't.
# The parsing _worked_ but not in a way that we can go forward with.
raise InvalidMetadataError(
"The <date> element in the <front> section specified an incomplete date "
"that was not consistent with today's date. If you specify only a year, "