From cb8ef96f364c3e76d6726f12889d534ab1b49a66 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 3 Mar 2025 11:21:39 -0400 Subject: [PATCH] fix: more submission date feedback; refactor xml2rfc log capture (#8621) * feat: catch and report any parsing error * refactor: error handling in a more testable way * fix: no bare `except` * test: exception cases for test_parse_creation_date * fix: explicitly reject non-numeric day/year * test: suppress xml2rfc output in test * refactor: context manager to capture xml2rfc output * refactor: more capture_xml2rfc_output usage * fix: capture_xml2rfc_output exception handling --- ietf/submit/utils.py | 162 ++++++++++++++++++++--------------------- ietf/utils/tests.py | 107 ++++++++++++++++++++++++++- ietf/utils/xmldraft.py | 66 ++++++++++++----- 3 files changed, 232 insertions(+), 103 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 61274c011..a0c7dd851 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -58,7 +58,7 @@ from ietf.utils.draft import PlaintextDraft from ietf.utils.mail import is_valid_email from ietf.utils.text import parse_unicode, normalize_text from ietf.utils.timezone import date_today -from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft +from ietf.utils.xmldraft import InvalidMetadataError, XMLDraft, capture_xml2rfc_output from ietf.person.name import unidecode_name @@ -926,105 +926,101 @@ def render_missing_formats(submission): If a txt file already exists, leaves it in place. Overwrites an existing html file if there is one. """ - # Capture stdio/stdout from xml2rfc - xml2rfc_stdout = io.StringIO() - xml2rfc_stderr = io.StringIO() - xml2rfc.log.write_out = xml2rfc_stdout - xml2rfc.log.write_err = xml2rfc_stderr - xml_path = staging_path(submission.name, submission.rev, '.xml') - parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True) - try: - # --- Parse the xml --- - xmltree = parser.parse(remove_comments=False) - except Exception as err: - raise XmlRfcError( - "Error parsing XML", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), - ) from err - # If we have v2, run it through v2v3. Keep track of the submitted version, though. - xmlroot = xmltree.getroot() - xml_version = xmlroot.get('version', '2') - if xml_version == '2': - v2v3 = xml2rfc.V2v3XmlWriter(xmltree) + with capture_xml2rfc_output() as xml2rfc_logs: + xml_path = staging_path(submission.name, submission.rev, '.xml') + parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True) try: - xmltree.tree = v2v3.convert2to3() + # --- Parse the xml --- + xmltree = parser.parse(remove_comments=False) except Exception as err: raise XmlRfcError( - "Error converting v2 XML to v3", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), + "Error parsing XML", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), ) from err - - # --- Prep the xml --- - today = date_today() - prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET]) - prep.options.accept_prepped = True - prep.options.date = today - try: - xmltree.tree = prep.prep() - except RfcWriterError: - raise XmlRfcError( - f"Error during xml2rfc prep: {prep.errors}", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), - ) - except Exception as err: - raise XmlRfcError( - "Unexpected error during xml2rfc prep", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), - ) from err - - # --- Convert to txt --- - txt_path = staging_path(submission.name, submission.rev, '.txt') - if not txt_path.exists(): - writer = xml2rfc.TextWriter(xmltree, quiet=True) - writer.options.accept_prepped = True + # If we have v2, run it through v2v3. Keep track of the submitted version, though. + xmlroot = xmltree.getroot() + xml_version = xmlroot.get('version', '2') + if xml_version == '2': + v2v3 = xml2rfc.V2v3XmlWriter(xmltree) + try: + xmltree.tree = v2v3.convert2to3() + except Exception as err: + raise XmlRfcError( + "Error converting v2 XML to v3", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), + ) from err + + # --- Prep the xml --- + today = date_today() + prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET]) + prep.options.accept_prepped = True + prep.options.date = today + try: + xmltree.tree = prep.prep() + except RfcWriterError: + raise XmlRfcError( + f"Error during xml2rfc prep: {prep.errors}", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), + ) + except Exception as err: + raise XmlRfcError( + "Unexpected error during xml2rfc prep", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), + ) from err + + # --- Convert to txt --- + txt_path = staging_path(submission.name, submission.rev, '.txt') + if not txt_path.exists(): + writer = xml2rfc.TextWriter(xmltree, quiet=True) + writer.options.accept_prepped = True + writer.options.date = today + try: + writer.write(txt_path) + except Exception as err: + raise XmlRfcError( + "Error generating text format from XML", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), + ) from err + log.log( + 'In %s: xml2rfc %s generated %s from %s (version %s)' % ( + str(xml_path.parent), + xml2rfc.__version__, + txt_path.name, + xml_path.name, + xml_version, + ) + ) + # When the blobstores become autoritative - the guard at the + # containing if statement needs to be based on the store + with Path(txt_path).open("rb") as f: + store_file("staging", f"{submission.name}-{submission.rev}.txt", f) + + # --- Convert to html --- + html_path = staging_path(submission.name, submission.rev, '.html') + writer = xml2rfc.HtmlWriter(xmltree, quiet=True) writer.options.date = today try: - writer.write(txt_path) + writer.write(str(html_path)) except Exception as err: raise XmlRfcError( - "Error generating text format from XML", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), + "Error generating HTML format from XML", + xml2rfc_stdout=xml2rfc_logs["stdout"].getvalue(), + xml2rfc_stderr=xml2rfc_logs["stderr"].getvalue(), ) from err log.log( 'In %s: xml2rfc %s generated %s from %s (version %s)' % ( str(xml_path.parent), xml2rfc.__version__, - txt_path.name, + html_path.name, xml_path.name, xml_version, ) ) - # When the blobstores become autoritative - the guard at the - # containing if statement needs to be based on the store - with Path(txt_path).open("rb") as f: - store_file("staging", f"{submission.name}-{submission.rev}.txt", f) - - # --- Convert to html --- - html_path = staging_path(submission.name, submission.rev, '.html') - writer = xml2rfc.HtmlWriter(xmltree, quiet=True) - writer.options.date = today - try: - writer.write(str(html_path)) - except Exception as err: - raise XmlRfcError( - "Error generating HTML format from XML", - xml2rfc_stdout=xml2rfc_stdout.getvalue(), - xml2rfc_stderr=xml2rfc_stderr.getvalue(), - ) from err - log.log( - 'In %s: xml2rfc %s generated %s from %s (version %s)' % ( - str(xml_path.parent), - xml2rfc.__version__, - html_path.name, - xml_path.name, - xml_version, - ) - ) with Path(html_path).open("rb") as f: store_file("staging", f"{submission.name}-{submission.rev}.html", f) diff --git a/ietf/utils/tests.py b/ietf/utils/tests.py index 0a1986a60..2dd861cd1 100644 --- a/ietf/utils/tests.py +++ b/ietf/utils/tests.py @@ -23,6 +23,8 @@ from fnmatch import fnmatch from importlib import import_module from textwrap import dedent from tempfile import mkdtemp +from xml2rfc import log as xml2rfc_log +from xml2rfc.util.date import extract_date as xml2rfc_extract_date from django.apps import apps from django.contrib.auth.models import User @@ -57,7 +59,7 @@ from ietf.utils.test_runner import get_template_paths, set_coverage_checking from ietf.utils.test_utils import TestCase, unicontent from ietf.utils.text import parse_unicode from ietf.utils.timezone import timezone_not_near_midnight -from ietf.utils.xmldraft import XMLDraft +from ietf.utils.xmldraft import XMLDraft, InvalidMetadataError, capture_xml2rfc_output class SendingMail(TestCase): @@ -544,7 +546,7 @@ class XMLDraftTests(TestCase): def test_parse_creation_date(self): # override date_today to avoid skew when test runs around midnight today = datetime.date.today() - with patch("ietf.utils.xmldraft.date_today", return_value=today): + with capture_xml2rfc_output(), patch("ietf.utils.xmldraft.date_today", return_value=today): # Note: using a dict as a stand-in for XML elements, which rely on the get() method self.assertEqual( XMLDraft.parse_creation_date({"year": "2022", "month": "11", "day": "24"}), @@ -590,6 +592,74 @@ class XMLDraftTests(TestCase): ), datetime.date(today.year, 1 if today.month != 1 else 2, 15), ) + # Some exeception-inducing conditions + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError if a year-only date is not current", + ): + XMLDraft.parse_creation_date( + { + "year": str(today.year - 1), + "month": "", + "day": "", + } + ) + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError for a non-numeric year" + ): + XMLDraft.parse_creation_date( + { + "year": "two thousand twenty-five", + "month": "2", + "day": "28", + } + ) + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError for an invalid month" + ): + XMLDraft.parse_creation_date( + { + "year": "2024", + "month": "13", + "day": "28", + } + ) + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError for a misspelled month" + ): + XMLDraft.parse_creation_date( + { + "year": "2024", + "month": "Oktobur", + "day": "28", + } + ) + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError for an invalid day" + ): + XMLDraft.parse_creation_date( + { + "year": "2024", + "month": "feb", + "day": "31", + } + ) + with self.assertRaises( + InvalidMetadataError, + msg="raise an InvalidMetadataError for a non-numeric day" + ): + XMLDraft.parse_creation_date( + { + "year": "2024", + "month": "feb", + "day": "twenty-four", + } + ) + def test_parse_docname(self): with self.assertRaises(ValueError) as cm: @@ -671,6 +741,39 @@ class XMLDraftTests(TestCase): "J. Q.", ) + def test_capture_xml2rfc_output(self): + """capture_xml2rfc_output reroutes and captures xml2rfc logs""" + orig_write_out = xml2rfc_log.write_out + orig_write_err = xml2rfc_log.write_err + with capture_xml2rfc_output() as outer_log_streams: # ensure no output + # such meta! very Inception! + with capture_xml2rfc_output() as inner_log_streams: + # arbitrary xml2rfc method that triggers a log, nothing special otherwise + xml2rfc_extract_date({"year": "fish"}, datetime.date(2025,3,1)) + self.assertNotEqual(inner_log_streams, outer_log_streams) + self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored") + self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored") + self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored") + self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored") + + # don't happen to get any output on stdout and not paranoid enough to force some, just test stderr + self.assertGreater(len(inner_log_streams["stderr"].getvalue()), 0, "want output on inner streams") + self.assertEqual(len(outer_log_streams["stdout"].getvalue()), 0, "no output on outer streams") + self.assertEqual(len(outer_log_streams["stderr"].getvalue()), 0, "no output on outer streams") + + def test_capture_xml2rfc_output_exception_handling(self): + """capture_xml2rfc_output restores streams after an exception""" + orig_write_out = xml2rfc_log.write_out + orig_write_err = xml2rfc_log.write_err + with capture_xml2rfc_output() as outer_log_streams: # ensure no output + with self.assertRaises(RuntimeError), capture_xml2rfc_output() as inner_log_streams: + raise RuntimeError("nooo") + self.assertNotEqual(inner_log_streams, outer_log_streams) + self.assertEqual(xml2rfc_log.write_out, outer_log_streams["stdout"], "out stream should be restored") + self.assertEqual(xml2rfc_log.write_err, outer_log_streams["stderr"], "err stream should be restored") + self.assertEqual(xml2rfc_log.write_out, orig_write_out, "original out stream should be restored") + self.assertEqual(xml2rfc_log.write_err, orig_write_err, "original err stream should be restored") + class NameTests(TestCase): diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py index 73baf917d..3ac9a269c 100644 --- a/ietf/utils/xmldraft.py +++ b/ietf/utils/xmldraft.py @@ -7,7 +7,7 @@ import xml2rfc import debug # pyflakes: ignore -from contextlib import ExitStack +from contextlib import contextmanager from lxml.etree import XMLSyntaxError from xml2rfc.util.date import augment_date, extract_date from ietf.utils.timezone import date_today @@ -15,6 +15,21 @@ from ietf.utils.timezone import date_today from .draft import Draft +@contextmanager +def capture_xml2rfc_output(): + orig_write_out = xml2rfc.log.write_out + orig_write_err = xml2rfc.log.write_err + parser_out = io.StringIO() + parser_err = io.StringIO() + xml2rfc.log.write_out = parser_out + xml2rfc.log.write_err = parser_err + try: + yield {"stdout": parser_out, "stderr": parser_err} + finally: + xml2rfc.log.write_out = orig_write_out + xml2rfc.log.write_err = orig_write_err + + class XMLDraft(Draft): """Draft from XML source @@ -38,27 +53,18 @@ class XMLDraft(Draft): Converts to xml2rfc v3 schema, then returns the root of the v3 tree and the original xml version. """ - orig_write_out = xml2rfc.log.write_out - orig_write_err = xml2rfc.log.write_err - parser_out = io.StringIO() - parser_err = io.StringIO() - - with ExitStack() as stack: - @stack.callback - def cleanup(): # called when context exited, even if there's an exception - xml2rfc.log.write_out = orig_write_out - xml2rfc.log.write_err = orig_write_err - - xml2rfc.log.write_out = parser_out - xml2rfc.log.write_err = parser_err + with capture_xml2rfc_output() as parser_logs: parser = xml2rfc.XmlRfcParser(filename, quiet=True) try: tree = parser.parse() except XMLSyntaxError: raise InvalidXMLError() except Exception as e: - raise XMLParseError(parser_out.getvalue(), parser_err.getvalue()) from e + raise XMLParseError( + parser_logs["stdout"].getvalue(), + parser_logs["stderr"].getvalue(), + ) from e xml_version = tree.getroot().get('version', '2') if xml_version == '2': @@ -147,10 +153,31 @@ class XMLDraft(Draft): def parse_creation_date(date_elt): if date_elt is None: return None + today = date_today() - # ths mimics handling of date elements in the xml2rfc text/html writers - year, month, day = extract_date(date_elt, today) - year, month, day = augment_date(year, month, day, today) + + # Outright reject non-numeric year / day (xml2rfc's extract_date does not do this) + # (n.b., "year" can be non-numeric in a section per RFC 7991) + year = date_elt.get("year") + day = date_elt.get("day") + non_numeric_year = year and not year.isdigit() + non_numeric_day = day and not day.isdigit() + if non_numeric_day or non_numeric_year: + raise InvalidMetadataError( + "Unable to parse the element in the section: " + "year and day must be numeric values if specified." + ) + + try: + # ths mimics handling of date elements in the xml2rfc text/html writers + year, month, day = extract_date(date_elt, today) + year, month, day = augment_date(year, month, day, today) + except Exception as err: + # Give a generic error if anything goes wrong so far... + raise InvalidMetadataError( + "Unable to parse the element in the section." + ) from err + if not day: # Must choose a day for a datetime.date. Per RFC 7991 sect 2.17, we use # today's date if it is consistent with the rest of the date. Otherwise, @@ -159,9 +186,12 @@ class XMLDraft(Draft): day = today.day else: day = 15 + try: creation_date = datetime.date(year, month, day) except Exception: + # If everything went well, we should have had a valid datetime, but we didn't. + # The parsing _worked_ but not in a way that we can go forward with. raise InvalidMetadataError( "The element in the section specified an incomplete date " "that was not consistent with today's date. If you specify only a year, "