feat: report xml2rfc errors for submissions (#8000)

* feat: capture xml2rfc output on exception

* fix: chain exception properly

* feat: log xml2rfc stderr/stdout

May want to pass these back to the submitter, but
let's watch them to see what sorts of sanitizing we
should do first.

* Revert "feat: log xml2rfc stderr/stdout"

This reverts commit 959c54d30733a4a3df7ed0250fef347ed6f03a08.

* feat: log xml2rfc stderr/stdout

This time without reformatting all the imports

* feat: path sanitization for submission errors

* fix: parentheses in regex

* test: test SubmissionError sanitization

* style: consolidate imports

* fix: apply sanitization to all args

---------

Co-authored-by: Robert Sparks <rjsparks@nostrum.com>
This commit is contained in:
Jennifer Richards 2024-10-17 18:23:13 -03:00 committed by GitHub
parent a338df16f2
commit de2e66ede3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 144 additions and 22 deletions

View file

@ -27,11 +27,6 @@ from django.utils import timezone
from django.utils.encoding import force_str from django.utils.encoding import force_str
import debug # pyflakes:ignore import debug # pyflakes:ignore
from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
post_submission, validate_submission_name, validate_submission_rev,
process_and_accept_uploaded_submission, SubmissionError, process_submission_text,
process_submission_xml, process_uploaded_submission,
process_and_validate_submission)
from ietf.doc.factories import (DocumentFactory, WgDraftFactory, IndividualDraftFactory, from ietf.doc.factories import (DocumentFactory, WgDraftFactory, IndividualDraftFactory,
ReviewFactory, WgRfcFactory) ReviewFactory, WgRfcFactory)
from ietf.doc.models import ( Document, DocEvent, State, from ietf.doc.models import ( Document, DocEvent, State,
@ -49,7 +44,12 @@ from ietf.submit.factories import SubmissionFactory, SubmissionExtResourceFactor
from ietf.submit.forms import SubmissionBaseUploadForm, SubmissionAutoUploadForm from ietf.submit.forms import SubmissionBaseUploadForm, SubmissionAutoUploadForm
from ietf.submit.models import Submission, Preapproval, SubmissionExtResource from ietf.submit.models import Submission, Preapproval, SubmissionExtResource
from ietf.submit.tasks import cancel_stale_submissions, process_and_accept_uploaded_submission_task from ietf.submit.tasks import cancel_stale_submissions, process_and_accept_uploaded_submission_task
from ietf.submit.utils import apply_yang_checker_to_draft, run_all_yang_model_checks from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
post_submission, validate_submission_name, validate_submission_rev,
process_and_accept_uploaded_submission, SubmissionError, process_submission_text,
process_submission_xml, process_uploaded_submission,
process_and_validate_submission, apply_yang_checker_to_draft,
run_all_yang_model_checks)
from ietf.utils import tool_version from ietf.utils import tool_version
from ietf.utils.accesstoken import generate_access_token from ietf.utils.accesstoken import generate_access_token
from ietf.utils.mail import outbox, get_payload_text from ietf.utils.mail import outbox, get_payload_text
@ -3384,3 +3384,29 @@ class YangCheckerTests(TestCase):
apply_yang_checker_to_draft(checker, draft) apply_yang_checker_to_draft(checker, draft)
self.assertEqual(checker.check_file_txt.call_args, mock.call(draft.get_file_name())) self.assertEqual(checker.check_file_txt.call_args, mock.call(draft.get_file_name()))
@override_settings(IDSUBMIT_REPOSITORY_PATH="/some/path/", IDSUBMIT_STAGING_PATH="/some/other/path")
class SubmissionErrorTests(TestCase):
def test_sanitize_message(self):
sanitized = SubmissionError.sanitize_message(
"This refers to /some/path/with-a-file\n"
"and also /some/other/path/with-a-different-file isn't that neat?\n"
"and has /some/path//////with-slashes"
)
self.assertEqual(
sanitized,
"This refers to **/with-a-file\n"
"and also **/with-a-different-file isn't that neat?\n"
"and has **/with-slashes"
)
@mock.patch.object(SubmissionError, "sanitize_message")
def test_submissionerror(self, mock_sanitize_message):
SubmissionError()
self.assertFalse(mock_sanitize_message.called)
SubmissionError("hi", "there")
self.assertTrue(mock_sanitize_message.called)
self.assertCountEqual(
mock_sanitize_message.call_args_list,
[mock.call("hi"), mock.call("there")],
)

View file

@ -17,6 +17,7 @@ from pathlib import Path
from shutil import move from shutil import move
from typing import Optional, Union # pyflakes:ignore from typing import Optional, Union # pyflakes:ignore
from unidecode import unidecode from unidecode import unidecode
from xml2rfc import RfcWriterError
from xym import xym from xym import xym
from django.conf import settings from django.conf import settings
@ -918,8 +919,51 @@ def accept_submission_requires_group_approval(submission):
class SubmissionError(Exception): class SubmissionError(Exception):
"""Exception for errors during submission processing""" """Exception for errors during submission processing
pass
Sanitizes paths appearing in exception messages.
"""
def __init__(self, *args):
if len(args) > 0:
args = (self.sanitize_message(arg) for arg in args)
super().__init__(*args)
@staticmethod
def sanitize_message(msg):
# Paths likely to appear in submission-related errors
paths = [
p for p in (
getattr(settings, "ALL_ID_DOWNLOAD_DIR", None),
getattr(settings, "BIBXML_BASE_PATH", None),
getattr(settings, "DERIVED_DIR", None),
getattr(settings, "FTP_DIR", None),
getattr(settings, "IDSUBMIT_REPOSITORY_PATH", None),
getattr(settings, "IDSUBMIT_STAGING_PATH", None),
getattr(settings, "INTERNET_ALL_DRAFTS_ARCHIVE_DIR", None),
getattr(settings, "INTERNET_DRAFT_PATH", None),
getattr(settings, "INTERNET_DRAFT_ARCHIVE_DIR", None),
getattr(settings, "INTERNET_DRAFT_PDF_PATH", None),
getattr(settings, "RFC_PATH", None),
getattr(settings, "SUBMIT_YANG_CATALOG_MODEL_DIR", None),
getattr(settings, "SUBMIT_YANG_DRAFT_MODEL_DIR", None),
getattr(settings, "SUBMIT_YANG_IANA_MODEL_DIR", None),
getattr(settings, "SUBMIT_YANG_RFC_MODEL_DIR", None),
"/tmp/",
) if p is not None
]
return re.sub(fr"({'|'.join(paths)})/*", "**/", msg)
class XmlRfcError(SubmissionError):
"""SubmissionError caused by xml2rfc
Includes the output from xml2rfc, if any, in xml2rfc_stdout / xml2rfc_stderr
"""
def __init__(self, *args, xml2rfc_stdout: str, xml2rfc_stderr: str):
super().__init__(*args)
self.xml2rfc_stderr = xml2rfc_stderr
self.xml2rfc_stdout = xml2rfc_stdout
class InconsistentRevisionError(SubmissionError): class InconsistentRevisionError(SubmissionError):
"""SubmissionError caused by an inconsistent revision""" """SubmissionError caused by an inconsistent revision"""
@ -937,27 +981,55 @@ def render_missing_formats(submission):
If a txt file already exists, leaves it in place. Overwrites an existing html file If a txt file already exists, leaves it in place. Overwrites an existing html file
if there is one. if there is one.
""" """
xml2rfc.log.write_out = io.StringIO() # open(os.devnull, "w") # Capture stdio/stdout from xml2rfc
xml2rfc.log.write_err = io.StringIO() # open(os.devnull, "w") xml2rfc_stdout = io.StringIO()
xml2rfc_stderr = io.StringIO()
xml2rfc.log.write_out = xml2rfc_stdout
xml2rfc.log.write_err = xml2rfc_stderr
xml_path = staging_path(submission.name, submission.rev, '.xml') xml_path = staging_path(submission.name, submission.rev, '.xml')
parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True) parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True)
try:
# --- Parse the xml --- # --- Parse the xml ---
xmltree = parser.parse(remove_comments=False) xmltree = parser.parse(remove_comments=False)
except Exception as err:
raise XmlRfcError(
"Error parsing XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# If we have v2, run it through v2v3. Keep track of the submitted version, though. # If we have v2, run it through v2v3. Keep track of the submitted version, though.
xmlroot = xmltree.getroot() xmlroot = xmltree.getroot()
xml_version = xmlroot.get('version', '2') xml_version = xmlroot.get('version', '2')
if xml_version == '2': if xml_version == '2':
v2v3 = xml2rfc.V2v3XmlWriter(xmltree) v2v3 = xml2rfc.V2v3XmlWriter(xmltree)
try:
xmltree.tree = v2v3.convert2to3() xmltree.tree = v2v3.convert2to3()
except Exception as err:
raise XmlRfcError(
"Error converting v2 XML to v3",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# --- Prep the xml --- # --- Prep the xml ---
today = date_today() today = date_today()
prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET]) prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
prep.options.accept_prepped = True prep.options.accept_prepped = True
prep.options.date = today prep.options.date = today
try:
xmltree.tree = prep.prep() xmltree.tree = prep.prep()
if xmltree.tree == None: except RfcWriterError:
raise SubmissionError(f'Error from xml2rfc (prep): {prep.errors}') raise XmlRfcError(
f"Error during xml2rfc prep: {prep.errors}",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
)
except Exception as err:
raise XmlRfcError(
"Unexpected error during xml2rfc prep",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
# --- Convert to txt --- # --- Convert to txt ---
txt_path = staging_path(submission.name, submission.rev, '.txt') txt_path = staging_path(submission.name, submission.rev, '.txt')
@ -965,7 +1037,14 @@ def render_missing_formats(submission):
writer = xml2rfc.TextWriter(xmltree, quiet=True) writer = xml2rfc.TextWriter(xmltree, quiet=True)
writer.options.accept_prepped = True writer.options.accept_prepped = True
writer.options.date = today writer.options.date = today
try:
writer.write(txt_path) writer.write(txt_path)
except Exception as err:
raise XmlRfcError(
"Error generating text format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
log.log( log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % ( 'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent), str(xml_path.parent),
@ -980,7 +1059,14 @@ def render_missing_formats(submission):
html_path = staging_path(submission.name, submission.rev, '.html') html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True) writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today writer.options.date = today
try:
writer.write(str(html_path)) writer.write(str(html_path))
except Exception as err:
raise XmlRfcError(
"Error generating HTML format from XML",
xml2rfc_stdout=xml2rfc_stdout.getvalue(),
xml2rfc_stderr=xml2rfc_stderr.getvalue(),
) from err
log.log( log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % ( 'In %s: xml2rfc %s generated %s from %s (version %s)' % (
str(xml_path.parent), str(xml_path.parent),
@ -1263,7 +1349,7 @@ def process_submission_text(filename, revision):
def process_and_validate_submission(submission): def process_and_validate_submission(submission):
"""Process and validate a submission """Process and validate a submission
Raises SubmissionError if an error is encountered. Raises SubmissionError or a subclass if an error is encountered.
""" """
if len(set(submission.file_types.split(",")).intersection({".xml", ".txt"})) == 0: if len(set(submission.file_types.split(",")).intersection({".xml", ".txt"})) == 0:
raise SubmissionError("Require XML and/or text format to process an Internet-Draft submission.") raise SubmissionError("Require XML and/or text format to process an Internet-Draft submission.")
@ -1273,7 +1359,16 @@ def process_and_validate_submission(submission):
# Parse XML first, if we have it # Parse XML first, if we have it
if ".xml" in submission.file_types: if ".xml" in submission.file_types:
xml_metadata = process_submission_xml(submission.name, submission.rev) xml_metadata = process_submission_xml(submission.name, submission.rev)
try:
render_missing_formats(submission) # makes HTML and text, unless text was uploaded render_missing_formats(submission) # makes HTML and text, unless text was uploaded
except XmlRfcError as err:
# log stdio/stderr
log.log(
f"xml2rfc failure when rendering missing formats for {submission.name}-{submission.rev}:\n"
f">> stdout:\n{err.xml2rfc_stdout}\n"
f">> stderr:\n{err.xml2rfc_stderr}"
)
raise
# Parse text, whether uploaded or generated from XML # Parse text, whether uploaded or generated from XML
text_metadata = process_submission_text(submission.name, submission.rev) text_metadata = process_submission_text(submission.name, submission.rev)
@ -1332,11 +1427,11 @@ def process_and_validate_submission(submission):
raise SubmissionError('Checks failed: ' + ' / '.join(errors)) raise SubmissionError('Checks failed: ' + ' / '.join(errors))
except SubmissionError: except SubmissionError:
raise # pass SubmissionErrors up the stack raise # pass SubmissionErrors up the stack
except Exception: except Exception as err:
# convert other exceptions into SubmissionErrors # convert other exceptions into SubmissionErrors
log.log(f'Unexpected exception while processing submission {submission.pk}.') log.log(f'Unexpected exception while processing submission {submission.pk}.')
log.log(traceback.format_exc()) log.log(traceback.format_exc())
raise SubmissionError('A system error occurred while processing the submission.') raise SubmissionError('A system error occurred while processing the submission.') from err
def submitter_is_author(submission): def submitter_is_author(submission):
@ -1428,6 +1523,7 @@ def process_uploaded_submission(submission):
create_submission_event(None, submission, desc="Uploaded submission (diverted to manual process)") create_submission_event(None, submission, desc="Uploaded submission (diverted to manual process)")
send_manual_post_request(None, submission, errors=dict(consistency=str(consistency_error))) send_manual_post_request(None, submission, errors=dict(consistency=str(consistency_error)))
except SubmissionError as err: except SubmissionError as err:
# something generic went wrong
submission.refresh_from_db() # guard against incomplete changes in submission validation / processing submission.refresh_from_db() # guard against incomplete changes in submission validation / processing
cancel_submission(submission) # changes Submission.state cancel_submission(submission) # changes Submission.state
create_submission_event(None, submission, f"Submission rejected: {err}") create_submission_event(None, submission, f"Submission rejected: {err}")