feat: Extract document creation date from XML draft (#5733)
* fix: Extract document creation date from XML draft * test: Fix test
This commit is contained in:
parent
8d4780d304
commit
5a2708283b
|
@ -3354,7 +3354,7 @@ class AsyncSubmissionTests(BaseSubmitTestCase):
|
|||
self.assertEqual(output["title"], "Correct Draft Title")
|
||||
self.assertIsNone(output["abstract"])
|
||||
self.assertEqual(len(output["authors"]), 1) # not checking in detail, parsing is unreliable
|
||||
self.assertIsNone(output["document_date"])
|
||||
self.assertEqual(output["document_date"], date_today())
|
||||
self.assertIsNone(output["pages"])
|
||||
self.assertIsNone(output["words"])
|
||||
self.assertIsNone(output["first_two_pages"])
|
||||
|
|
|
@ -1159,7 +1159,7 @@ def process_submission_xml(filename, revision):
|
|||
for auth in xml_draft.get_author_list()
|
||||
],
|
||||
"abstract": None, # not supported from XML
|
||||
"document_date": None, # not supported from XML
|
||||
"document_date": xml_draft.get_creation_date(),
|
||||
"pages": None, # not supported from XML
|
||||
"words": None, # not supported from XML
|
||||
"first_two_pages": None, # not supported from XML
|
||||
|
@ -1287,9 +1287,14 @@ def process_and_validate_submission(submission):
|
|||
if not submission.title:
|
||||
raise SubmissionError("Could not determine the title of the draft")
|
||||
|
||||
# Items to get from text only when not available from XML
|
||||
if xml_metadata and xml_metadata.get("document_date", None) is not None:
|
||||
submission.document_date = xml_metadata["document_date"]
|
||||
else:
|
||||
submission.document_date = text_metadata["document_date"]
|
||||
|
||||
# Items always to get from text, even when XML is available
|
||||
submission.abstract = text_metadata["abstract"]
|
||||
submission.document_date = text_metadata["document_date"]
|
||||
submission.pages = text_metadata["pages"]
|
||||
submission.words = text_metadata["words"]
|
||||
submission.first_two_pages = text_metadata["first_two_pages"]
|
||||
|
|
|
@ -189,6 +189,46 @@ class Draft:
|
|||
|
||||
def get_wordcount(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@staticmethod
def _construct_creation_date(year, month, day=None):
    """Construct a date for the document

    Roughly follows RFC 7991 section 2.17, but only allows missing day and
    assumes the 15th if day is not specified and month/year are not current.

    year: integer or string with 4-digit year
    month: integer or string with numeric or English month. Some abbreviations recognized.
    day: integer or string with numeric day of month. Optional; None or 0 means
         the day was not specified.

    Returns a datetime.date.

    Raises ValueError if there is a problem interpreting the data
    """
    if year is None or month is None:
        # int(None) and datetime.date(..., None, ...) raise TypeError, but callers
        # only expect (and catch) ValueError.
        raise ValueError("Year and month are required")
    year = int(year)
    # Normalize a missing day to 0 so it takes the "day not specified" path below
    # instead of blowing up in int(None).
    day = 0 if day is None else int(day)
    if isinstance(month, str):
        month = month.lower()
        if month in month_names:
            month = month_names.index(month) + 1
        elif month in month_names_abbrev3:
            month = month_names_abbrev3.index(month) + 1
        elif month in month_names_abbrev4:
            month = month_names_abbrev4.index(month) + 1
        elif month.isdigit() and int(month) in range(1, 13):
            month = int(month)
        else:
            raise ValueError("Unrecognized month")
    if not day:
        # if the date was given with only month and year, use
        # today's date if month and year is today's month and
        # year, otherwise pick the middle of the month.
        # Don't use today's day for month and year in the past
        today = date_today()
        if month == today.month and year == today.year:
            day = today.day
        else:
            day = 15
    # datetime.date itself raises ValueError for out-of-range year/month/day
    return datetime.date(year, month, day)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
@ -460,27 +500,7 @@ class PlaintextDraft(Draft):
|
|||
day = int( md.get( 'day', 0 ) )
|
||||
year = int( md['year'] )
|
||||
try:
|
||||
if mon in month_names:
|
||||
month = month_names.index( mon ) + 1
|
||||
elif mon in month_names_abbrev3:
|
||||
month = month_names_abbrev3.index( mon ) + 1
|
||||
elif mon in month_names_abbrev4:
|
||||
month = month_names_abbrev4.index( mon ) + 1
|
||||
elif mon.isdigit() and int(mon) in range(1,13):
|
||||
month = int(mon)
|
||||
else:
|
||||
continue
|
||||
today = date_today()
|
||||
if day==0:
|
||||
# if the date was given with only month and year, use
|
||||
# today's date if month and year is today's month and
|
||||
# year, otherwise pick the middle of the month.
|
||||
# Don't use today's day for month and year in the past
|
||||
if month==today.month and year==today.year:
|
||||
day = today.day
|
||||
else:
|
||||
day = 15
|
||||
self._creation_date = datetime.date(year, month, day)
|
||||
self._creation_date = self._construct_creation_date(year, mon, day)
|
||||
return self._creation_date
|
||||
except ValueError:
|
||||
# mon abbreviation not in _MONTH_NAMES
|
||||
|
|
|
@ -133,6 +133,17 @@ class XMLDraft(Draft):
|
|||
def get_title(self):
|
||||
return self.xmlroot.findtext('front/title').strip()
|
||||
|
||||
def get_creation_date(self):
    """Return the document date from the XML front/date element, or None

    Reads the "year", "month" and optional "day" attributes of the
    front/date element and builds a date via _construct_creation_date().

    Returns None when there is no date element, when year or month is
    absent, or when the attribute values cannot be interpreted as a date.
    """
    date_elt = self.xmlroot.find("front/date")
    if date_elt is None:
        return None
    year = date_elt.get("year")
    month = date_elt.get("month")
    if year is None or month is None:
        # Only a missing day is supported; without year and month there is
        # nothing to build a date from (and passing None on would raise
        # TypeError rather than the ValueError handled below).
        return None
    try:
        # 0 means "day not specified", letting the helper pick a default day
        return self._construct_creation_date(year, month, date_elt.get("day", 0))
    except ValueError:
        return None
|
||||
|
||||
# todo fix the implementation of XMLDraft.get_abstract()
|
||||
#
|
||||
# This code was pulled from ietf.submit.forms where it existed for some time.
|
||||
|
|
Loading…
Reference in a new issue