feat: Extract document creation date from XML draft (#5733)
* fix: Extract document creation date from XML draft * test: Fix test
This commit is contained in:
parent
8d4780d304
commit
5a2708283b
|
@ -3354,7 +3354,7 @@ class AsyncSubmissionTests(BaseSubmitTestCase):
|
||||||
self.assertEqual(output["title"], "Correct Draft Title")
|
self.assertEqual(output["title"], "Correct Draft Title")
|
||||||
self.assertIsNone(output["abstract"])
|
self.assertIsNone(output["abstract"])
|
||||||
self.assertEqual(len(output["authors"]), 1) # not checking in detail, parsing is unreliable
|
self.assertEqual(len(output["authors"]), 1) # not checking in detail, parsing is unreliable
|
||||||
self.assertIsNone(output["document_date"])
|
self.assertEqual(output["document_date"], date_today())
|
||||||
self.assertIsNone(output["pages"])
|
self.assertIsNone(output["pages"])
|
||||||
self.assertIsNone(output["words"])
|
self.assertIsNone(output["words"])
|
||||||
self.assertIsNone(output["first_two_pages"])
|
self.assertIsNone(output["first_two_pages"])
|
||||||
|
|
|
@ -1159,7 +1159,7 @@ def process_submission_xml(filename, revision):
|
||||||
for auth in xml_draft.get_author_list()
|
for auth in xml_draft.get_author_list()
|
||||||
],
|
],
|
||||||
"abstract": None, # not supported from XML
|
"abstract": None, # not supported from XML
|
||||||
"document_date": None, # not supported from XML
|
"document_date": xml_draft.get_creation_date(),
|
||||||
"pages": None, # not supported from XML
|
"pages": None, # not supported from XML
|
||||||
"words": None, # not supported from XML
|
"words": None, # not supported from XML
|
||||||
"first_two_pages": None, # not supported from XML
|
"first_two_pages": None, # not supported from XML
|
||||||
|
@ -1287,9 +1287,14 @@ def process_and_validate_submission(submission):
|
||||||
if not submission.title:
|
if not submission.title:
|
||||||
raise SubmissionError("Could not determine the title of the draft")
|
raise SubmissionError("Could not determine the title of the draft")
|
||||||
|
|
||||||
|
# Items to get from text only when not available from XML
|
||||||
|
if xml_metadata and xml_metadata.get("document_date", None) is not None:
|
||||||
|
submission.document_date = xml_metadata["document_date"]
|
||||||
|
else:
|
||||||
|
submission.document_date = text_metadata["document_date"]
|
||||||
|
|
||||||
# Items always to get from text, even when XML is available
|
# Items always to get from text, even when XML is available
|
||||||
submission.abstract = text_metadata["abstract"]
|
submission.abstract = text_metadata["abstract"]
|
||||||
submission.document_date = text_metadata["document_date"]
|
|
||||||
submission.pages = text_metadata["pages"]
|
submission.pages = text_metadata["pages"]
|
||||||
submission.words = text_metadata["words"]
|
submission.words = text_metadata["words"]
|
||||||
submission.first_two_pages = text_metadata["first_two_pages"]
|
submission.first_two_pages = text_metadata["first_two_pages"]
|
||||||
|
|
|
@ -189,6 +189,46 @@ class Draft:
|
||||||
|
|
||||||
def get_wordcount(self):
|
def get_wordcount(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@staticmethod
def _construct_creation_date(year, month, day=None):
    """Construct a date for the document

    Roughly follows RFC 7991 section 2.17, but only allows the day to be
    missing. When the day is not specified, today's day is used if the
    month/year are the current month/year; otherwise the 15th is assumed.

    year: integer or string with 4-digit year
    month: integer or string with numeric or English month. Some abbreviations recognized.
    day: integer or string with numeric day of month. Optional; None, an
         empty string and 0 all mean "not specified".

    Raises ValueError if there is a problem interpreting the data
    """
    # Year and month are mandatory. Reject None explicitly so callers that
    # only catch ValueError (as documented) never see a TypeError from
    # int(None) or datetime.date(..., None, ...).
    if year is None or month is None:
        raise ValueError("Year and month are required")
    year = int(year)
    # The day is optional: only convert it when a value was supplied.
    # Unconditionally calling int(day) would raise TypeError for the
    # default day=None.
    day = int(day) if day else 0
    if isinstance(month, str):
        month = month.lower()
        if month in month_names:
            month = month_names.index(month) + 1
        elif month in month_names_abbrev3:
            month = month_names_abbrev3.index(month) + 1
        elif month in month_names_abbrev4:
            month = month_names_abbrev4.index(month) + 1
        elif month.isdigit() and int(month) in range(1, 13):
            month = int(month)
        else:
            raise ValueError("Unrecognized month")
    today = date_today()
    if not day:
        # if the date was given with only month and year, use
        # today's date if month and year is today's month and
        # year, otherwise pick the middle of the month.
        # Don't use today's day for month and year in the past
        if month == today.month and year == today.year:
            day = today.day
        else:
            day = 15
    # datetime.date raises ValueError for out-of-range components.
    return datetime.date(year, month, day)
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -460,27 +500,7 @@ class PlaintextDraft(Draft):
|
||||||
day = int( md.get( 'day', 0 ) )
|
day = int( md.get( 'day', 0 ) )
|
||||||
year = int( md['year'] )
|
year = int( md['year'] )
|
||||||
try:
|
try:
|
||||||
if mon in month_names:
|
self._creation_date = self._construct_creation_date(year, mon, day)
|
||||||
month = month_names.index( mon ) + 1
|
|
||||||
elif mon in month_names_abbrev3:
|
|
||||||
month = month_names_abbrev3.index( mon ) + 1
|
|
||||||
elif mon in month_names_abbrev4:
|
|
||||||
month = month_names_abbrev4.index( mon ) + 1
|
|
||||||
elif mon.isdigit() and int(mon) in range(1,13):
|
|
||||||
month = int(mon)
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
today = date_today()
|
|
||||||
if day==0:
|
|
||||||
# if the date was given with only month and year, use
|
|
||||||
# today's date if month and year is today's month and
|
|
||||||
# year, otherwise pick the middle of the month.
|
|
||||||
# Don't use today's day for month and year in the past
|
|
||||||
if month==today.month and year==today.year:
|
|
||||||
day = today.day
|
|
||||||
else:
|
|
||||||
day = 15
|
|
||||||
self._creation_date = datetime.date(year, month, day)
|
|
||||||
return self._creation_date
|
return self._creation_date
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# mon abbreviation not in _MONTH_NAMES
|
# mon abbreviation not in _MONTH_NAMES
|
||||||
|
|
|
@ -133,6 +133,17 @@ class XMLDraft(Draft):
|
||||||
def get_title(self):
|
def get_title(self):
|
||||||
return self.xmlroot.findtext('front/title').strip()
|
return self.xmlroot.findtext('front/title').strip()
|
||||||
|
|
||||||
|
def get_creation_date(self):
    """Return the document date declared in the XML, or None.

    Looks up the ``front/date`` element and passes its ``year``, ``month``
    and (optional) ``day`` attributes to ``_construct_creation_date``.

    Returns None when the element is absent or when its attributes cannot
    be turned into a date, so that the caller can fall back to another
    source for the document date.
    """
    date_elt = self.xmlroot.find("front/date")
    if date_elt is None:
        return None
    try:
        return self._construct_creation_date(
            date_elt.get("year"),
            date_elt.get("month"),
            date_elt.get("day"),
        )
    except (TypeError, ValueError):
        # Element.get() yields None for an absent attribute, which can
        # surface as TypeError (e.g. int(None)) rather than ValueError.
        # Either way the date is unusable, so report "no date" instead of
        # letting the exception abort processing.
        return None
|
||||||
|
|
||||||
# todo fix the implementation of XMLDraft.get_abstract()
|
# todo fix the implementation of XMLDraft.get_abstract()
|
||||||
#
|
#
|
||||||
# This code was pulled from ietf.submit.forms where it existed for some time.
|
# This code was pulled from ietf.submit.forms where it existed for some time.
|
||||||
|
|
Loading…
Reference in a new issue