diff --git a/bin/add-old-drafts-from-archive.py b/bin/add-old-drafts-from-archive.py index d53ab32a7..f968cd11d 100755 --- a/bin/add-old-drafts-from-archive.py +++ b/bin/add-old-drafts-from-archive.py @@ -14,7 +14,7 @@ django.setup() from django.conf import settings from django.core.validators import validate_email, ValidationError -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft from ietf.submit.utils import update_authors import debug # pyflakes:ignore @@ -61,7 +61,7 @@ for name in sorted(names): except UnicodeDecodeError: text = raw.decode('latin1') try: - draft = Draft(text, txt_file.name, name_from_source=True) + draft = PlaintextDraft(text, txt_file.name, name_from_source=True) except Exception as e: print name, rev, "Can't parse", p,":",e continue diff --git a/ietf/doc/tests_utils.py b/ietf/doc/tests_utils.py index d626c9c34..aef6eb69a 100644 --- a/ietf/doc/tests_utils.py +++ b/ietf/doc/tests_utils.py @@ -1,16 +1,22 @@ # Copyright The IETF Trust 2020, All Rights Reserved import datetime +import debug # pyflakes:ignore + +from unittest.mock import patch from django.db import IntegrityError from ietf.group.factories import GroupFactory, RoleFactory from ietf.name.models import DocTagName from ietf.person.factories import PersonFactory -from ietf.utils.test_utils import TestCase +from ietf.utils.test_utils import TestCase, name_of_file_containing from ietf.person.models import Person -from ietf.doc.factories import DocumentFactory, WgRfcFactory +from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor, Document -from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors, fuzzy_find_documents +from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors, + fuzzy_find_documents, rebuild_reference_relations) +from ietf.utils.draft import Draft, PlaintextDraft +from 
ietf.utils.xmldraft import XMLDraft class ActionHoldersTests(TestCase): @@ -285,3 +291,140 @@ class MiscTests(TestCase): self.do_fuzzy_find_documents_rfc_test('draft-name-with-number-01') self.do_fuzzy_find_documents_rfc_test('draft-name-that-has-two-02-04') self.do_fuzzy_find_documents_rfc_test('draft-wild-01-numbers-0312') + + +class RebuildReferenceRelationsTests(TestCase): + def setUp(self): + super().setUp() + self.doc = WgDraftFactory() # document under test + # Other documents that should be found by rebuild_reference_relations + self.normative, self.informative, self.unknown = WgRfcFactory.create_batch(3) + for relationship in ['refnorm', 'refinfo', 'refunk', 'refold']: + self.doc.relateddocument_set.create( + target=WgRfcFactory().docalias.first(), + relationship_id=relationship, + ) + self.updated = WgRfcFactory() # related document that should be left alone + self.doc.relateddocument_set.create(target=self.updated.docalias.first(), relationship_id='updates') + self.assertCountEqual(self.doc.relateddocument_set.values_list('relationship__slug', flat=True), + ['refnorm', 'refinfo', 'refold', 'refunk', 'updates'], + 'Test conditions set up incorrectly: wrong prior document relationships') + for other_doc in [self.normative, self.informative, self.unknown]: + self.assertEqual( + self.doc.relateddocument_set.filter(target__name=other_doc.canonical_name()).count(), + 0, + 'Test conditions set up incorrectly: new documents already related', + ) + + def _get_refs_return_value(self): + return { + self.normative.canonical_name(): Draft.REF_TYPE_NORMATIVE, + self.informative.canonical_name(): Draft.REF_TYPE_INFORMATIVE, + self.unknown.canonical_name(): Draft.REF_TYPE_UNKNOWN, + 'draft-not-found': Draft.REF_TYPE_NORMATIVE, + } + + def test_requires_txt_or_xml(self): + result = rebuild_reference_relations(self.doc, {}) + self.assertCountEqual(result.keys(), ['errors']) + self.assertEqual(len(result['errors']), 1) + self.assertIn('No draft text available', 
result['errors'][0], + 'Error should be reported if no draft file is given') + + result = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'}) + self.assertCountEqual(result.keys(), ['errors']) + self.assertEqual(len(result['errors']), 1) + self.assertIn('No draft text available', result['errors'][0], + 'Error should be reported if no XML or plaintext file is given') + + @patch.object(XMLDraft, 'get_refs') + @patch.object(XMLDraft, '__init__', return_value=None) + def test_xml(self, mock_init, mock_get_refs): + """Should build reference relations with only XML""" + mock_get_refs.return_value = self._get_refs_return_value() + + result = rebuild_reference_relations(self.doc, {'xml': 'file.xml'}) + + # if the method of calling the XMLDraft() constructor changes, this will need to be updated + xmldraft_init_args, _ = mock_init.call_args + self.assertEqual(xmldraft_init_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments') + self.assertEqual( + result, + { + 'warnings': ['There were 1 references with no matching DocAlias'], + 'unfound': ['draft-not-found'], + } + ) + + self.assertCountEqual( + self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'), + [ + (self.normative.canonical_name(), 'refnorm'), + (self.informative.canonical_name(), 'refinfo'), + (self.unknown.canonical_name(), 'refunk'), + (self.updated.docalias.first().name, 'updates'), + ] + ) + + @patch.object(PlaintextDraft, 'get_refs') + @patch.object(PlaintextDraft, '__init__', return_value=None) + def test_plaintext(self, mock_init, mock_get_refs): + """Should build reference relations with only plaintext""" + mock_get_refs.return_value = self._get_refs_return_value() + + with name_of_file_containing('contents') as temp_file_name: + result = rebuild_reference_relations(self.doc, {'txt': temp_file_name}) + + # if the method of calling the PlaintextDraft() constructor changes, this test will need to be updated + _, mock_init_kwargs = mock_init.call_args + 
self.assertEqual(mock_init_kwargs, {'text': 'contents', 'source': temp_file_name}, + 'PlaintextDraft initialized with unexpected arguments') + self.assertEqual( + result, + { + 'warnings': ['There were 1 references with no matching DocAlias'], + 'unfound': ['draft-not-found'], + } + ) + + self.assertCountEqual( + self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'), + [ + (self.normative.canonical_name(), 'refnorm'), + (self.informative.canonical_name(), 'refinfo'), + (self.unknown.canonical_name(), 'refunk'), + (self.updated.docalias.first().name, 'updates'), + ] + ) + + @patch.object(PlaintextDraft, '__init__') + @patch.object(XMLDraft, 'get_refs') + @patch.object(XMLDraft, '__init__', return_value=None) + def test_xml_and_plaintext(self, mock_init, mock_get_refs, mock_plaintext_init): + """Should build reference relations with XML when plaintext also available""" + mock_get_refs.return_value = self._get_refs_return_value() + + result = rebuild_reference_relations(self.doc, {'txt': 'file.txt', 'xml': 'file.xml'}) + + self.assertFalse(mock_plaintext_init.called, 'PlaintextDraft should not be used when XML is available') + + # if the method of calling the XMLDraft() constructor changes, this will need to be updated + xmldraft_init_args, _ = mock_init.call_args + self.assertEqual(xmldraft_init_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments') + self.assertEqual( + result, + { + 'warnings': ['There were 1 references with no matching DocAlias'], + 'unfound': ['draft-not-found'], + } + ) + + self.assertCountEqual( + self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'), + [ + (self.normative.canonical_name(), 'refnorm'), + (self.informative.canonical_name(), 'refinfo'), + (self.unknown.canonical_name(), 'refunk'), + (self.updated.docalias.first().name, 'updates'), + ] + ) diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 61cf00813..c4d82e02d 100644 --- a/ietf/doc/utils.py +++ 
b/ietf/doc/utils.py @@ -39,6 +39,8 @@ from ietf.utils import draft, text from ietf.utils.mail import send_mail from ietf.mailtrigger.utils import gather_address_lists from ietf.utils import log +from ietf.utils.xmldraft import XMLDraft + def save_document_in_history(doc): """Save a snapshot of document and related objects in the database.""" @@ -742,21 +744,25 @@ def update_telechat(request, doc, by, new_telechat_date, new_returning_item=None return e -def rebuild_reference_relations(doc,filename=None): +def rebuild_reference_relations(doc, filenames): + """Rebuild reference relations for a document + + filenames should be a dict mapping file ext (i.e., type) to the full path of each file. + """ if doc.type.slug != 'draft': return None - if not filename: - if doc.get_state_slug() == 'rfc': - filename=os.path.join(settings.RFC_PATH,doc.canonical_name()+".txt") - else: - filename=os.path.join(settings.INTERNET_DRAFT_PATH,doc.filename_with_rev()) - - try: - with io.open(filename, 'rb') as file: - refs = draft.Draft(file.read().decode('utf8'), filename).get_refs() - except IOError as e: - return { 'errors': ["%s :%s" % (e.strerror, filename)] } + # try XML first + if 'xml' in filenames: + refs = XMLDraft(filenames['xml']).get_refs() + elif 'txt' in filenames: + filename = filenames['txt'] + try: + refs = draft.PlaintextDraft.from_file(filename).get_refs() + except IOError as e: + return { 'errors': ["%s :%s" % (e.strerror, filename)] } + else: + return {'errors': ['No draft text available for rebuilding reference relations. 
Need XML or plaintext.']} doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete() @@ -764,6 +770,7 @@ def rebuild_reference_relations(doc,filename=None): errors = [] unfound = set() for ( ref, refType ) in refs.items(): + # As of Dec 2021, DocAlias has a unique constraint on the name field, so count > 1 should not occur refdoc = DocAlias.objects.filter( name=ref ) count = refdoc.count() if count == 0: diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index 898e6daa9..5d4211de6 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -80,7 +80,7 @@ from ietf.review.models import ReviewAssignment from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs from ietf.review.utils import no_review_from_teams_on_doc from ietf.utils import markup_txt, log, markdown -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft from ietf.utils.response import permission_denied from ietf.utils.text import maybe_split @@ -1842,7 +1842,7 @@ def idnits2_state(request, name, rev=None): else: text = doc.text() if text: - parsed_draft = Draft(text=doc.text(), source=name, name_from_source=False) + parsed_draft = PlaintextDraft(text=doc.text(), source=name, name_from_source=False) doc.deststatus = parsed_draft.get_status() else: doc.deststatus="Unknown" diff --git a/ietf/stats/backfill_data.py b/ietf/stats/backfill_data.py index c8ee39531..176ee3335 100755 --- a/ietf/stats/backfill_data.py +++ b/ietf/stats/backfill_data.py @@ -29,7 +29,7 @@ import debug # pyflakes:ignore from ietf.doc.models import Document from ietf.name.models import FormalLanguageName -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft parser = argparse.ArgumentParser() parser.add_argument("--document", help="specific document name") @@ -89,7 +89,7 @@ for doc in docs_qs.prefetch_related("docalias", "formal_languages", "documentaut with io.open(path, 'rb') as f: 
say("\nProcessing %s" % doc.name) sys.stdout.flush() - d = Draft(unicode(f.read()), path) + d = PlaintextDraft(unicode(f.read()), path) updated = False diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 0ac825c47..e935a8072 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -38,7 +38,7 @@ from ietf.submit.parsers.pdf_parser import PDFParser from ietf.submit.parsers.plain_parser import PlainParser from ietf.submit.parsers.xml_parser import XMLParser from ietf.utils import log -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft from ietf.utils.text import normalize_text class SubmissionBaseUploadForm(forms.Form): @@ -302,7 +302,7 @@ class SubmissionBaseUploadForm(forms.Form): try: text = bytes.decode(self.file_info['txt'].charset) # - self.parsed_draft = Draft(text, txt_file.name) + self.parsed_draft = PlaintextDraft(text, txt_file.name) if self.filename == None: self.filename = self.parsed_draft.filename elif self.filename != self.parsed_draft.filename: diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py index 949c75d66..3c4d7c675 100644 --- a/ietf/submit/tests.py +++ b/ietf/submit/tests.py @@ -13,14 +13,19 @@ import mock from io import StringIO from pyquery import PyQuery +from pathlib import Path + from django.conf import settings +from django.test import override_settings +from django.test.client import RequestFactory from django.urls import reverse as urlreverse from django.utils.encoding import force_str, force_text import debug # pyflakes:ignore -from ietf.submit.utils import expirable_submissions, expire_submission -from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory +from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames, + post_submission) +from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory, IndividualRfcFactory from ietf.doc.models import ( Document, DocAlias, DocEvent, State, 
BallotPositionDocEvent, DocumentAuthor, SubmissionDocEvent ) from ietf.doc.utils import create_ballot_if_not_open, can_edit_docextresources, update_action_holders @@ -40,7 +45,7 @@ from ietf.utils.accesstoken import generate_access_token from ietf.utils.mail import outbox, empty_outbox, get_payload_text from ietf.utils.models import VersionInfo from ietf.utils.test_utils import login_testing_unauthorized, TestCase -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft class BaseSubmitTestCase(TestCase): @@ -2860,10 +2865,62 @@ class RefsTests(BaseSubmitTestCase): group = None file, __ = submission_file('draft-some-subject', '00', group, 'txt', "test_submission.txt", ) - draft = Draft(file.read(), file.name) + draft = PlaintextDraft(file.read(), file.name) refs = draft.get_refs() self.assertEqual(refs['rfc2119'], 'norm') self.assertEqual(refs['rfc8174'], 'norm') self.assertEqual(refs['rfc8126'], 'info') self.assertEqual(refs['rfc8175'], 'info') - \ No newline at end of file + + +class PostSubmissionTests(BaseSubmitTestCase): + @override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md')) + def test_find_submission_filenames_rfc(self): + """Posting an RFC submission should use RFC_FILE_TYPES""" + rfc = IndividualRfcFactory() + path = Path(self.staging_dir) + for ext in ['txt', 'xml', 'pdf', 'md']: + (path / f'{rfc.name}-{rfc.rev}.{ext}').touch() + files = find_submission_filenames(rfc) + self.assertCountEqual( + files, + { + 'txt': f'{path}/{rfc.name}-{rfc.rev}.txt', + 'xml': f'{path}/{rfc.name}-{rfc.rev}.xml', + # should NOT find the pdf or md + } + ) + + @override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md')) + def test_find_submission_filenames_draft(self): + """Posting an I-D submission should use IDSUBMIT_FILE_TYPES""" + draft = WgDraftFactory() + path = Path(self.staging_dir) + for ext in ['txt', 'xml', 'pdf', 'md']: + (path / f'{draft.name}-{draft.rev}.{ext}').touch() + files = 
find_submission_filenames(draft) + self.assertCountEqual( + files, + { + 'pdf': f'{path}/{draft.name}-{draft.rev}.pdf', + 'md': f'{path}/{draft.name}-{draft.rev}.md', + # should NOT find the txt or xml + } + ) + + @mock.patch('ietf.submit.utils.rebuild_reference_relations') + @mock.patch('ietf.submit.utils.find_submission_filenames') + def test_post_submission_rebuilds_ref_relations(self, mock_find_filenames, mock_rebuild_reference_relations): + """The post_submission method should rebuild reference relations from correct files + + This tests that the post_submission() utility function gets the list of files to handle from the + find_submission_filenames() method and passes them along to rebuild_reference_relations(). + """ + submission = SubmissionFactory() + mock_find_filenames.return_value = {'xml': f'{self.staging_dir}/{submission.name}.xml'} + request = RequestFactory() + request.user = PersonFactory().user + post_submission(request, submission, 'doc_desc', 'subm_desc') + args, kwargs = mock_rebuild_reference_relations.call_args + self.assertEqual(args[1], mock_find_filenames.return_value) + diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 9e9ef274c..495759dd6 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -40,7 +40,7 @@ from ietf.submit.models import ( Submission, SubmissionEvent, Preapproval, Draft SubmissionCheck, SubmissionExtResource ) from ietf.utils import log from ietf.utils.accesstoken import generate_random_key -from ietf.utils.draft import Draft +from ietf.utils.draft import PlaintextDraft from ietf.utils.mail import is_valid_email from ietf.utils.text import parse_unicode from ietf.person.name import unidecode_name @@ -262,6 +262,18 @@ def post_rev00_submission_events(draft, submission, submitter): return events +def find_submission_filenames(draft): + """Find uploaded files corresponding to the draft + + Returns a dict mapping file extension to the corresponding filename (including the full path). 
+ """ + path = pathlib.Path(settings.IDSUBMIT_STAGING_PATH) + stem = f'{draft.name}-{draft.rev}' + allowed_types = settings.RFC_FILE_TYPES if draft.get_state_slug() == 'rfc' else settings.IDSUBMIT_FILE_TYPES + candidates = {ext: path / f'{stem}.{ext}' for ext in allowed_types} + return {ext: str(filename) for ext, filename in candidates.items() if filename.exists()} + + @transaction.atomic def post_submission(request, submission, approved_doc_desc, approved_subm_desc): log.log(f"{submission.name}: start") @@ -352,7 +364,7 @@ def post_submission(request, submission, approved_doc_desc, approved_subm_desc): log.log(f"{submission.name}: updated state and info") - trouble = rebuild_reference_relations(draft, filename=os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (submission.name, submission.rev))) + trouble = rebuild_reference_relations(draft, find_submission_filenames(draft)) if trouble: log.log('Rebuild_reference_relations trouble: %s'%trouble) log.log(f"{submission.name}: rebuilt reference relations") @@ -723,8 +735,7 @@ def save_files(form): def get_draft_meta(form, saved_files): authors = [] file_name = saved_files - abstract = None - file_size = None + if form.cleaned_data['xml']: # Some meta-information, such as the page-count, can only # be retrieved from the generated text file. 
Provide a @@ -732,7 +743,7 @@ def get_draft_meta(form, saved_files): file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision)) file_size = os.stat(file_name['txt']).st_size with io.open(file_name['txt']) as txt_file: - form.parsed_draft = Draft(txt_file.read(), txt_file.name) + form.parsed_draft = PlaintextDraft(txt_file.read(), txt_file.name) else: file_size = form.cleaned_data['txt'].size diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index eb8dda4c1..78705637f 100755 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# Copyright The IETF Trust 2009-2020, All Rights Reserved +# Copyright The IETF Trust 2009-2021, All Rights Reserved # -*- coding: utf-8 -*- # -*- python -*- @@ -129,11 +129,61 @@ def acronym_match(s, l): #_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym)) return s == acronym +class Draft: + """Base class for drafts + + Extracted from PlaintextDraft, formerly named Draft. If I missed part of its public interface + that is relevant for other draft formats, those should be added to this base class. 
+ """ + REF_TYPE_NORMATIVE = 'norm' + REF_TYPE_INFORMATIVE = 'info' + REF_TYPE_UNKNOWN = 'unk' + + def get_abstract(self): + raise NotImplementedError + + def get_author_list(self): + raise NotImplementedError + + def get_authors(self): + raise NotImplementedError + + def get_authors_with_firm(self): + raise NotImplementedError + + def get_creation_date(self): + raise NotImplementedError + + def get_formal_languages(self): + raise NotImplementedError + + def get_pagecount(self): + raise NotImplementedError + + def get_refs(self): + raise NotImplementedError + + def get_status(self): + raise NotImplementedError + + def get_title(self): + raise NotImplementedError + + def get_wordcount(self): + raise NotImplementedError + # ---------------------------------------------------------------------- -class Draft(): +class PlaintextDraft(Draft): def __init__(self, text, source, name_from_source=False): + """Initialize a Draft instance + + :param text: plaintext draft contents + :param source: name of file containing the contents + :param name_from_source: if True, fall back to source to determine draft name not found from text + """ + super().__init__() assert isinstance(text, str) self.source = source self.rawtext = text @@ -169,6 +219,11 @@ class Draft(): self._creation_date = None self._title = None + @classmethod + def from_file(cls, source, *args, **kwargs): + with open(source, 'r', encoding='utf8') as f: + return cls(text=f.read(), source=source, *args, **kwargs) + # ------------------------------------------------------------------ def _parse_draftname(self): draftname_regex = r"(draft-[a-z0-9-]*)-(\d\d)(\w|\.txt|\n|$)" @@ -1055,7 +1110,7 @@ class Draft(): refs = {} in_ref_sect = False in_norm_ref_sect = False - refType = 'unk' + refType = self.REF_TYPE_UNKNOWN for i in range( 15, len( self.lines ) ): line = self.lines[ i ].strip() @@ -1071,10 +1126,10 @@ class Draft(): if m: if not any( [ rule.search( line ) for rule in not_starting_regexes ]): in_ref_sect = True - 
refType = 'info' + refType = self.REF_TYPE_INFORMATIVE if line.lower().find("normative") > 1: in_norm_ref_sect = True - refType = 'norm' + refType = self.REF_TYPE_NORMATIVE # might be subsections within a references section if in_ref_sect and not in_norm_ref_sect: @@ -1088,7 +1143,7 @@ class Draft(): in_ref_sect = True if line.lower().find("normative") > 1: in_norm_ref_sect = True - refType = 'norm' + refType = self.REF_TYPE_NORMATIVE # look for the end of the normative reference section if in_norm_ref_sect: @@ -1100,7 +1155,7 @@ class Draft(): if m and line.lower().find("normative") < 0: in_norm_ref_sect = False - refType = 'info' + refType = self.REF_TYPE_INFORMATIVE # find references within the section if in_ref_sect: @@ -1216,9 +1271,9 @@ def getmeta(fn): timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(filename)[stat.ST_MTIME])) with io.open(filename, 'rb') as file: try: - draft = Draft(file.read().decode('utf8'), filename) + draft = PlaintextDraft(file.read().decode('utf8'), filename) except UnicodeDecodeError: - draft = Draft(file.read().decode('latin1'), filename) + draft = PlaintextDraft(file.read().decode('latin1'), filename) #_debug("\n".join(draft.lines)) fields["eventdate"] = timestamp diff --git a/ietf/utils/test_draft_with_references_v2.xml b/ietf/utils/test_draft_with_references_v2.xml new file mode 100644 index 000000000..7306ef2f7 --- /dev/null +++ b/ietf/utils/test_draft_with_references_v2.xml @@ -0,0 +1,103 @@ + + + + + Test Draft with References + +
+ aperson@example.com +
+
+
+ +
+ + Text here. + RFC0001 + RFC0255 + BCP6 + FYI7 + +
+
+ + + + + Host Software + + + + + + + + + + + + + Status of network hosts + + + + + + + + + + + Guidelines for creation, selection, and registration of an Autonomous System (AS) + + + + + + + + + This memo discusses when it is appropriate to register and utilize an Autonomous System + (AS), and lists criteria for such. This document specifies an Internet Best Current + Practices for the Internet Community, and requests discussion and suggestions for + improvements. + + + + + + + + + + + + + FYI on Questions and Answers: Answers to commonly asked "experienced Internet user" + questions + + + + + + + + + + + + + This FYI RFC is one of two FYI's called, "Questions and Answers" (Q/A), produced by + the User Services Working Group of the Internet Engineering Task Force (IETF). The goal is + to document the most commonly asked questions and answers in the Internet. This memo + provides information for the Internet community. It does not specify any standard. + + + + + + + + + +
\ No newline at end of file diff --git a/ietf/utils/test_draft_with_references_v3.xml b/ietf/utils/test_draft_with_references_v3.xml new file mode 100644 index 000000000..dc3406482 --- /dev/null +++ b/ietf/utils/test_draft_with_references_v3.xml @@ -0,0 +1,153 @@ + + + + + Test Draft with References + +
+
+ aperson@example.com +
+
+
+
+ +
+ First Section + + Text here. + RFC0001 + RFC0255 + BCP6 + FYI7 + +
+
+ + + Normative References + + + Host Software + + + + + + + + + + + Informative References + + + Status of network hosts + + + + + + + + + + + + Guidelines for creation, selection, and registration of an Autonomous System (AS) + + + + + + + + + This memo discusses when it is appropriate to register and utilize an Autonomous System + (AS), and lists criteria for such. This document specifies an Internet Best Current + Practices for the Internet Community, and requests discussion and suggestions for + improvements. + + + + + + + + + + Autonomous System (AS) Reservation for Private Use + + + + + + This document describes the reservation of Autonomous System Numbers (ASNs) that are for + Private Use only, known as Private Use ASNs, and provides operational guidance on their + use. This document enlarges the total space available for Private Use ASNs by + documenting the reservation of a second, larger range and updates RFC 1930 by replacing + Section 10 of that document. + + + + + + + + + + Reservation of Last Autonomous System (AS) Numbers + + + + + + + + + This document reserves two Autonomous System Numbers (ASNs) at the end of the 16-bit and + 32-bit ranges, described in this document as "Last ASNs", and provides + guidance to implementers and operators on their use. This document updates Section 10 of + RFC 1930. + + + + + + + + + + + Mysterious References + + + + FYI on Questions and Answers: Answers to commonly asked "experienced Internet user" + questions + + + + + + + + + + + + + This FYI RFC is one of two FYI's called, "Questions and Answers" (Q/A), produced by + the User Services Working Group of the Internet Engineering Task Force (IETF). The goal is + to document the most commonly asked questions and answers in the Internet. This memo + provides information for the Internet community. It does not specify any standard. + + + + + + + + + +
\ No newline at end of file
diff --git a/ietf/utils/tests.py b/ietf/utils/tests.py
index 87ed366a5..15f7c9873 100644
--- a/ietf/utils/tests.py
+++ b/ietf/utils/tests.py
@@ -38,12 +38,14 @@ from ietf.group.models import Group
 from ietf.person.name import name_parts, unidecode_name
 from ietf.submit.tests import submission_file
 from ietf.utils.bower_storage import BowerStorageFinder
-from ietf.utils.draft import Draft, getmeta
+from ietf.utils.draft import PlaintextDraft, getmeta
 from ietf.utils.log import unreachable, assertion
 from ietf.utils.mail import send_mail_preformatted, send_mail_text, send_mail_mime, outbox, get_payload_text
 from ietf.utils.test_runner import get_template_paths, set_coverage_checking
 from ietf.utils.test_utils import TestCase
 from ietf.utils.text import parse_unicode
+from ietf.utils.xmldraft import XMLDraft
+
 
 skip_wiki_glue_testing = False
 skip_message_svn = ""
@@ -423,12 +425,12 @@ class TestBowerStaticFiles(TestCase):
         self.assertNotEqual(files,[])
 
 
-class DraftTests(TestCase):
+class PlaintextDraftTests(TestCase):
 
     def setUp(self):
         super().setUp()
         file,_ = submission_file(name='draft-test-draft-class',rev='00',format='txt',templatename='test_submission.txt',group=None)
-        self.draft = Draft(text=file.getvalue(),source='draft-test-draft-class-00.txt',name_from_source=False)
+        self.draft = PlaintextDraft(text=file.getvalue(), source='draft-test-draft-class-00.txt', name_from_source=False)
 
     def test_get_status(self):
         self.assertEqual(self.draft.get_status(),'Informational')
@@ -451,6 +453,32 @@ class DraftTests(TestCase):
         shutil.rmtree(tempdir)
 
 
+class XMLDraftTests(TestCase):
+    def test_get_refs_v3(self):
+        draft = XMLDraft('ietf/utils/test_draft_with_references_v3.xml')
+        self.assertEqual(
+            draft.get_refs(),
+            {
+                'rfc1': XMLDraft.REF_TYPE_NORMATIVE,
+                'rfc255': XMLDraft.REF_TYPE_INFORMATIVE,
+                'bcp6': XMLDraft.REF_TYPE_INFORMATIVE,
+                'rfc1207': XMLDraft.REF_TYPE_UNKNOWN,
+            }
+        )
+
+    def test_get_refs_v2(self):
+        draft = XMLDraft('ietf/utils/test_draft_with_references_v2.xml')
+        self.assertEqual(
+            draft.get_refs(),
+            {
+                'rfc1': XMLDraft.REF_TYPE_NORMATIVE,
+                'rfc255': XMLDraft.REF_TYPE_INFORMATIVE,
+                'bcp6': XMLDraft.REF_TYPE_INFORMATIVE,
+                'rfc1207': XMLDraft.REF_TYPE_UNKNOWN,
+            }
+        )
+
+
 class NameTests(TestCase):
 
     def test_name_parts(self):
diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py
new file mode 100644
index 000000000..54992eb80
--- /dev/null
+++ b/ietf/utils/xmldraft.py
@@ -0,0 +1,110 @@
+# Copyright The IETF Trust 2021, All Rights Reserved
+# -*- coding: utf-8 -*-
+import os
+import xml2rfc
+
+import debug  # pyflakes: ignore
+
+from contextlib import ExitStack
+
+from django.conf import settings
+
+from .draft import Draft
+
+
+class XMLDraft(Draft):
+    """Draft from XML source
+
+    Currently just a holding place for get_refs() for an XML file. Can eventually expand
+    to implement the other public methods of Draft as need arises.
+    """
+    def __init__(self, xml_file):
+        """Initialize XMLDraft instance
+
+        :parameter xml_file: path to file containing XML source
+        """
+        super().__init__()
+        # cast xml_file to str so, e.g., this will work with a Path
+        self.xmltree = self.parse_xml(str(xml_file))
+        self.xmlroot = self.xmltree.getroot()
+
+    @staticmethod
+    def parse_xml(filename):
+        """Parse an xml2rfc source file, converting v2 input to v3
+
+        While parsing, xml2rfc's log writers are pointed at os.devnull and the XML_LIBRARY
+        environment variable is set from settings.XML_LIBRARY. Both are restored on exit,
+        even if parsing raises.
+
+        :parameter filename: path to file containing XML source
+        :return: the tree returned by xml2rfc.XmlRfcParser.parse()
+        """
+        orig_write_out = xml2rfc.log.write_out
+        orig_write_err = xml2rfc.log.write_err
+        orig_xml_library = os.environ.get('XML_LIBRARY', None)
+        tree = None
+        with ExitStack() as stack:
+            @stack.callback
+            def cleanup():  # called when context exited, even if there's an exception
+                xml2rfc.log.write_out = orig_write_out
+                xml2rfc.log.write_err = orig_write_err
+                # pop with a default: the variable is unset if setup failed before assigning it
+                os.environ.pop('XML_LIBRARY', None)
+                if orig_xml_library is not None:
+                    os.environ['XML_LIBRARY'] = orig_xml_library
+
+            # enter_context() makes the stack close the devnull handles instead of leaking them
+            xml2rfc.log.write_out = stack.enter_context(open(os.devnull, 'w'))
+            xml2rfc.log.write_err = stack.enter_context(open(os.devnull, 'w'))
+            os.environ['XML_LIBRARY'] = settings.XML_LIBRARY
+
+            parser = xml2rfc.XmlRfcParser(filename, quiet=True)
+            tree = parser.parse()
+            xml_version = tree.getroot().get('version', '2')
+            if xml_version == '2':
+                v2v3 = xml2rfc.V2v3XmlWriter(tree)
+                tree.tree = v2v3.convert2to3()
+        return tree
+
+    def _document_name(self, anchor):
+        """Guess document name from reference anchor
+
+        Looks for series numbers and removes leading 0s from the number. Any other anchor
+        is returned lowercased but otherwise unchanged.
+        """
+        anchor = anchor.lower()  # always give back lowercase
+        label = anchor.rstrip('0123456789')  # remove trailing digits
+        digits = anchor[len(label):]
+        # require digits: a bare label such as 'rfc' would otherwise hit int('') -> ValueError
+        if digits and label in ['rfc', 'bcp', 'fyi', 'std']:
+            return f'{label}{int(digits)}'
+        return anchor
+
+    def _reference_section_type(self, section_name):
+        """Determine reference type from name of references section
+
+        A missing name (None) is treated as an unknown reference type.
+        """
+        section_name = (section_name or '').lower()
+        if 'normative' in section_name:
+            return self.REF_TYPE_NORMATIVE
+        elif 'informative' in section_name:
+            return self.REF_TYPE_INFORMATIVE
+        return self.REF_TYPE_UNKNOWN
+
+    def get_refs(self):
+        """Extract references from the draft
+
+        :return: dict mapping document name to one of the REF_TYPE_* constants
+        """
+        refs = {}
+        # accept nested sections
+        for section in self.xmlroot.findall('back//references'):
+            # findtext() gives None when the section has no <name> child
+            ref_type = self._reference_section_type(section.findtext('name'))
+            for ref in (section.findall('./reference') + section.findall('./referencegroup')):
+                anchor = ref.get('anchor')
+                if anchor is None:
+                    continue  # nothing to derive a document name from
+                refs[self._document_name(anchor)] = ref_type
+        return refs