Find references from submitted XML instead of rendering to text and parsing. Fixes #3342. Commit ready for merge.

- Legacy-Id: 19825
This commit is contained in:
Jennifer Richards 2022-01-07 17:53:23 +00:00
parent 5bf0638c55
commit cf62b46093
13 changed files with 689 additions and 45 deletions

View file

@ -14,7 +14,7 @@ django.setup()
from django.conf import settings
from django.core.validators import validate_email, ValidationError
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
from ietf.submit.utils import update_authors
import debug # pyflakes:ignore
@ -61,7 +61,7 @@ for name in sorted(names):
except UnicodeDecodeError:
text = raw.decode('latin1')
try:
draft = Draft(text, txt_file.name, name_from_source=True)
draft = PlaintextDraft(text, txt_file.name, name_from_source=True)
except Exception as e:
print name, rev, "Can't parse", p,":",e
continue

View file

@ -1,16 +1,22 @@
# Copyright The IETF Trust 2020, All Rights Reserved
import datetime
import debug # pyflakes:ignore
from unittest.mock import patch
from django.db import IntegrityError
from ietf.group.factories import GroupFactory, RoleFactory
from ietf.name.models import DocTagName
from ietf.person.factories import PersonFactory
from ietf.utils.test_utils import TestCase
from ietf.utils.test_utils import TestCase, name_of_file_containing
from ietf.person.models import Person
from ietf.doc.factories import DocumentFactory, WgRfcFactory
from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor, Document
from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors, fuzzy_find_documents
from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
fuzzy_find_documents, rebuild_reference_relations)
from ietf.utils.draft import Draft, PlaintextDraft
from ietf.utils.xmldraft import XMLDraft
class ActionHoldersTests(TestCase):
@ -285,3 +291,140 @@ class MiscTests(TestCase):
self.do_fuzzy_find_documents_rfc_test('draft-name-with-number-01')
self.do_fuzzy_find_documents_rfc_test('draft-name-that-has-two-02-04')
self.do_fuzzy_find_documents_rfc_test('draft-wild-01-numbers-0312')
class RebuildReferenceRelationsTests(TestCase):
    """Tests of rebuild_reference_relations() with XML and/or plaintext input"""

    def setUp(self):
        super().setUp()
        self.doc = WgDraftFactory()  # document under test

        # Documents that get_refs() will report and that should end up related to self.doc
        new_targets = WgRfcFactory.create_batch(3)
        self.normative, self.informative, self.unknown = new_targets

        # Pre-existing reference relations of every type the rebuild deletes
        for slug in ('refnorm', 'refinfo', 'refunk', 'refold'):
            stale_target = WgRfcFactory().docalias.first()
            self.doc.relateddocument_set.create(target=stale_target, relationship_id=slug)

        # A non-reference relation; the tests expect it to survive the rebuild
        self.updated = WgRfcFactory()
        self.doc.relateddocument_set.create(target=self.updated.docalias.first(), relationship_id='updates')

        # Sanity-check the starting conditions
        prior_slugs = self.doc.relateddocument_set.values_list('relationship__slug', flat=True)
        self.assertCountEqual(
            prior_slugs,
            ['refnorm', 'refinfo', 'refold', 'refunk', 'updates'],
            'Test conditions set up incorrectly: wrong prior document relationships',
        )
        for target_doc in new_targets:
            already_related = self.doc.relateddocument_set.filter(target__name=target_doc.canonical_name())
            self.assertEqual(
                already_related.count(),
                0,
                'Test conditions set up incorrectly: new documents already related',
            )

    def _get_refs_return_value(self):
        """Build the dict the mocked get_refs() returns: three known documents and one unknown name"""
        refs = {}
        refs[self.normative.canonical_name()] = Draft.REF_TYPE_NORMATIVE
        refs[self.informative.canonical_name()] = Draft.REF_TYPE_INFORMATIVE
        refs[self.unknown.canonical_name()] = Draft.REF_TYPE_UNKNOWN
        refs['draft-not-found'] = Draft.REF_TYPE_NORMATIVE
        return refs

    def _assert_relations_rebuilt(self, result):
        """Check the return value and resulting relations shared by the successful-rebuild tests"""
        expected_result = {
            'warnings': ['There were 1 references with no matching DocAlias'],
            'unfound': ['draft-not-found'],
        }
        self.assertEqual(result, expected_result)

        expected_relations = [
            (self.normative.canonical_name(), 'refnorm'),
            (self.informative.canonical_name(), 'refinfo'),
            (self.unknown.canonical_name(), 'refunk'),
            (self.updated.docalias.first().name, 'updates'),
        ]
        self.assertCountEqual(
            self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'),
            expected_relations,
        )

    def test_requires_txt_or_xml(self):
        """Should report an error when neither XML nor plaintext is supplied"""
        no_files = rebuild_reference_relations(self.doc, {})
        self.assertCountEqual(no_files.keys(), ['errors'])
        self.assertEqual(len(no_files['errors']), 1)
        self.assertIn('No draft text available', no_files['errors'][0],
                      'Error should be reported if no draft file is given')

        wrong_type = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'})
        self.assertCountEqual(wrong_type.keys(), ['errors'])
        self.assertEqual(len(wrong_type['errors']), 1)
        self.assertIn('No draft text available', wrong_type['errors'][0],
                      'Error should be reported if no XML or plaintext file is given')

    @patch.object(XMLDraft, 'get_refs')
    @patch.object(XMLDraft, '__init__', return_value=None)
    def test_xml(self, mock_init, mock_get_refs):
        """Should build reference relations with only XML"""
        mock_get_refs.return_value = self._get_refs_return_value()

        result = rebuild_reference_relations(self.doc, {'xml': 'file.xml'})

        # if the method of calling the XMLDraft() constructor changes, this will need to be updated
        positional_args = mock_init.call_args[0]
        self.assertEqual(positional_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments')
        self._assert_relations_rebuilt(result)

    @patch.object(PlaintextDraft, 'get_refs')
    @patch.object(PlaintextDraft, '__init__', return_value=None)
    def test_plaintext(self, mock_init, mock_get_refs):
        """Should build reference relations with only plaintext"""
        mock_get_refs.return_value = self._get_refs_return_value()

        with name_of_file_containing('contents') as temp_file_name:
            result = rebuild_reference_relations(self.doc, {'txt': temp_file_name})

        # if the method of calling the PlaintextDraft() constructor changes, this test will need to be updated
        keyword_args = mock_init.call_args[1]
        self.assertEqual(keyword_args, {'text': 'contents', 'source': temp_file_name},
                         'PlaintextDraft initialized with unexpected arguments')
        self._assert_relations_rebuilt(result)

    @patch.object(PlaintextDraft, '__init__')
    @patch.object(XMLDraft, 'get_refs')
    @patch.object(XMLDraft, '__init__', return_value=None)
    def test_xml_and_plaintext(self, mock_init, mock_get_refs, mock_plaintext_init):
        """Should build reference relations with XML when plaintext also available"""
        mock_get_refs.return_value = self._get_refs_return_value()

        result = rebuild_reference_relations(self.doc, {'txt': 'file.txt', 'xml': 'file.xml'})

        self.assertFalse(mock_plaintext_init.called, 'PlaintextDraft should not be used when XML is available')
        # if the method of calling the XMLDraft() constructor changes, this will need to be updated
        positional_args = mock_init.call_args[0]
        self.assertEqual(positional_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments')
        self._assert_relations_rebuilt(result)

View file

@ -39,6 +39,8 @@ from ietf.utils import draft, text
from ietf.utils.mail import send_mail
from ietf.mailtrigger.utils import gather_address_lists
from ietf.utils import log
from ietf.utils.xmldraft import XMLDraft
def save_document_in_history(doc):
"""Save a snapshot of document and related objects in the database."""
@ -742,21 +744,25 @@ def update_telechat(request, doc, by, new_telechat_date, new_returning_item=None
return e
def rebuild_reference_relations(doc,filename=None):
def rebuild_reference_relations(doc, filenames):
"""Rebuild reference relations for a document
filenames should be a dict mapping file ext (i.e., type) to the full path of each file.
"""
if doc.type.slug != 'draft':
return None
if not filename:
if doc.get_state_slug() == 'rfc':
filename=os.path.join(settings.RFC_PATH,doc.canonical_name()+".txt")
else:
filename=os.path.join(settings.INTERNET_DRAFT_PATH,doc.filename_with_rev())
try:
with io.open(filename, 'rb') as file:
refs = draft.Draft(file.read().decode('utf8'), filename).get_refs()
except IOError as e:
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
# try XML first
if 'xml' in filenames:
refs = XMLDraft(filenames['xml']).get_refs()
elif 'txt' in filenames:
filename = filenames['txt']
try:
refs = draft.PlaintextDraft.from_file(filename).get_refs()
except IOError as e:
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
else:
return {'errors': ['No draft text available for rebuilding reference relations. Need XML or plaintext.']}
doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete()
@ -764,6 +770,7 @@ def rebuild_reference_relations(doc,filename=None):
errors = []
unfound = set()
for ( ref, refType ) in refs.items():
# As of Dec 2021, DocAlias has a unique constraint on the name field, so count > 1 should not occur
refdoc = DocAlias.objects.filter( name=ref )
count = refdoc.count()
if count == 0:

View file

@ -80,7 +80,7 @@ from ietf.review.models import ReviewAssignment
from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs
from ietf.review.utils import no_review_from_teams_on_doc
from ietf.utils import markup_txt, log, markdown
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
from ietf.utils.response import permission_denied
from ietf.utils.text import maybe_split
@ -1842,7 +1842,7 @@ def idnits2_state(request, name, rev=None):
else:
text = doc.text()
if text:
parsed_draft = Draft(text=doc.text(), source=name, name_from_source=False)
parsed_draft = PlaintextDraft(text=doc.text(), source=name, name_from_source=False)
doc.deststatus = parsed_draft.get_status()
else:
doc.deststatus="Unknown"

View file

@ -29,7 +29,7 @@ import debug # pyflakes:ignore
from ietf.doc.models import Document
from ietf.name.models import FormalLanguageName
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
parser = argparse.ArgumentParser()
parser.add_argument("--document", help="specific document name")
@ -89,7 +89,7 @@ for doc in docs_qs.prefetch_related("docalias", "formal_languages", "documentaut
with io.open(path, 'rb') as f:
say("\nProcessing %s" % doc.name)
sys.stdout.flush()
d = Draft(unicode(f.read()), path)
d = PlaintextDraft(unicode(f.read()), path)
updated = False

View file

@ -38,7 +38,7 @@ from ietf.submit.parsers.pdf_parser import PDFParser
from ietf.submit.parsers.plain_parser import PlainParser
from ietf.submit.parsers.xml_parser import XMLParser
from ietf.utils import log
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
from ietf.utils.text import normalize_text
class SubmissionBaseUploadForm(forms.Form):
@ -302,7 +302,7 @@ class SubmissionBaseUploadForm(forms.Form):
try:
text = bytes.decode(self.file_info['txt'].charset)
#
self.parsed_draft = Draft(text, txt_file.name)
self.parsed_draft = PlaintextDraft(text, txt_file.name)
if self.filename == None:
self.filename = self.parsed_draft.filename
elif self.filename != self.parsed_draft.filename:

View file

@ -13,14 +13,19 @@ import mock
from io import StringIO
from pyquery import PyQuery
from pathlib import Path
from django.conf import settings
from django.test import override_settings
from django.test.client import RequestFactory
from django.urls import reverse as urlreverse
from django.utils.encoding import force_str, force_text
import debug # pyflakes:ignore
from ietf.submit.utils import expirable_submissions, expire_submission
from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory
from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
post_submission)
from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory, IndividualRfcFactory
from ietf.doc.models import ( Document, DocAlias, DocEvent, State,
BallotPositionDocEvent, DocumentAuthor, SubmissionDocEvent )
from ietf.doc.utils import create_ballot_if_not_open, can_edit_docextresources, update_action_holders
@ -40,7 +45,7 @@ from ietf.utils.accesstoken import generate_access_token
from ietf.utils.mail import outbox, empty_outbox, get_payload_text
from ietf.utils.models import VersionInfo
from ietf.utils.test_utils import login_testing_unauthorized, TestCase
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
class BaseSubmitTestCase(TestCase):
@ -2860,10 +2865,62 @@ class RefsTests(BaseSubmitTestCase):
group = None
file, __ = submission_file('draft-some-subject', '00', group, 'txt', "test_submission.txt", )
draft = Draft(file.read(), file.name)
draft = PlaintextDraft(file.read(), file.name)
refs = draft.get_refs()
self.assertEqual(refs['rfc2119'], 'norm')
self.assertEqual(refs['rfc8174'], 'norm')
self.assertEqual(refs['rfc8126'], 'info')
self.assertEqual(refs['rfc8175'], 'info')
class PostSubmissionTests(BaseSubmitTestCase):
    """Tests of find_submission_filenames() and of post_submission()'s reference rebuilding"""

    @override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md'))
    def test_find_submission_filenames_rfc(self):
        """Posting an RFC submission should use RFC_FILE_TYPES"""
        rfc = IndividualRfcFactory()
        path = Path(self.staging_dir)
        for ext in ['txt', 'xml', 'pdf', 'md']:
            (path / f'{rfc.name}-{rfc.rev}.{ext}').touch()

        files = find_submission_filenames(rfc)

        # assertEqual rather than assertCountEqual: the latter iterates both dicts and so
        # compares only their keys, which would leave the returned paths unchecked.
        self.assertEqual(
            files,
            {
                'txt': str(path / f'{rfc.name}-{rfc.rev}.txt'),
                'xml': str(path / f'{rfc.name}-{rfc.rev}.xml'),
                # should NOT find the pdf or md
            }
        )

    @override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md'))
    def test_find_submission_filenames_draft(self):
        """Posting an I-D submission should use IDSUBMIT_FILE_TYPES"""
        draft = WgDraftFactory()
        path = Path(self.staging_dir)
        for ext in ['txt', 'xml', 'pdf', 'md']:
            (path / f'{draft.name}-{draft.rev}.{ext}').touch()

        files = find_submission_filenames(draft)

        # assertEqual so that the paths, not just the extensions, are verified (see above)
        self.assertEqual(
            files,
            {
                'pdf': str(path / f'{draft.name}-{draft.rev}.pdf'),
                'md': str(path / f'{draft.name}-{draft.rev}.md'),
                # should NOT find the txt or xml
            }
        )

    @mock.patch('ietf.submit.utils.rebuild_reference_relations')
    @mock.patch('ietf.submit.utils.find_submission_filenames')
    def test_post_submission_rebuilds_ref_relations(self, mock_find_filenames, mock_rebuild_reference_relations):
        """The post_submission method should rebuild reference relations from correct files

        This tests that the post_submission() utility function gets the list of files to handle from the
        find_submission_filenames() method and passes them along to rebuild_reference_relations().
        """
        submission = SubmissionFactory()
        mock_find_filenames.return_value = {'xml': f'{self.staging_dir}/{submission.name}.xml'}
        # NOTE(review): the user is attached to the RequestFactory instance itself, not to a
        # request object created by it - confirm post_submission() needs nothing but .user
        request = RequestFactory()
        request.user = PersonFactory().user

        post_submission(request, submission, 'doc_desc', 'subm_desc')

        # Fail with a clear message instead of a TypeError from unpacking call_args == None
        self.assertTrue(mock_rebuild_reference_relations.called,
                        'post_submission() did not call rebuild_reference_relations()')
        args, _ = mock_rebuild_reference_relations.call_args
        self.assertEqual(args[1], mock_find_filenames.return_value)

View file

@ -40,7 +40,7 @@ from ietf.submit.models import ( Submission, SubmissionEvent, Preapproval, Draft
SubmissionCheck, SubmissionExtResource )
from ietf.utils import log
from ietf.utils.accesstoken import generate_random_key
from ietf.utils.draft import Draft
from ietf.utils.draft import PlaintextDraft
from ietf.utils.mail import is_valid_email
from ietf.utils.text import parse_unicode
from ietf.person.name import unidecode_name
@ -262,6 +262,18 @@ def post_rev00_submission_events(draft, submission, submitter):
return events
def find_submission_filenames(draft):
    """Locate the staged files belonging to a draft

    Looks in settings.IDSUBMIT_STAGING_PATH for files named "<name>-<rev>.<ext>". The extensions
    considered are settings.RFC_FILE_TYPES when the draft's state slug is 'rfc', otherwise
    settings.IDSUBMIT_FILE_TYPES.

    Returns a dict mapping each extension whose file exists to that file's full path (as a str).
    """
    staging_dir = pathlib.Path(settings.IDSUBMIT_STAGING_PATH)
    basename = f'{draft.name}-{draft.rev}'
    if draft.get_state_slug() == 'rfc':
        extensions = settings.RFC_FILE_TYPES
    else:
        extensions = settings.IDSUBMIT_FILE_TYPES

    found = {}
    for ext in extensions:
        candidate = staging_dir / f'{basename}.{ext}'
        if candidate.exists():
            found[ext] = str(candidate)
    return found
@transaction.atomic
def post_submission(request, submission, approved_doc_desc, approved_subm_desc):
log.log(f"{submission.name}: start")
@ -352,7 +364,7 @@ def post_submission(request, submission, approved_doc_desc, approved_subm_desc):
log.log(f"{submission.name}: updated state and info")
trouble = rebuild_reference_relations(draft, filename=os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (submission.name, submission.rev)))
trouble = rebuild_reference_relations(draft, find_submission_filenames(draft))
if trouble:
log.log('Rebuild_reference_relations trouble: %s'%trouble)
log.log(f"{submission.name}: rebuilt reference relations")
@ -723,8 +735,7 @@ def save_files(form):
def get_draft_meta(form, saved_files):
authors = []
file_name = saved_files
abstract = None
file_size = None
if form.cleaned_data['xml']:
# Some meta-information, such as the page-count, can only
# be retrieved from the generated text file. Provide a
@ -732,7 +743,7 @@ def get_draft_meta(form, saved_files):
file_name['txt'] = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s.txt' % (form.filename, form.revision))
file_size = os.stat(file_name['txt']).st_size
with io.open(file_name['txt']) as txt_file:
form.parsed_draft = Draft(txt_file.read(), txt_file.name)
form.parsed_draft = PlaintextDraft(txt_file.read(), txt_file.name)
else:
file_size = form.cleaned_data['txt'].size

View file

@ -1,5 +1,5 @@
#!/usr/bin/python
# Copyright The IETF Trust 2009-2020, All Rights Reserved
# Copyright The IETF Trust 2009-2021, All Rights Reserved
# -*- coding: utf-8 -*-
# -*- python -*-
@ -129,11 +129,61 @@ def acronym_match(s, l):
#_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym))
return s == acronym
class Draft:
    """Common interface for draft parsers

    The methods here were lifted from PlaintextDraft (which used to be called Draft). Every
    method raises NotImplementedError until a subclass overrides it. If a public method that
    matters for other draft formats is missing, add it to this class.
    """
    # Reference type labels returned as the values of the get_refs() dict
    REF_TYPE_NORMATIVE = 'norm'
    REF_TYPE_INFORMATIVE = 'info'
    REF_TYPE_UNKNOWN = 'unk'

    def get_abstract(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_author_list(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_authors(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_authors_with_firm(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_creation_date(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_formal_languages(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_pagecount(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_refs(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_status(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_title(self):
        """Not implemented in the base class"""
        raise NotImplementedError()

    def get_wordcount(self):
        """Not implemented in the base class"""
        raise NotImplementedError()
# ----------------------------------------------------------------------
class Draft():
class PlaintextDraft(Draft):
def __init__(self, text, source, name_from_source=False):
"""Initialize a Draft instance
:param text: plaintext draft contents
:param source: name of file containing the contents
:param name_from_source: if True, fall back to source to determine draft name not found from text
"""
super().__init__()
assert isinstance(text, str)
self.source = source
self.rawtext = text
@ -169,6 +219,11 @@ class Draft():
self._creation_date = None
self._title = None
@classmethod
def from_file(cls, source, *args, **kwargs):
    """Alternate constructor: build an instance from the contents of a UTF-8 text file

    :param source: name of the file to read; also passed to the constructor as ``source``
    :param args: additional positional arguments forwarded to the constructor
    :param kwargs: additional keyword arguments forwarded to the constructor
    """
    with open(source, mode='r', encoding='utf8') as draft_file:
        contents = draft_file.read()
    # text and source are passed by keyword, as the original call did
    return cls(text=contents, source=source, *args, **kwargs)
# ------------------------------------------------------------------
def _parse_draftname(self):
draftname_regex = r"(draft-[a-z0-9-]*)-(\d\d)(\w|\.txt|\n|$)"
@ -1055,7 +1110,7 @@ class Draft():
refs = {}
in_ref_sect = False
in_norm_ref_sect = False
refType = 'unk'
refType = self.REF_TYPE_UNKNOWN
for i in range( 15, len( self.lines ) ):
line = self.lines[ i ].strip()
@ -1071,10 +1126,10 @@ class Draft():
if m:
if not any( [ rule.search( line ) for rule in not_starting_regexes ]):
in_ref_sect = True
refType = 'info'
refType = self.REF_TYPE_INFORMATIVE
if line.lower().find("normative") > 1:
in_norm_ref_sect = True
refType = 'norm'
refType = self.REF_TYPE_NORMATIVE
# might be subsections within a references section
if in_ref_sect and not in_norm_ref_sect:
@ -1088,7 +1143,7 @@ class Draft():
in_ref_sect = True
if line.lower().find("normative") > 1:
in_norm_ref_sect = True
refType = 'norm'
refType = self.REF_TYPE_NORMATIVE
# look for the end of the normative reference section
if in_norm_ref_sect:
@ -1100,7 +1155,7 @@ class Draft():
if m and line.lower().find("normative") < 0:
in_norm_ref_sect = False
refType = 'info'
refType = self.REF_TYPE_INFORMATIVE
# find references within the section
if in_ref_sect:
@ -1216,9 +1271,9 @@ def getmeta(fn):
timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(filename)[stat.ST_MTIME]))
with io.open(filename, 'rb') as file:
try:
draft = Draft(file.read().decode('utf8'), filename)
draft = PlaintextDraft(file.read().decode('utf8'), filename)
except UnicodeDecodeError:
draft = Draft(file.read().decode('latin1'), filename)
draft = PlaintextDraft(file.read().decode('latin1'), filename)
#_debug("\n".join(draft.lines))
fields["eventdate"] = timestamp

View file

@ -0,0 +1,103 @@
<?xml version='1.0'?>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<rfc category="exp" submissionType="independent" ipr="trust200902" docName="draft-test-references-00">
<front>
<title>Test Draft with References</title>
<author fullname="Alfred Person" initials="A." surname="Person" role="editor">
<address>
<email>aperson@example.com</email>
</address>
</author>
</front>
<middle>
<section title="First Section">
<t>
Text here.
<xref target="RFC0001">RFC0001</xref>
<xref target="RFC0255">RFC0255</xref>
<xref target="bcp6">BCP6</xref>
<xref target="RFC1207">FYI7</xref>
</t>
</section>
</middle>
<back>
<references title="Normative References">
<reference anchor="RFC0001" target="https://www.rfc-editor.org/info/rfc1">
<front>
<title>Host Software</title>
<author initials="S." surname="Crocker" fullname="S. Crocker">
<organization/>
</author>
<date year="1969" month="April"/>
</front>
<seriesInfo name="RFC" value="1"/>
<seriesInfo name="DOI" value="10.17487/RFC0001"/>
</reference>
</references>
<references title="Informative References">
<reference anchor='RFC0255' target='https://www.rfc-editor.org/info/rfc255'>
<front>
<title>Status of network hosts</title>
<author initials='E.' surname='Westheimer' fullname='E. Westheimer'>
<organization/>
</author>
<date year='1971' month='October'/>
</front>
<seriesInfo name='RFC' value='255'/>
<seriesInfo name='DOI' value='10.17487/RFC0255'/>
</reference>
<reference anchor='bcp6' target='https://www.rfc-editor.org/info/rfc1930'>
<front>
<title>Guidelines for creation, selection, and registration of an Autonomous System (AS)</title>
<author initials='J.' surname='Hawkinson' fullname='J. Hawkinson'>
<organization/>
</author>
<author initials='T.' surname='Bates' fullname='T. Bates'>
<organization/>
</author>
<date year='1996' month='March'/>
<abstract>
<t>This memo discusses when it is appropriate to register and utilize an Autonomous System
(AS), and lists criteria for such. This document specifies an Internet Best Current
Practices for the Internet Community, and requests discussion and suggestions for
improvements.
</t>
</abstract>
</front>
<seriesInfo name='BCP' value='6'/>
<seriesInfo name='RFC' value='1930'/>
<seriesInfo name='DOI' value='10.17487/RFC1930'/>
</reference>
</references>
<references title="Mysterious References">
<!-- Unrecognized references section name -->
<reference anchor='RFC1207' target='https://www.rfc-editor.org/info/rfc1207'>
<front>
<title>FYI on Questions and Answers: Answers to commonly asked &quot;experienced Internet user&quot;
questions
</title>
<author initials='G.S.' surname='Malkin' fullname='G.S. Malkin'>
<organization/>
</author>
<author initials='A.N.' surname='Marine' fullname='A.N. Marine'>
<organization/>
</author>
<author initials='J.K.' surname='Reynolds' fullname='J.K. Reynolds'>
<organization/>
</author>
<date year='1991' month='February'/>
<abstract>
<t>This FYI RFC is one of two FYI's called, &quot;Questions and Answers&quot; (Q/A), produced by
the User Services Working Group of the Internet Engineering Task Force (IETF). The goal is
to document the most commonly asked questions and answers in the Internet. This memo
provides information for the Internet community. It does not specify any standard.
</t>
</abstract>
</front>
<seriesInfo name='FYI' value='7'/>
<seriesInfo name='RFC' value='1207'/>
<seriesInfo name='DOI' value='10.17487/RFC1207'/>
</reference>
</references>
</back>
</rfc>

View file

@ -0,0 +1,153 @@
<?xml version='1.0'?>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<rfc category="exp" submissionType="independent" ipr="trust200902" docName="draft-test-references-00" version="3">
<front>
<title>Test Draft with References</title>
<author fullname="Alfred Person" initials="A." surname="Person" role="editor">
<address>
<address>
<email>aperson@example.com</email>
</address>
</address>
</author>
</front>
<middle>
<section>
<name>First Section</name>
<t>
Text here.
<xref target="RFC0001">RFC0001</xref>
<xref target="RFC0255">RFC0255</xref>
<xref target="bcp6">BCP6</xref>
<xref target="RFC1207">FYI7</xref>
</t>
</section>
</middle>
<back>
<references>
<name>Normative References</name>
<reference anchor="RFC0001" target="https://www.rfc-editor.org/info/rfc1">
<front>
<title>Host Software</title>
<author initials="S." surname="Crocker" fullname="S. Crocker">
<organization/>
</author>
<date year="1969" month="April"/>
</front>
<seriesInfo name="RFC" value="1"/>
<seriesInfo name="DOI" value="10.17487/RFC0001"/>
</reference>
</references>
<references>
<name>Informative References</name>
<reference anchor='RFC0255' target='https://www.rfc-editor.org/info/rfc255'>
<front>
<title>Status of network hosts</title>
<author initials='E.' surname='Westheimer' fullname='E. Westheimer'>
<organization/>
</author>
<date year='1971' month='October'/>
</front>
<seriesInfo name='RFC' value='255'/>
<seriesInfo name='DOI' value='10.17487/RFC0255'/>
</reference>
<referencegroup anchor="bcp6">
<reference anchor='RFC1930' target='https://www.rfc-editor.org/info/rfc1930'>
<front>
<title>Guidelines for creation, selection, and registration of an Autonomous System (AS)</title>
<author initials='J.' surname='Hawkinson' fullname='J. Hawkinson'>
<organization/>
</author>
<author initials='T.' surname='Bates' fullname='T. Bates'>
<organization/>
</author>
<date year='1996' month='March'/>
<abstract>
<t>This memo discusses when it is appropriate to register and utilize an Autonomous System
(AS), and lists criteria for such. This document specifies an Internet Best Current
Practices for the Internet Community, and requests discussion and suggestions for
improvements.
</t>
</abstract>
</front>
<seriesInfo name='BCP' value='6'/>
<seriesInfo name='RFC' value='1930'/>
<seriesInfo name='DOI' value='10.17487/RFC1930'/>
</reference>
<reference anchor='RFC6996' target='https://www.rfc-editor.org/info/rfc6996'>
<front>
<title>Autonomous System (AS) Reservation for Private Use</title>
<author initials='J.' surname='Mitchell' fullname='J. Mitchell'>
<organization/>
</author>
<date year='2013' month='July'/>
<abstract>
<t>This document describes the reservation of Autonomous System Numbers (ASNs) that are for
Private Use only, known as Private Use ASNs, and provides operational guidance on their
use. This document enlarges the total space available for Private Use ASNs by
documenting the reservation of a second, larger range and updates RFC 1930 by replacing
Section 10 of that document.
</t>
</abstract>
</front>
<seriesInfo name='BCP' value='6'/>
<seriesInfo name='RFC' value='6996'/>
<seriesInfo name='DOI' value='10.17487/RFC6996'/>
</reference>
<reference anchor='RFC7300' target='https://www.rfc-editor.org/info/rfc7300'>
<front>
<title>Reservation of Last Autonomous System (AS) Numbers</title>
<author initials='J.' surname='Haas' fullname='J. Haas'>
<organization/>
</author>
<author initials='J.' surname='Mitchell' fullname='J. Mitchell'>
<organization/>
</author>
<date year='2014' month='July'/>
<abstract>
<t>This document reserves two Autonomous System Numbers (ASNs) at the end of the 16-bit and
32-bit ranges, described in this document as &quot;Last ASNs&quot;, and provides
guidance to implementers and operators on their use. This document updates Section 10 of
RFC 1930.
</t>
</abstract>
</front>
<seriesInfo name='BCP' value='6'/>
<seriesInfo name='RFC' value='7300'/>
<seriesInfo name='DOI' value='10.17487/RFC7300'/>
</reference>
</referencegroup>
</references>
<references>
<name>Mysterious References</name>
<!-- Unrecognized references section name -->
<reference anchor='RFC1207' target='https://www.rfc-editor.org/info/rfc1207'>
<front>
<title>FYI on Questions and Answers: Answers to commonly asked &quot;experienced Internet user&quot;
questions
</title>
<author initials='G.S.' surname='Malkin' fullname='G.S. Malkin'>
<organization/>
</author>
<author initials='A.N.' surname='Marine' fullname='A.N. Marine'>
<organization/>
</author>
<author initials='J.K.' surname='Reynolds' fullname='J.K. Reynolds'>
<organization/>
</author>
<date year='1991' month='February'/>
<abstract>
<t>This FYI RFC is one of two FYI's called, &quot;Questions and Answers&quot; (Q/A), produced by
the User Services Working Group of the Internet Engineering Task Force (IETF). The goal is
to document the most commonly asked questions and answers in the Internet. This memo
provides information for the Internet community. It does not specify any standard.
</t>
</abstract>
</front>
<seriesInfo name='FYI' value='7'/>
<seriesInfo name='RFC' value='1207'/>
<seriesInfo name='DOI' value='10.17487/RFC1207'/>
</reference>
</references>
</back>
</rfc>

View file

@ -38,12 +38,14 @@ from ietf.group.models import Group
from ietf.person.name import name_parts, unidecode_name
from ietf.submit.tests import submission_file
from ietf.utils.bower_storage import BowerStorageFinder
from ietf.utils.draft import Draft, getmeta
from ietf.utils.draft import PlaintextDraft, getmeta
from ietf.utils.log import unreachable, assertion
from ietf.utils.mail import send_mail_preformatted, send_mail_text, send_mail_mime, outbox, get_payload_text
from ietf.utils.test_runner import get_template_paths, set_coverage_checking
from ietf.utils.test_utils import TestCase
from ietf.utils.text import parse_unicode
from ietf.utils.xmldraft import XMLDraft
skip_wiki_glue_testing = False
skip_message_svn = ""
@ -423,12 +425,12 @@ class TestBowerStaticFiles(TestCase):
self.assertNotEqual(files,[])
class DraftTests(TestCase):
class PlaintextDraftTests(TestCase):
def setUp(self):
super().setUp()
file,_ = submission_file(name='draft-test-draft-class',rev='00',format='txt',templatename='test_submission.txt',group=None)
self.draft = Draft(text=file.getvalue(),source='draft-test-draft-class-00.txt',name_from_source=False)
self.draft = PlaintextDraft(text=file.getvalue(), source='draft-test-draft-class-00.txt', name_from_source=False)
def test_get_status(self):
self.assertEqual(self.draft.get_status(),'Informational')
@ -451,6 +453,32 @@ class DraftTests(TestCase):
shutil.rmtree(tempdir)
class XMLDraftTests(TestCase):
    """Tests of XMLDraft.get_refs() against the v2 and v3 XML fixture files"""

    def test_get_refs_v3(self):
        """References should be extracted from a draft in v3 XML"""
        expected_refs = {
            'rfc1': XMLDraft.REF_TYPE_NORMATIVE,
            'rfc255': XMLDraft.REF_TYPE_INFORMATIVE,
            'bcp6': XMLDraft.REF_TYPE_INFORMATIVE,
            'rfc1207': XMLDraft.REF_TYPE_UNKNOWN,
        }
        parsed = XMLDraft('ietf/utils/test_draft_with_references_v3.xml')
        found_refs = parsed.get_refs()
        self.assertEqual(found_refs, expected_refs)

    def test_get_refs_v2(self):
        """References should be extracted from a draft in v2 XML"""
        expected_refs = {
            'rfc1': XMLDraft.REF_TYPE_NORMATIVE,
            'rfc255': XMLDraft.REF_TYPE_INFORMATIVE,
            'bcp6': XMLDraft.REF_TYPE_INFORMATIVE,
            'rfc1207': XMLDraft.REF_TYPE_UNKNOWN,
        }
        parsed = XMLDraft('ietf/utils/test_draft_with_references_v2.xml')
        found_refs = parsed.get_refs()
        self.assertEqual(found_refs, expected_refs)
class NameTests(TestCase):
def test_name_parts(self):

87
ietf/utils/xmldraft.py Normal file
View file

@ -0,0 +1,87 @@
# Copyright The IETF Trust 2021, All Rights Reserved
# -*- coding: utf-8 -*-
import os
import xml2rfc
import debug # pyflakes: ignore
from contextlib import ExitStack
from django.conf import settings
from .draft import Draft
class XMLDraft(Draft):
    """Draft from XML source

    Currently just a holding place for get_refs() for an XML file. Can eventually expand
    to implement the other public methods of Draft as need arises.
    """
    def __init__(self, xml_file):
        """Initialize XMLDraft instance

        :parameter xml_file: path to file containing XML source
        """
        super().__init__()
        # cast xml_file to str so, e.g., this will work with a Path
        self.xmltree = self.parse_xml(str(xml_file))
        self.xmlroot = self.xmltree.getroot()

    @staticmethod
    def parse_xml(filename):
        """Parse an XML draft with xml2rfc and return the resulting tree

        While parsing, xml2rfc's log output is redirected to os.devnull and the XML_LIBRARY
        environment variable is set from settings.XML_LIBRARY; both are restored afterwards,
        even if parsing raises. If the root element's "version" attribute is "2" (or absent),
        the tree is converted with xml2rfc's V2v3XmlWriter before being returned.

        :parameter filename: path to the XML file, as a str
        """
        orig_write_out = xml2rfc.log.write_out
        orig_write_err = xml2rfc.log.write_err
        orig_xml_library = os.environ.get('XML_LIBRARY', None)

        with ExitStack() as stack:
            # Opened through the stack so the handles are closed on exit. They are entered
            # before cleanup() is registered so that (exits being LIFO) the original log
            # writers are restored before the devnull handles are closed.
            devnull_out = stack.enter_context(open(os.devnull, 'w'))
            devnull_err = stack.enter_context(open(os.devnull, 'w'))

            @stack.callback
            def cleanup():  # called when context exited, even if there's an exception
                xml2rfc.log.write_out = orig_write_out
                xml2rfc.log.write_err = orig_write_err
                # Default given so a failure before the variable is set below does not turn
                # into a KeyError here that would hide the original exception
                os.environ.pop('XML_LIBRARY', None)
                if orig_xml_library is not None:
                    os.environ['XML_LIBRARY'] = orig_xml_library

            xml2rfc.log.write_out = devnull_out
            xml2rfc.log.write_err = devnull_err
            os.environ['XML_LIBRARY'] = settings.XML_LIBRARY

            parser = xml2rfc.XmlRfcParser(filename, quiet=True)
            tree = parser.parse()
            xml_version = tree.getroot().get('version', '2')
            if xml_version == '2':
                v2v3 = xml2rfc.V2v3XmlWriter(tree)
                tree.tree = v2v3.convert2to3()
        return tree

    def _document_name(self, anchor):
        """Guess document name from reference anchor

        Looks for series numbers and removes leading 0s from the number. An anchor that is
        only a series label with no number (e.g., "RFC") is returned lowercased, unchanged.
        """
        anchor = anchor.lower()  # always give back lowercase
        label = anchor.rstrip('0123456789')  # remove trailing digits
        number = anchor[len(label):]
        # Require at least one digit: int('') would raise ValueError
        if number and label in ['rfc', 'bcp', 'fyi', 'std']:
            return f'{label}{int(number)}'
        return anchor

    def _reference_section_type(self, section_name):
        """Determine reference type from name of references section

        :parameter section_name: section name, or None if the section has no name
        """
        # A <references> element without a <name> child yields None from findtext()
        section_name = (section_name or '').lower()
        if 'normative' in section_name:
            return self.REF_TYPE_NORMATIVE
        elif 'informative' in section_name:
            return self.REF_TYPE_INFORMATIVE
        return self.REF_TYPE_UNKNOWN

    def get_refs(self):
        """Extract references from the draft

        Returns a dict mapping the document name guessed from each reference's anchor to the
        reference type of the <references> section that directly contains it.
        """
        refs = {}
        # accept nested <references> sections
        for section in self.xmlroot.findall('back//references'):
            ref_type = self._reference_section_type(section.findtext('name'))
            for ref in (section.findall('./reference') + section.findall('./referencegroup')):
                anchor = ref.get('anchor')
                if anchor is None:
                    # no anchor, so there is nothing to derive a document name from
                    continue
                refs[self._document_name(anchor)] = ref_type
        return refs