Improve parser for references in Internet-Drafts. Fixes #2360
- Legacy-Id: 14851
This commit is contained in:
parent
49f00b76ea
commit
565b10e00e
|
@ -1019,79 +1019,103 @@ class Draft():
|
|||
|
||||
# ------------------------------------------------------------------
|
||||
def get_refs(self):
|
||||
refType = 'unk'
|
||||
refs = {}
|
||||
typemap = {
|
||||
'normative': 'norm',
|
||||
'informative': 'info',
|
||||
'informational': 'info',
|
||||
'non-normative': 'info',
|
||||
None: 'old'
|
||||
}
|
||||
# Bill's horrible "references section" regexps, built up over lots of years
|
||||
# of fine tuning for different formats.
|
||||
# Examples:
|
||||
# Appendix A. References:
|
||||
# A.1. Informative References:
|
||||
sectionre = re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' )
|
||||
# 9.1 Normative
|
||||
sectionre2 = re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' )
|
||||
# One other reference section type seen:
|
||||
sectionre3 = re.compile( r'(?i)References \((\S+ormative)\)$' )
|
||||
# An Internet-Draft reference.
|
||||
idref = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
|
||||
# An RFC-and-other-series reference.
|
||||
rfcref = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
|
||||
# Bill's horrible "references section" regexps, built up over lots of years
|
||||
# of fine tuning for different formats.
|
||||
# Examples:
|
||||
# Appendix A. References:
|
||||
# A.1. Informative References:
|
||||
sectionre = re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' )
|
||||
# 9.1 Normative
|
||||
sectionre2 = re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' )
|
||||
# One other reference section type seen:
|
||||
sectionre3 = re.compile( r'(?i)References \((\S+ormative)\)$' )
|
||||
# An Internet-Draft reference.
|
||||
idref = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
|
||||
# An RFC-and-other-series reference.
|
||||
rfcref = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
|
||||
# False positives for std
|
||||
not_our_std_ref = re.compile( r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))' )
|
||||
# An Internet-Draft or series reference hyphenated by a well-meaning line break.
|
||||
eol = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
|
||||
# An Internet-Draft or series reference hyphenated by a well-meaning line break.
|
||||
eol = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
|
||||
# std at the front of a line can hide things like IEEE STD or MIL-STD
|
||||
std_start = re.compile( r'(?i)std\n*\b' )
|
||||
|
||||
for i in range( 15, len( self.lines ) ):
|
||||
line = self.lines[ i ].strip()
|
||||
m = sectionre.match( line )
|
||||
if m:
|
||||
match = m.group( 1 )
|
||||
if match is not None:
|
||||
match = match.lower()
|
||||
refType = typemap.get( match, 'unk' )
|
||||
continue
|
||||
m = sectionre2.match( line )
|
||||
if m:
|
||||
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
|
||||
continue
|
||||
m = sectionre3.match( line )
|
||||
if m:
|
||||
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
|
||||
continue
|
||||
# If something got split badly, rejoin it.
|
||||
if eol.search( line ) and i < len( self.lines ) - 1:
|
||||
line += self.lines[ i + 1 ].lstrip()
|
||||
m = idref.search( line )
|
||||
if m:
|
||||
draft = m.group( 1 )
|
||||
refs[ draft ] = refType
|
||||
continue
|
||||
m = rfcref.search( line )
|
||||
if m:
|
||||
( series, number ) = m.groups()
|
||||
if series.lower()=='std' and std_start.search(line) and i > 15:
|
||||
line = self.lines[i-1].rstrip()+line
|
||||
if series.lower()!='std' or not not_our_std_ref.search( line ):
|
||||
name = series.lower() + number.lstrip( '0' )
|
||||
refs[ name ] = refType
|
||||
continue
|
||||
# References to BCP78 and BCP79 in boilerplate will appear as "unk".
|
||||
# Remove them.
|
||||
for boilerplate in ( 'bcp78', 'bcp79' ):
|
||||
if refs.get( boilerplate ) == 'unk':
|
||||
del refs[ boilerplate ]
|
||||
refs = {}
|
||||
in_ref_sect = False
|
||||
in_norm_ref_sect = False
|
||||
refType = 'unk'
|
||||
|
||||
for i in range( 15, len( self.lines ) ):
|
||||
line = self.lines[ i ].strip()
|
||||
|
||||
# skip over lines until we find the start of the reference section
|
||||
if not in_ref_sect:
|
||||
m = sectionre.match( line )
|
||||
if not m:
|
||||
m = sectionre2.match( line )
|
||||
if not m:
|
||||
m = sectionre3.match( line )
|
||||
|
||||
if m:
|
||||
in_ref_sect = True
|
||||
refType = 'info'
|
||||
if line.lower().find("normative") > 1:
|
||||
in_norm_ref_sect = True
|
||||
refType = 'norm'
|
||||
|
||||
# might be subsections within a references section
|
||||
if in_ref_sect and not in_norm_ref_sect:
|
||||
m = sectionre.match( line )
|
||||
if not m:
|
||||
m = sectionre2.match( line )
|
||||
if not m:
|
||||
m = sectionre3.match( line )
|
||||
|
||||
if m:
|
||||
in_ref_sect = True
|
||||
if line.lower().find("normative") > 1:
|
||||
in_norm_ref_sect = True
|
||||
refType = 'norm'
|
||||
|
||||
# look for the end of the normative reference section
|
||||
if in_norm_ref_sect:
|
||||
m = sectionre.match( line )
|
||||
if not m:
|
||||
m = sectionre2.match( line )
|
||||
if not m:
|
||||
m = sectionre3.match( line )
|
||||
|
||||
if m and line.lower().find("normative") < 0:
|
||||
in_norm_ref_sect = False
|
||||
refType = 'info'
|
||||
|
||||
# find references within the section
|
||||
if in_ref_sect:
|
||||
# If something got split badly, rejoin it.
|
||||
if eol.search( line ) and i < len( self.lines ) - 1:
|
||||
line += self.lines[ i + 1 ].lstrip()
|
||||
|
||||
m = idref.search( line )
|
||||
if m:
|
||||
draft = m.group( 1 )
|
||||
if draft not in refs:
|
||||
refs[ draft ] = refType
|
||||
|
||||
m = rfcref.search( line )
|
||||
if m:
|
||||
( series, number ) = m.groups()
|
||||
if series.lower()=='std' and std_start.search(line) and i > 15:
|
||||
line = self.lines[i-1].rstrip()+line
|
||||
if series.lower()!='std' or not not_our_std_ref.search( line ):
|
||||
name = series.lower() + number.lstrip( '0' )
|
||||
if name not in refs:
|
||||
refs[ name ] = refType
|
||||
|
||||
# Don't add any references that point back into this doc
|
||||
if self.filename in refs:
|
||||
del refs[self.filename]
|
||||
return refs
|
||||
|
||||
return refs
|
||||
|
||||
def old_get_refs( self ):
|
||||
refs = []
|
||||
|
|
Loading…
Reference in a new issue