Merged in [14851] from housley@vigilsec.com:

Improve parser for references in Internet-Drafts. Fixes #2360
 - Legacy-Id: 14867
Note: SVN reference [14851] has been migrated to Git commit 565b10e00e
This commit is contained in:
Henrik Levkowetz 2018-03-19 22:37:03 +00:00
commit 75deb35e10

View file

@ -1019,15 +1019,6 @@ class Draft():
# ------------------------------------------------------------------
def get_refs(self):
refType = 'unk'
refs = {}
typemap = {
'normative': 'norm',
'informative': 'info',
'informational': 'info',
'non-normative': 'info',
None: 'old'
}
# Bill's horrible "references section" regexps, built up over lots of years
# of fine tuning for different formats.
# Examples:
@ -1049,31 +1040,67 @@ class Draft():
# std at the front of a line can hide things like IEEE STD or MIL-STD
std_start = re.compile( r'(?i)std\n*\b' )
refs = {}
in_ref_sect = False
in_norm_ref_sect = False
refType = 'unk'
for i in range( 15, len( self.lines ) ):
line = self.lines[ i ].strip()
# skip over lines until we find the start of the reference section
if not in_ref_sect:
m = sectionre.match( line )
if m:
match = m.group( 1 )
if match is not None:
match = match.lower()
refType = typemap.get( match, 'unk' )
continue
if not m:
m = sectionre2.match( line )
if m:
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
continue
if not m:
m = sectionre3.match( line )
if m:
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
continue
in_ref_sect = True
refType = 'info'
if line.lower().find("normative") > 1:
in_norm_ref_sect = True
refType = 'norm'
# might be subsections within a references section
if in_ref_sect and not in_norm_ref_sect:
m = sectionre.match( line )
if not m:
m = sectionre2.match( line )
if not m:
m = sectionre3.match( line )
if m:
in_ref_sect = True
if line.lower().find("normative") > 1:
in_norm_ref_sect = True
refType = 'norm'
# look for the end of the normative reference section
if in_norm_ref_sect:
m = sectionre.match( line )
if not m:
m = sectionre2.match( line )
if not m:
m = sectionre3.match( line )
if m and line.lower().find("normative") < 0:
in_norm_ref_sect = False
refType = 'info'
# find references within the section
if in_ref_sect:
# If something got split badly, rejoin it.
if eol.search( line ) and i < len( self.lines ) - 1:
line += self.lines[ i + 1 ].lstrip()
m = idref.search( line )
if m:
draft = m.group( 1 )
if draft not in refs:
refs[ draft ] = refType
continue
m = rfcref.search( line )
if m:
( series, number ) = m.groups()
@ -1081,16 +1108,13 @@ class Draft():
line = self.lines[i-1].rstrip()+line
if series.lower()!='std' or not not_our_std_ref.search( line ):
name = series.lower() + number.lstrip( '0' )
if name not in refs:
refs[ name ] = refType
continue
# References to BCP78 and BCP79 in boilerplate will appear as "unk".
# Remove them.
for boilerplate in ( 'bcp78', 'bcp79' ):
if refs.get( boilerplate ) == 'unk':
del refs[ boilerplate ]
# Don't add any references that point back into this doc
if self.filename in refs:
del refs[self.filename]
return refs
def old_get_refs( self ):