Merged in [14851] from housley@vigilsec.com:
Improve parser for references in Internet-Drafts. Fixes #2360
- Legacy-Id: 14867
Note: SVN reference [14851] has been migrated to Git commit 565b10e00e
This commit is contained in:
commit
75deb35e10
|
@@ -1019,79 +1019,103 @@ class Draft():
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def get_refs(self):
|
def get_refs(self):
|
||||||
refType = 'unk'
|
# Bill's horrible "references section" regexps, built up over lots of years
|
||||||
refs = {}
|
# of fine tuning for different formats.
|
||||||
typemap = {
|
# Examples:
|
||||||
'normative': 'norm',
|
# Appendix A. References:
|
||||||
'informative': 'info',
|
# A.1. Informative References:
|
||||||
'informational': 'info',
|
sectionre = re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' )
|
||||||
'non-normative': 'info',
|
# 9.1 Normative
|
||||||
None: 'old'
|
sectionre2 = re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' )
|
||||||
}
|
# One other reference section type seen:
|
||||||
# Bill's horrible "references section" regexps, built up over lots of years
|
sectionre3 = re.compile( r'(?i)References \((\S+ormative)\)$' )
|
||||||
# of fine tuning for different formats.
|
# An Internet-Draft reference.
|
||||||
# Examples:
|
idref = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
|
||||||
# Appendix A. References:
|
# An RFC-and-other-series reference.
|
||||||
# A.1. Informative References:
|
rfcref = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
|
||||||
sectionre = re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' )
|
|
||||||
# 9.1 Normative
|
|
||||||
sectionre2 = re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' )
|
|
||||||
# One other reference section type seen:
|
|
||||||
sectionre3 = re.compile( r'(?i)References \((\S+ormative)\)$' )
|
|
||||||
# An Internet-Draft reference.
|
|
||||||
idref = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
|
|
||||||
# An RFC-and-other-series reference.
|
|
||||||
rfcref = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
|
|
||||||
# False positives for std
|
# False positives for std
|
||||||
not_our_std_ref = re.compile( r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))' )
|
not_our_std_ref = re.compile( r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))' )
|
||||||
# An Internet-Draft or series reference hyphenated by a well-meaning line break.
|
# An Internet-Draft or series reference hyphenated by a well-meaning line break.
|
||||||
eol = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
|
eol = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
|
||||||
# std at the front of a line can hide things like IEEE STD or MIL-STD
|
# std at the front of a line can hide things like IEEE STD or MIL-STD
|
||||||
std_start = re.compile( r'(?i)std\n*\b' )
|
std_start = re.compile( r'(?i)std\n*\b' )
|
||||||
|
|
||||||
for i in range( 15, len( self.lines ) ):
|
refs = {}
|
||||||
line = self.lines[ i ].strip()
|
in_ref_sect = False
|
||||||
m = sectionre.match( line )
|
in_norm_ref_sect = False
|
||||||
if m:
|
refType = 'unk'
|
||||||
match = m.group( 1 )
|
|
||||||
if match is not None:
|
for i in range( 15, len( self.lines ) ):
|
||||||
match = match.lower()
|
line = self.lines[ i ].strip()
|
||||||
refType = typemap.get( match, 'unk' )
|
|
||||||
continue
|
# skip over lines until we find the start of the reference section
|
||||||
m = sectionre2.match( line )
|
if not in_ref_sect:
|
||||||
if m:
|
m = sectionre.match( line )
|
||||||
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
|
if not m:
|
||||||
continue
|
m = sectionre2.match( line )
|
||||||
m = sectionre3.match( line )
|
if not m:
|
||||||
if m:
|
m = sectionre3.match( line )
|
||||||
refType = typemap.get( m.group( 1 ).lower(), 'unk' )
|
|
||||||
continue
|
if m:
|
||||||
# If something got split badly, rejoin it.
|
in_ref_sect = True
|
||||||
if eol.search( line ) and i < len( self.lines ) - 1:
|
refType = 'info'
|
||||||
line += self.lines[ i + 1 ].lstrip()
|
if line.lower().find("normative") > 1:
|
||||||
m = idref.search( line )
|
in_norm_ref_sect = True
|
||||||
if m:
|
refType = 'norm'
|
||||||
draft = m.group( 1 )
|
|
||||||
refs[ draft ] = refType
|
# might be subsections within a references section
|
||||||
continue
|
if in_ref_sect and not in_norm_ref_sect:
|
||||||
m = rfcref.search( line )
|
m = sectionre.match( line )
|
||||||
if m:
|
if not m:
|
||||||
( series, number ) = m.groups()
|
m = sectionre2.match( line )
|
||||||
if series.lower()=='std' and std_start.search(line) and i > 15:
|
if not m:
|
||||||
line = self.lines[i-1].rstrip()+line
|
m = sectionre3.match( line )
|
||||||
if series.lower()!='std' or not not_our_std_ref.search( line ):
|
|
||||||
name = series.lower() + number.lstrip( '0' )
|
if m:
|
||||||
refs[ name ] = refType
|
in_ref_sect = True
|
||||||
continue
|
if line.lower().find("normative") > 1:
|
||||||
# References to BCP78 and BCP79 in boilerplate will appear as "unk".
|
in_norm_ref_sect = True
|
||||||
# Remove them.
|
refType = 'norm'
|
||||||
for boilerplate in ( 'bcp78', 'bcp79' ):
|
|
||||||
if refs.get( boilerplate ) == 'unk':
|
# look for the end of the normative reference section
|
||||||
del refs[ boilerplate ]
|
if in_norm_ref_sect:
|
||||||
|
m = sectionre.match( line )
|
||||||
|
if not m:
|
||||||
|
m = sectionre2.match( line )
|
||||||
|
if not m:
|
||||||
|
m = sectionre3.match( line )
|
||||||
|
|
||||||
|
if m and line.lower().find("normative") < 0:
|
||||||
|
in_norm_ref_sect = False
|
||||||
|
refType = 'info'
|
||||||
|
|
||||||
|
# find references within the section
|
||||||
|
if in_ref_sect:
|
||||||
|
# If something got split badly, rejoin it.
|
||||||
|
if eol.search( line ) and i < len( self.lines ) - 1:
|
||||||
|
line += self.lines[ i + 1 ].lstrip()
|
||||||
|
|
||||||
|
m = idref.search( line )
|
||||||
|
if m:
|
||||||
|
draft = m.group( 1 )
|
||||||
|
if draft not in refs:
|
||||||
|
refs[ draft ] = refType
|
||||||
|
|
||||||
|
m = rfcref.search( line )
|
||||||
|
if m:
|
||||||
|
( series, number ) = m.groups()
|
||||||
|
if series.lower()=='std' and std_start.search(line) and i > 15:
|
||||||
|
line = self.lines[i-1].rstrip()+line
|
||||||
|
if series.lower()!='std' or not not_our_std_ref.search( line ):
|
||||||
|
name = series.lower() + number.lstrip( '0' )
|
||||||
|
if name not in refs:
|
||||||
|
refs[ name ] = refType
|
||||||
|
|
||||||
# Don't add any references that point back into this doc
|
# Don't add any references that point back into this doc
|
||||||
if self.filename in refs:
|
if self.filename in refs:
|
||||||
del refs[self.filename]
|
del refs[self.filename]
|
||||||
return refs
|
|
||||||
|
return refs
|
||||||
|
|
||||||
def old_get_refs( self ):
|
def old_get_refs( self ):
|
||||||
refs = []
|
refs = []
|
||||||
|
|
Loading…
Reference in a new issue