Merged [6622] from rjsparks@nostrum.com:
Refines Bill Fenner's regex based search through documents for references.
Populates RelatedDocument with reference relations for each Document of type draft.
Replaces these reference relationships with updated copies on draft submission.
Note to deployer: There is a script to run in patches/fill_in_references.py that does the work of bringing the database up to date. It takes around 10 minutes to complete on a fast development laptop.
fixes bug #1173
- Legacy-Id: 6633
Note: SVN reference [6622] has been migrated to Git commit a677a70df3
This commit is contained in:
commit
7925223ef8
|
@ -6,6 +6,8 @@ from django.conf import settings
|
|||
from ietf.utils import markup_txt
|
||||
from ietf.doc.models import *
|
||||
|
||||
from ietf.utils import draft
|
||||
|
||||
def get_state_types(doc):
|
||||
res = []
|
||||
|
||||
|
@ -290,3 +292,47 @@ def update_telechat(request, doc, by, new_telechat_date, new_returning_item=None
|
|||
e.desc = "Removed telechat returning item indication"
|
||||
|
||||
e.save()
|
||||
|
||||
def rebuild_reference_relations(doc):
    """Recreate the reference relations of a draft from its text.

    The document text is read from settings.RFC_PATH (when the document's
    state slug is 'rfc') or from settings.INTERNET_DRAFT_PATH, parsed with
    draft.Draft(...).get_refs(), and every existing 'refnorm', 'refinfo',
    'refold' and 'refunk' relation of ``doc`` is deleted before new ones
    are created.

    Returns None when ``doc`` is not of type 'draft'.  Otherwise returns a
    dict that may contain these keys (each only when non-empty):

      'errors'   -- list of messages: the file could not be read, or a
                    reference name matched more than one DocAlias
      'warnings' -- list with a summary message about unmatched references
      'unfound'  -- list of reference names with no matching DocAlias

    When the file cannot be read the existing relations are left untouched.
    """
    if doc.type.slug != 'draft':
        return None

    if doc.get_state_slug() == 'rfc':
        filename = os.path.join(settings.RFC_PATH, doc.canonical_name() + ".txt")
    else:
        filename = os.path.join(settings.INTERNET_DRAFT_PATH, doc.filename_with_rev())

    try:
        refs = draft.Draft(draft._gettext(filename), filename).get_refs()
    except IOError as e:
        return {'errors': ["%s :%s" % (e.strerror, filename)]}

    # Only drop the old relations once the text has been parsed successfully.
    doc.relateddocument_set.filter(relationship__slug__in=['refnorm', 'refinfo', 'refold', 'refunk']).delete()

    warnings = []
    errors = []
    unfound = set()
    # One DocRelationshipName lookup per reference type rather than one per
    # reference; filled lazily so a lookup only happens when it is needed.
    relationships = {}

    # .items() behaves the same on Python 2 and also works on Python 3.
    for ref, ref_type in refs.items():
        # Two rows are enough to tell "none", "exactly one" and "too many"
        # apart with a single query.
        aliases = list(DocAlias.objects.filter(name=ref)[:2])
        if not aliases:
            unfound.add("%s" % ref)
            continue
        if len(aliases) > 1:
            errors.append("Too many DocAlias objects found for %s" % ref)
            continue

        alias = aliases[0]
        # Don't add references to ourself
        if doc != alias.document:
            if ref_type not in relationships:
                relationships[ref_type] = DocRelationshipName.objects.get(slug='ref%s' % ref_type)
            RelatedDocument.objects.get_or_create(source=doc, target=alias, relationship=relationships[ref_type])

    if unfound:
        warnings.append('There were %d references with no matching DocAlias' % len(unfound))

    ret = {}
    if errors:
        ret['errors'] = errors
    if warnings:
        ret['warnings'] = warnings
    if unfound:
        ret['unfound'] = list(unfound)

    return ret
|
||||
|
|
|
@ -86,7 +86,7 @@ img.hidden { display: none; }
|
|||
</head>
|
||||
<body class="yui-skin-sam" onload='setGroupState();updateAgendaColors()'>
|
||||
<div style="background-color:#c00000;color:white;font-size:150%;height:35px;" class="noprint">
|
||||
<a href="/" style="text-decoration:none;color:white"><img src="/images/ietflogo-blue-small.png" width="60" height="34" style="vertical-align:middle;padding-left:8px;" alt=""/><span style="padding-left:15px;font-weight:bold;letter-spacing:0.1em;">datatracker.ietf.org</a> - DEVELOPMENT MODE</span>
|
||||
<span style="padding-left:15px;font-weight:bold;letter-spacing:0.1em;"><a href="/" style="text-decoration:none;color:white"><img src="/images/ietflogo-blue-small.png" width="60" height="34" style="vertical-align:middle;padding-left:8px;" alt=""/>datatracker.ietf.org</a> - DEVELOPMENT MODE</span>
|
||||
</div>
|
||||
|
||||
|
||||
|
|
|
@ -90,7 +90,7 @@ img.hidden { display: none; }
|
|||
</head>
|
||||
<body class="yui-skin-sam" onload='setGroupState();updateAgendaColors()'>
|
||||
<div style="background-color:#c00000;color:white;font-size:150%;height:35px;" class="noprint">
|
||||
<a href="/" style="text-decoration:none;color:white"><img src="/images/ietflogo-blue-small.png" width="60" height="34" style="vertical-align:middle;padding-left:8px;" alt=""/><span style="padding-left:15px;font-weight:bold;letter-spacing:0.1em;">datatracker.ietf.org</a> - DEVELOPMENT MODE</span>
|
||||
<span style="padding-left:15px;font-weight:bold;letter-spacing:0.1em;"><a href="/" style="text-decoration:none;color:white"><img src="/images/ietflogo-blue-small.png" width="60" height="34" style="vertical-align:middle;padding-left:8px;" alt=""/>datatracker.ietf.org</a> - DEVELOPMENT MODE</span>
|
||||
</div>
|
||||
|
||||
|
||||
|
|
306417
ietf/patches/fill_in_references.py
Normal file
306417
ietf/patches/fill_in_references.py
Normal file
File diff suppressed because it is too large
Load diff
|
@ -17,9 +17,10 @@ from ietf.ietfauth.decorators import has_role
|
|||
|
||||
from ietf.doc.models import *
|
||||
from ietf.person.models import Person, Alias, Email
|
||||
from ietf.doc.utils import add_state_change_event
|
||||
from ietf.doc.utils import add_state_change_event, rebuild_reference_relations
|
||||
from ietf.message.models import Message
|
||||
|
||||
|
||||
# Some useful states
|
||||
UPLOADED = 1
|
||||
AWAITING_AUTHENTICATION = 4
|
||||
|
@ -97,6 +98,8 @@ def perform_post(request, submission):
|
|||
|
||||
update_authors(draft, submission)
|
||||
|
||||
rebuild_reference_relations(draft)
|
||||
|
||||
# new revision event
|
||||
e = NewRevisionDocEvent(type="new_revision", doc=draft, rev=draft.rev)
|
||||
e.time = draft.time #submission.submission_date
|
||||
|
|
|
@ -908,6 +908,81 @@ class Draft():
|
|||
|
||||
# ------------------------------------------------------------------
|
||||
def get_refs(self):
    """Collect the documents referenced from this draft's text.

    Scans self.lines from index 15 to the end.  A line that looks like a
    references-section heading switches the current reference type; any
    other line is searched for one Internet-Draft name or, failing that,
    one RFC/STD/BCP/FYI number (only the first hit on a line is used).

    Returns a dict mapping the reference name (draft name without its
    revision suffix, or lower-cased series plus number without leading
    zeros, e.g. 'rfc2119') to 'norm', 'info', 'old' or 'unk'.  'unk'
    entries for bcp78/bcp79 and any entry keyed by self.filename are
    removed before returning.
    """
    # Heading qualifier (lower-cased) -> reference type.  The None key is
    # used when a heading says "References" with no qualifier in front.
    kind_by_heading = {
        'normative': 'norm',
        'informative': 'info',
        'informational': 'info',
        'non-normative': 'info',
        None: 'old',
    }

    # Section-heading patterns, tried in this order.  They were tuned over
    # the years against many document layouts.
    heading_patterns = (
        # "Appendix A. References:" / "A.1. Informative References:"
        re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' ),
        # "9.1 Normative"
        re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' ),
        # "References (Normative)"
        re.compile( r'(?i)References \((\S+ormative)\)$' ),
    )
    # Internet-Draft name, with the two-digit revision split off if present.
    draft_pat = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
    # RFC and the other numbered series.
    series_pat = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
    # "std" hits that belong to somebody else's standards.
    foreign_std_pat = re.compile( r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))' )
    # Line ending in the first half of a reference broken across two lines.
    dangling_pat = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
    # A "std" token whose owner (IEEE, MIL-, ...) may sit on the previous line.
    std_token_pat = re.compile( r'(?i)std\n*\b' )

    found = {}
    current_kind = 'unk'
    total = len( self.lines )

    for idx in range( 15, total ):
        text = self.lines[ idx ].strip()

        # Section heading?  Remember the type and move to the next line.
        heading = None
        for pattern in heading_patterns:
            heading = pattern.match( text )
            if heading:
                break
        if heading:
            qualifier = heading.group( 1 )
            if qualifier is not None:
                qualifier = qualifier.lower()
            current_kind = kind_by_heading.get( qualifier, 'unk' )
            continue

        # Glue the next line on when this one stops mid-reference.
        if idx < total - 1 and dangling_pat.search( text ):
            text += self.lines[ idx + 1 ].lstrip()

        hit = draft_pat.search( text )
        if hit:
            found[ hit.group( 1 ) ] = current_kind
            continue

        hit = series_pat.search( text )
        if not hit:
            continue

        series, number = hit.groups()
        series = series.lower()
        if series == 'std':
            # Pull in the previous line so a prefix such as "IEEE" or
            # "MIL-" that ended up there is visible to the filter below.
            if idx > 15 and std_token_pat.search( text ):
                text = self.lines[ idx - 1 ].rstrip() + text
            if foreign_std_pat.search( text ):
                continue
        found[ series + number.lstrip( '0' ) ] = current_kind

    # BCP78/BCP79 mentioned outside any references section (boilerplate)
    # show up as 'unk'; drop those.
    for boilerplate in ( 'bcp78', 'bcp79' ):
        if found.get( boilerplate ) == 'unk':
            del found[ boilerplate ]

    # Never report a reference that points back at this document.
    found.pop( self.filename, None )
    return found
|
||||
|
||||
def old_get_refs( self ):
|
||||
refs = []
|
||||
normrefs = []
|
||||
rfcrefs = []
|
||||
|
|
Loading…
Reference in a new issue