Refines Bill Fenner's regex-based search through documents for references.
Populates RelatedDocument with reference relations for each Document of type draft. Replaces these reference relationships with updated copies on draft submission. Note to deployer: This migration takes around 10 minutes to complete on a fast development laptop. - Legacy-Id: 6572
This commit is contained in:
parent
8c87d60c51
commit
b18249222b
306772
ietf/doc/migrations/0015_fillin_references.py
Normal file
306772
ietf/doc/migrations/0015_fillin_references.py
Normal file
File diff suppressed because it is too large
Load diff
|
@ -6,6 +6,8 @@ from django.conf import settings
|
|||
from ietf.utils import markup_txt
|
||||
from ietf.doc.models import *
|
||||
|
||||
from ietf.utils import draft
|
||||
|
||||
def get_state_types(doc):
|
||||
res = []
|
||||
|
||||
|
@ -290,3 +292,45 @@ def update_telechat(request, doc, by, new_telechat_date, new_returning_item=None
|
|||
e.desc = "Removed telechat returning item indication"
|
||||
|
||||
e.save()
|
||||
|
||||
def rebuild_reference_relations(doc):
    """Rebuild the reference relations recorded for a draft document.

    The document text is parsed with ``draft.Draft(...).get_refs()`` and every
    existing ``refnorm``/``refinfo``/``refold``/``refunk`` relation whose
    source is ``doc`` is replaced by relations built from the parsed
    references.

    Returns ``None`` when ``doc`` is not of type ``draft``.  Otherwise returns
    a dict that may contain any of these keys (each present only when
    non-empty):

    * ``'errors'``   -- list of error strings (unreadable file, or a reference
      name that matched more than one ``DocAlias``),
    * ``'warnings'`` -- list with a summary line counting unmatched references,
    * ``'unfound'``  -- list of the reference names with no ``DocAlias``.

    If the text cannot be read, the existing relations are left untouched and
    only ``'errors'`` is returned.
    """
    if doc.type.slug != 'draft':
        return None

    # Published RFCs and unpublished drafts live in different directories.
    if doc.get_state_slug() == 'rfc':
        filename = os.path.join(settings.RFC_PATH, doc.canonical_name() + ".txt")
    else:
        filename = os.path.join(settings.INTERNET_DRAFT_PATH, doc.filename_with_rev())

    try:
        refs = draft.Draft(draft._gettext(filename), filename).get_refs()
    except IOError as e:
        return {'errors': ["%s :%s" % (e.strerror, filename)]}

    # Only drop the old relations once the new text has parsed successfully.
    doc.relateddocument_set.filter(
        relationship__slug__in=['refnorm', 'refinfo', 'refold', 'refunk']).delete()

    warnings = []
    errors = []
    unfound = set()
    # At most four relationship names are ever needed; look each one up once
    # instead of once per reference.
    relationships = {}

    # items() rather than iteritems() so the loop runs on Python 2 and 3.
    for ref, refType in refs.items():
        # Two rows are enough to tell "none", "exactly one" and "ambiguous"
        # apart with a single query.
        matches = list(DocAlias.objects.filter(name=ref)[:2])
        if not matches:
            unfound.add("%s" % ref)
        elif len(matches) > 1:
            errors.append("Too many DocAlias objects found for %s" % ref)
        else:
            if refType not in relationships:
                relationships[refType] = DocRelationshipName.objects.get(slug='ref%s' % refType)
            RelatedDocument.objects.get_or_create(
                source=doc, target=matches[0], relationship=relationships[refType])

    if unfound:
        warnings.append('There were %d references with no matching DocAlias' % len(unfound))

    ret = {}
    if errors:
        ret['errors'] = errors
    if warnings:
        ret['warnings'] = warnings
    if unfound:
        ret['unfound'] = list(unfound)

    return ret
|
||||
|
|
|
@ -17,9 +17,10 @@ from ietf.ietfauth.decorators import has_role
|
|||
|
||||
from ietf.doc.models import *
|
||||
from ietf.person.models import Person, Alias, Email
|
||||
from ietf.doc.utils import add_state_change_event
|
||||
from ietf.doc.utils import add_state_change_event, rebuild_reference_relations
|
||||
from ietf.message.models import Message
|
||||
|
||||
|
||||
# Some useful states
|
||||
UPLOADED = 1
|
||||
AWAITING_AUTHENTICATION = 4
|
||||
|
@ -97,6 +98,8 @@ def perform_post(request, submission):
|
|||
|
||||
update_authors(draft, submission)
|
||||
|
||||
rebuild_reference_relations(draft)
|
||||
|
||||
# new revision event
|
||||
e = NewRevisionDocEvent(type="new_revision", doc=draft, rev=draft.rev)
|
||||
e.time = draft.time #submission.submission_date
|
||||
|
|
|
@ -908,6 +908,80 @@ class Draft():
|
|||
|
||||
# ------------------------------------------------------------------
|
||||
def get_refs(self):
    """Collect the documents referenced by this draft's text.

    Walks ``self.lines`` from index 15 onward (presumably to skip the
    front-matter header -- confirm), tracking the most recent references
    section heading, and records at most one reference per line: an
    Internet-Draft name if the line contains one, otherwise the first
    rfc/std/bcp/fyi reference.

    Returns a dict mapping the reference name (draft name without revision,
    or lower-cased series name plus number without leading zeros) to one of
    'norm', 'info', 'old' or 'unk'.
    """
    section_kinds = {
        'normative': 'norm',
        'informative': 'info',
        'informational': 'info',
        'non-normative': 'info',
        None: 'old'
    }
    # Bill's horrible "references section" regexps, built up over lots of years
    # of fine tuning for different formats.
    # Examples:
    # Appendix A. References:
    # A.1. Informative References:
    sectionre = re.compile( r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$' )
    # 9.1 Normative
    sectionre2 = re.compile( r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$' )
    # One other reference section type seen:
    sectionre3 = re.compile( r'(?i)References \((\S+ormative)\)$' )
    # An Internet-Draft reference.
    idref = re.compile( r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b' )
    # An RFC-and-other-series reference.
    rfcref = re.compile( r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b' )
    # False positives for std
    not_our_std_ref = re.compile( r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))' )
    # An Internet-Draft or series reference hyphenated by a well-meaning line break.
    eol = re.compile( r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$' )
    # std at the front of a line can hide things like IEEE STD or MIL-STD
    std_start = re.compile( r'(?i)std\n*\b' )

    found = {}
    current_kind = 'unk'
    total = len(self.lines)

    for i in range(15, total):
        line = self.lines[i].strip()

        # A section heading only switches the current reference type.
        heading = None
        for pattern in (sectionre, sectionre2, sectionre3):
            heading = pattern.match(line)
            if heading:
                break
        if heading:
            label = heading.group(1)
            if label is not None:
                label = label.lower()
            current_kind = section_kinds.get(label, 'unk')
            continue

        # If something got split badly, rejoin it.
        if i < total - 1 and eol.search(line):
            line += self.lines[i + 1].lstrip()

        hit = idref.search(line)
        if hit:
            found[hit.group(1)] = current_kind
            continue

        hit = rfcref.search(line)
        if not hit:
            continue
        series, number = hit.groups()
        series = series.lower()
        if series == 'std' and i > 15 and std_start.search(line):
            # Pull in the previous line so prefixes such as "IEEE" or "MIL-"
            # that were wrapped onto it become visible to the filter below.
            line = self.lines[i - 1].rstrip() + line
        if series != 'std' or not not_our_std_ref.search(line):
            found[series + number.lstrip('0')] = current_kind

    # References to BCP78 and BCP79 in boilerplate will appear as "unk".
    # Remove them.
    for boilerplate in ('bcp78', 'bcp79'):
        if found.get(boilerplate) == 'unk':
            del found[boilerplate]
    return found
|
||||
|
||||
|
||||
|
||||
def old_get_refs( self ):
|
||||
refs = []
|
||||
normrefs = []
|
||||
rfcrefs = []
|
||||
|
|
Loading…
Reference in a new issue