Return rev used to find doc when heuristics modify the input. Share heuristics between rfcdiff and html views. Fixes #3437. Commit ready for merge.

- Legacy-Id: 19658
This commit is contained in:
Jennifer Richards 2021-11-12 21:45:58 +00:00
parent d6a262742f
commit 38469966e2
5 changed files with 276 additions and 133 deletions

View file

@ -87,6 +87,19 @@ class BaseDocumentFactory(factory.django.DjangoModelFactory):
continue
obj.relateddocument_set.create(relationship_id=rel_id, target=docalias)
@factory.post_generation
def create_revisions(obj, create, extracted, **kwargs): # pylint: disable=no-self-argument
"""Create additional revisions of the document
Argument should be an iterable of revisions. Remember that range() is exclusive on the end
index, so range(1, 10) stops at 9.
"""
if create and extracted:
for rev in extracted:
e = NewRevisionDocEventFactory(doc=obj, rev=f'{rev:02d}')
obj.rev = f'{rev:02d}'
obj.save_with_history([e])
@classmethod
def _after_postgeneration(cls, obj, create, results=None):
"""Save again the instance if creating and at least one hook ran."""

View file

@ -2486,6 +2486,7 @@ class RfcdiffSupportTests(TestCase):
def setUp(self):
super().setUp()
self.target_view = 'ietf.doc.views_doc.rfcdiff_latest_json'
self._last_rfc_num = 8000
def getJson(self, view_args):
url = urlreverse(self.target_view, kwargs=view_args)
@ -2493,79 +2494,85 @@ class RfcdiffSupportTests(TestCase):
self.assertEqual(r.status_code, 200)
return r.json()
def test_draft(self):
draft = IndividualDraftFactory(name='draft-somebody-did-something',rev='00')
for r in range(0,13):
e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}')
draft.rev = f'{r:02d}'
draft.save_with_history([e])
def next_rfc_number(self):
self._last_rfc_num += 1
return self._last_rfc_num
def do_draft_test(self, name):
draft = IndividualDraftFactory(name=name, rev='00', create_revisions=range(0,13))
draft = reload_db_objects(draft)
received = self.getJson(dict(name=draft.name))
self.assertEqual(received, dict(
name=draft.name,
rev=draft.rev,
content_url=draft.get_href(),
previous=f'{draft.name}-{(int(draft.rev)-1):02d}'
))
self.assertEqual(
received,
dict(
name=draft.name,
rev=draft.rev,
content_url=draft.get_href(),
previous=f'{draft.name}-{(int(draft.rev)-1):02d}'
),
'Incorrect JSON when draft revision not specified',
)
received = self.getJson(dict(name=draft.name, rev=draft.rev))
self.assertEqual(received, dict(
name=draft.name,
rev=draft.rev,
content_url=draft.get_href(),
previous=f'{draft.name}-{(int(draft.rev)-1):02d}'
))
self.assertEqual(
received,
dict(
name=draft.name,
rev=draft.rev,
content_url=draft.get_href(),
previous=f'{draft.name}-{(int(draft.rev)-1):02d}'
),
'Incorrect JSON when latest revision specified',
)
received = self.getJson(dict(name=draft.name, rev='10'))
self.assertEqual(received, dict(
name=draft.name,
rev='10',
content_url=draft.history_set.get(rev='10').get_href(),
previous=f'{draft.name}-09'
))
self.assertEqual(
received,
dict(
name=draft.name,
rev='10',
content_url=draft.history_set.get(rev='10').get_href(),
previous=f'{draft.name}-09'
),
'Incorrect JSON when historical revision specified',
)
received = self.getJson(dict(name=draft.name, rev='00'))
self.assertNotIn('previous', received)
self.assertNotIn('previous', received, 'Rev 00 has no previous name when not replacing a draft')
replaced = IndividualDraftFactory()
RelatedDocument.objects.create(relationship_id='replaces',source=draft,target=replaced.docalias.first())
received = self.getJson(dict(name=draft.name, rev='00'))
self.assertEqual(received['previous'], f'{replaced.name}-{replaced.rev}')
self.assertEqual(received['previous'], f'{replaced.name}-{replaced.rev}',
'Rev 00 has a previous name when replacing a draft')
def test_draft(self):
# test with typical, straightforward names
self.do_draft_test(name='draft-somebody-did-a-thing')
# try with different potentially problematic names
self.do_draft_test(name='draft-someone-did-something-01-02')
self.do_draft_test(name='draft-someone-did-something-else-02')
self.do_draft_test(name='draft-someone-did-something-02-weird-01')
def test_draft_with_broken_history(self):
draft = IndividualDraftFactory(rev='10')
def do_draft_with_broken_history_test(self, name):
draft = IndividualDraftFactory(name=name, rev='10')
received = self.getJson(dict(name=draft.name,rev='09'))
self.assertEqual(received['rev'],'09')
self.assertEqual(received['previous'], f'{draft.name}-08')
self.assertTrue('warning' in received)
def test_draft_with_broken_history(self):
# test with typical, straightforward names
self.do_draft_with_broken_history_test(name='draft-somebody-did-something')
# try with different potentially problematic names
self.do_draft_with_broken_history_test(name='draft-someone-did-something-01-02')
self.do_draft_with_broken_history_test(name='draft-someone-did-something-else-02')
self.do_draft_with_broken_history_test(name='draft-someone-did-something-02-weird-03')
def test_draftname_with_numeric_suffix(self):
draft = IndividualDraftFactory(name='draft-someone-did-something-01-02',rev='00')
for r in range(0,4):
e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}')
draft.rev = f'{r:02d}'
draft.save_with_history([e])
received = self.getJson(dict(name=draft.name))
self.assertEqual(received['rev'],'03')
self.assertIn('01-02-03',received['content_url'])
self.assertIn('01-02-02',received['previous'])
received = self.getJson(dict(name=draft.name,rev='02'))
self.assertEqual(received['rev'],'02')
self.assertIn('01-02-02',received['content_url'])
def test_rfc(self):
draft = WgDraftFactory()
for r in range(0,2):
e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}')
draft.rev = f'{r:02d}'
draft.save_with_history([e])
draft.docalias.create(name='rfc8000')
def do_rfc_test(self, draft_name):
draft = WgDraftFactory(name=draft_name, create_revisions=range(0,2))
draft.docalias.create(name=f'rfc{self.next_rfc_number():04}')
draft.set_state(State.objects.get(type_id='draft',slug='rfc'))
draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
draft = reload_db_objects(draft)
@ -2573,26 +2580,47 @@ class RfcdiffSupportTests(TestCase):
number = rfc.rfc_number()
received = self.getJson(dict(name=number))
self.assertEqual(received, dict(
content_url=rfc.get_href(),
name=rfc.canonical_name(),
previous=f'{draft.name}-{draft.rev}',
))
self.assertEqual(
received,
dict(
content_url=rfc.get_href(),
name=rfc.canonical_name(),
previous=f'{draft.name}-{draft.rev}',
),
'Can look up an RFC by number',
)
num_received = received
received = self.getJson(dict(name=rfc.canonical_name()))
self.assertEqual(num_received, received)
self.assertEqual(num_received, received, 'RFC by canonical name gives same result as by number')
received = self.getJson(dict(name=f'RfC {number}'))
self.assertEqual(num_received, received)
self.assertEqual(num_received, received, 'RFC with unusual spacing/caps gives same result as by number')
received = self.getJson(dict(name=draft.name))
self.assertEqual(num_received, received, 'RFC by draft name and no rev gives same result as by number')
received = self.getJson(dict(name=draft.name, rev='01'))
self.assertEqual(
received,
dict(
content_url=draft.history_set.get(rev='01').get_href(),
name=draft.name,
rev='01',
previous=f'{draft.name}-00',
),
'RFC by draft name with rev should give draft name, not canonical name'
)
def test_rfc(self):
# simple draft name
self.do_rfc_test(draft_name='draft-test-ar-ef-see')
# tricky draft names
self.do_rfc_test(draft_name='draft-whatever-02')
self.do_rfc_test(draft_name='draft-test-me-03-04')
def test_rfc_with_tombstone(self):
draft = WgDraftFactory()
for r in range(0,2):
e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}')
draft.rev = f'{r:02d}'
draft.save_with_history([e])
draft = WgDraftFactory(create_revisions=range(0,2))
draft.docalias.create(name='rfc3261') # See views_doc.HAS_TOMBSTONE
draft.set_state(State.objects.get(type_id='draft',slug='rfc'))
draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
@ -2603,4 +2631,46 @@ class RfcdiffSupportTests(TestCase):
received = self.getJson(dict(name=rfc.canonical_name()))
self.assertTrue(received['previous'].endswith('00'))
def do_rfc_with_broken_history_test(self, draft_name):
draft = WgDraftFactory(rev='10', name=draft_name)
draft.docalias.create(name=f'rfc{self.next_rfc_number():04}')
draft.set_state(State.objects.get(type_id='draft',slug='rfc'))
draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
draft = reload_db_objects(draft)
rfc = draft
received = self.getJson(dict(name=draft.name))
self.assertEqual(
received,
dict(
content_url=rfc.get_href(),
name=rfc.canonical_name(),
previous=f'{draft.name}-10',
),
'RFC by draft name without rev should return canonical RFC name and no rev',
)
received = self.getJson(dict(name=draft.name, rev='10'))
self.assertEqual(received['name'], draft.name, 'RFC by draft name with rev should return draft name')
self.assertEqual(received['rev'], '10', 'Requested rev should be returned')
self.assertEqual(received['previous'], f'{draft.name}-09', 'Previous rev is one less than requested')
self.assertIn(f'{draft.name}-10', received['content_url'], 'Returned URL should include requested rev')
self.assertNotIn('warning', received, 'No warning when we have the rev requested')
received = self.getJson(dict(name=f'{draft.name}-09'))
self.assertEqual(received['name'], draft.name, 'RFC by draft name with rev should return draft name')
self.assertEqual(received['rev'], '09', 'Requested rev should be returned')
self.assertEqual(received['previous'], f'{draft.name}-08', 'Previous rev is one less than requested')
self.assertIn(f'{draft.name}-09', received['content_url'], 'Returned URL should include requested rev')
self.assertEqual(
received['warning'],
'History for this version not found - these results are speculation',
'Warning should be issued when requested rev is not found'
)
def test_rfc_with_broken_history(self):
# simple draft name
self.do_rfc_with_broken_history_test(draft_name='draft-some-draft')
# tricky draft names
self.do_rfc_with_broken_history_test(draft_name='draft-gizmo-01')
self.do_rfc_with_broken_history_test(draft_name='draft-oh-boy-what-a-draft-02-03')

View file

@ -8,9 +8,9 @@ from ietf.name.models import DocTagName
from ietf.person.factories import PersonFactory
from ietf.utils.test_utils import TestCase
from ietf.person.models import Person
from ietf.doc.factories import DocumentFactory
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor
from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors
from ietf.doc.factories import DocumentFactory, WgRfcFactory
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor, Document
from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors, fuzzy_find_documents
class ActionHoldersTests(TestCase):
@ -239,4 +239,49 @@ class MiscTests(TestCase):
self.assertIn('cleared country (was "USA")', events[0].desc)
docauth = doc.documentauthor_set.first()
self.assertEqual(docauth.affiliation, '')
self.assertEqual(docauth.country, '')
self.assertEqual(docauth.country, '')
def do_fuzzy_find_documents_rfc_test(self, name):
rfc = WgRfcFactory(name=name, create_revisions=(0, 1, 2))
rfc = Document.objects.get(pk=rfc.pk) # clear out any cached values
# by canonical name
found = fuzzy_find_documents(rfc.canonical_name(), None)
self.assertCountEqual(found.documents, [rfc])
self.assertEqual(found.matched_rev, None)
self.assertEqual(found.matched_name, rfc.canonical_name())
# by draft name, no rev
found = fuzzy_find_documents(rfc.name, None)
self.assertCountEqual(found.documents, [rfc])
self.assertEqual(found.matched_rev, None)
self.assertEqual(found.matched_name, rfc.name)
# by draft name, latest rev
found = fuzzy_find_documents(rfc.name, '02')
self.assertCountEqual(found.documents, [rfc])
self.assertEqual(found.matched_rev, '02')
self.assertEqual(found.matched_name, rfc.name)
# by draft name, earlier rev
found = fuzzy_find_documents(rfc.name, '01')
self.assertCountEqual(found.documents, [rfc])
self.assertEqual(found.matched_rev, '01')
self.assertEqual(found.matched_name, rfc.name)
# wrong name or revision
found = fuzzy_find_documents(rfc.name + '-incorrect')
self.assertCountEqual(found.documents, [], 'Should not find document that does not match')
found = fuzzy_find_documents(rfc.name + '-incorrect', '02')
self.assertCountEqual(found.documents, [], 'Still should not find document, even with a version')
found = fuzzy_find_documents(rfc.name, '22')
self.assertCountEqual(found.documents, [rfc],
'Should find document even if rev does not exist')
def test_fuzzy_find_documents(self):
# Should add additional tests/test cases for other document types/name formats
self.do_fuzzy_find_documents_rfc_test('draft-normal-name')
self.do_fuzzy_find_documents_rfc_test('draft-name-with-number-01')
self.do_fuzzy_find_documents_rfc_test('draft-name-that-has-two-02-04')
self.do_fuzzy_find_documents_rfc_test('draft-wild-01-numbers-0312')

View file

@ -11,7 +11,7 @@ import os
import re
import textwrap
from collections import defaultdict
from collections import defaultdict, namedtuple
from urllib.parse import quote
from django.conf import settings
@ -1286,3 +1286,39 @@ def generate_idnits2_rfcs_obsoleted():
obsdict[k] = sorted(obsdict[k])
return render_to_string('doc/idnits2-rfcs-obsoleted.txt', context={'obsitems':sorted(obsdict.items())})
def fuzzy_find_documents(name, rev=None):
"""Find a document based on name/rev
Applies heuristics, assuming the inputs were joined by a '-' that may have been misplaced.
If returned documents queryset is empty, matched_rev and and matched_name are meaningless.
The rev input is not validated - it is used to find possible names if the name input does
not match anything, but matched_rev may not correspond to an actual version of the found
document.
"""
# Handle special case name formats
if name.startswith('rfc0'):
name = "rfc" + name[3:].lstrip('0')
if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name):
name = "%s-%s" % (name, rev)
rev = None
if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev):
name = "%s-%s" % (name, rev[:-3])
rev = rev[-2:]
if re.match("^[0-9]+$", name):
name = f'rfc{name}'
if re.match("^[Rr][Ff][Cc] [0-9]+$",name):
name = f'rfc{name[4:]}'
# see if we can find a document using this name
docs = Document.objects.filter(docalias__name=name, type_id='draft')
if rev and not docs.exists():
# No document found, see if the name/rev split has been misidentified.
# Handles some special cases, like draft-ietf-tsvwg-ieee-802-11.
name = '%s-%s' % (name, rev)
docs = Document.objects.filter(docalias__name=name, type_id='draft')
if docs.exists():
rev = None # found a doc by name with rev = None, so update that
FoundDocuments = namedtuple('FoundDocuments', 'documents matched_name matched_rev')
return FoundDocuments(docs, name, rev)

View file

@ -64,7 +64,7 @@ from ietf.doc.utils import (add_links_in_new_revision_events, augment_events_wit
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
add_events_message_info, get_unicode_document_content, build_doc_meta_block,
augment_docs_and_user_with_user_info, irsg_needed_ballot_positions, add_action_holder_change_event,
build_doc_supermeta_block, build_file_urls, update_documentauthors)
build_doc_supermeta_block, build_file_urls, update_documentauthors, fuzzy_find_documents)
from ietf.doc.utils_bofreq import bofreq_editors, bofreq_responsible
from ietf.group.models import Role, Group
from ietf.group.utils import can_manage_all_groups_of_type, can_manage_materials, group_features_role_filter
@ -721,40 +721,26 @@ def document_main(request, name, rev=None):
def document_html(request, name, rev=None):
if name.startswith('rfc0'):
name = "rfc" + name[3:].lstrip('0')
if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name):
name = "%s-%s" % (name, rev)
if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev):
name = "%s-%s" % (name, rev[:-3])
rev = rev[-2:]
if re.match("^[0-9]+$", name):
return redirect('ietf.doc.views_doc.document_html',name=f'rfc{name}')
if re.match("^[Rr][Ff][Cc] [0-9]+$",name):
return redirect('ietf.doc.views_doc.document_html',name=f'rfc{name[4:]}')
docs = Document.objects.filter(docalias__name=name)
if rev and not docs.exists():
# handle some special cases, like draft-ietf-tsvwg-ieee-802-11
name = '%s-%s' % (name, rev)
rev=None
docs = Document.objects.filter(docalias__name=name)
if not docs.exists():
found = fuzzy_find_documents(name, rev)
num_found = found.documents.count()
if num_found == 0:
raise Http404("Document not found: %s" % name)
if docs.count() > 1:
if num_found > 1:
raise Http404("Multiple documents matched: %s" % name)
doc = docs.get()
if found.matched_name.startswith('rfc') and name != found.matched_name:
return redirect('ietf.doc.views_doc.document_html', name=found.matched_name)
doc = found.documents.get()
if not os.path.exists(doc.get_file_name()):
raise Http404("File not found: %s" % doc.get_file_name())
if not rev and not name.startswith('rfc'):
if found.matched_rev or found.matched_name.startswith('rfc'):
rev = found.matched_rev
else:
rev = doc.rev
if rev:
docs = DocHistory.objects.filter(doc=doc, rev=rev)
if docs.exists():
doc = docs.first()
else:
doc = doc.fake_history_obj(rev)
doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev)
if doc.type_id in ['draft',]:
doc.supermeta = build_doc_supermeta_block(doc)
doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX)
@ -1794,44 +1780,30 @@ def idnits2_state(request, name, rev=None):
return render(request, 'doc/idnits2-state.txt', context={'doc':doc}, content_type='text/plain;charset=utf-8')
def find_doc_for_rfcdiff(name, rev):
if name.startswith('rfc0'):
name = "rfc" + name[3:].lstrip('0')
if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name):
name = "%s-%s" % (name, rev)
if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev):
name = "%s-%s" % (name, rev[:-3])
rev = rev[-2:]
if re.match("^[0-9]+$", name):
name = f'rfc{name}'
if re.match("^[Rr][Ff][Cc] [0-9]+$",name):
name = f'rfc{name[4:]}'
docs = Document.objects.filter(docalias__name=name, type_id='draft')
if rev and not docs.exists():
# handle some special cases, like draft-ietf-tsvwg-ieee-802-11
name = '%s-%s' % (name, rev)
rev=None
docs = Document.objects.filter(docalias__name=name, type_id='draft')
"""rfcdiff lookup heuristics
Returns a tuple with:
[0] - condition string
[1] - document found (or None)
[2] - historic version
[3] - revision actually found (may differ from :rev: input)
"""
found = fuzzy_find_documents(name, rev)
condition = 'no such document'
if not docs.exists() or docs.count() > 1:
return (condition, None, None)
doc = docs.get()
if not rev or (rev and doc.rev==rev):
if found.documents.count() != 1:
return (condition, None, None, rev)
doc = found.documents.get()
if found.matched_rev is None or doc.rev == found.matched_rev:
condition = 'current version'
return condition, doc, None
return (condition, doc, None, found.matched_rev)
else:
candidate = None
for h in doc.history_set.order_by("-time"):
if rev == h.rev:
candidate = h
break
candidate = doc.history_set.filter(rev=found.matched_rev).order_by("-time").first()
if candidate:
condition = 'historic version'
return condition, doc, candidate
return (condition, doc, candidate, found.matched_rev)
else:
condition = 'version dochistory not found'
return condition, doc, None
return (condition, doc, None, found.matched_rev)
# This is a proof of concept of a service that would redirect to the current revision
# def rfcdiff_latest(request, name, rev=None):
@ -1886,17 +1858,20 @@ HAS_TOMBSTONE = [
def rfcdiff_latest_json(request, name, rev=None):
response = dict()
condition, document, history = find_doc_for_rfcdiff(name, rev)
condition, document, history, found_rev = find_doc_for_rfcdiff(name, rev)
if condition == 'no such document':
raise Http404
elif condition in ('historic version', 'current version'):
doc = history if history else document
if not rev and doc.is_rfc():
if not found_rev and doc.is_rfc():
response['content_url'] = doc.get_href()
response['name']=doc.canonical_name()
if doc.name != doc.canonical_name():
prev_rev = doc.rev
# not sure what to do if non-numeric values come back, so at least log it
log.assertion('doc.rfc_number().isdigit()')
log.assertion('doc.rev.isdigit()')
if int(doc.rfc_number()) in HAS_TOMBSTONE and prev_rev != '00':
prev_rev = f'{(int(doc.rev)-1):02d}'
response['previous'] = f'{doc.name}-{prev_rev}'
@ -1915,16 +1890,20 @@ def rfcdiff_latest_json(request, name, rev=None):
if match and match.group(2):
response['previous'] = f'rfc{match.group(2)}'
else:
# not sure what to do if non-numeric values come back, so at least log it
log.assertion('doc.rev.isdigit()')
response['previous'] = f'{doc.name}-{(int(doc.rev)-1):02d}'
elif condition == 'version dochistory not found':
response['warning'] = 'History for this version not found - these results are speculation'
response['name'] = document.name
response['rev'] = rev
document.rev = rev
response['rev'] = found_rev
document.rev = found_rev
document.is_rfc = lambda: False
response['content_url'] = document.get_href()
if int(rev)>0:
response['previous'] = f'{document.name}-{(int(rev)-1):02d}'
# not sure what to do if non-numeric values come back, so at least log it
log.assertion('found_rev.isdigit()')
if int(found_rev) > 0:
response['previous'] = f'{document.name}-{(int(found_rev)-1):02d}'
else:
match = re.search("-(rfc)?([0-9][0-9][0-9]+)bis(-.*)?$", name)
if match and match.group(2):