From 38469966e209912f080cb8f899457d0cd5979b2d Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 12 Nov 2021 21:45:58 +0000 Subject: [PATCH] Return rev used to find doc when heuristics modify the input. Share heuristics between rfcdiff and html views. Fixes #3437. Commit ready for merge. - Legacy-Id: 19658 --- ietf/doc/factories.py | 13 +++ ietf/doc/tests.py | 200 +++++++++++++++++++++++++++------------- ietf/doc/tests_utils.py | 53 ++++++++++- ietf/doc/utils.py | 38 +++++++- ietf/doc/views_doc.py | 105 +++++++++------------ 5 files changed, 276 insertions(+), 133 deletions(-) diff --git a/ietf/doc/factories.py b/ietf/doc/factories.py index 824f13cba..4dd96fabf 100644 --- a/ietf/doc/factories.py +++ b/ietf/doc/factories.py @@ -87,6 +87,19 @@ class BaseDocumentFactory(factory.django.DjangoModelFactory): continue obj.relateddocument_set.create(relationship_id=rel_id, target=docalias) + @factory.post_generation + def create_revisions(obj, create, extracted, **kwargs): # pylint: disable=no-self-argument + """Create additional revisions of the document + + Argument should be an iterable of revisions. Remember that range() is exclusive on the end + index, so range(1, 10) stops at 9. + """ + if create and extracted: + for rev in extracted: + e = NewRevisionDocEventFactory(doc=obj, rev=f'{rev:02d}') + obj.rev = f'{rev:02d}' + obj.save_with_history([e]) + @classmethod def _after_postgeneration(cls, obj, create, results=None): """Save again the instance if creating and at least one hook ran.""" diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index 4559020e4..0b0b1cde2 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -2486,6 +2486,7 @@ class RfcdiffSupportTests(TestCase): def setUp(self): super().setUp() self.target_view = 'ietf.doc.views_doc.rfcdiff_latest_json' + self._last_rfc_num = 8000 def getJson(self, view_args): url = urlreverse(self.target_view, kwargs=view_args) @@ -2493,79 +2494,85 @@ class RfcdiffSupportTests(TestCase): self.assertEqual(r.status_code, 200) return r.json() - def test_draft(self): - draft = IndividualDraftFactory(name='draft-somebody-did-something',rev='00') - for r in range(0,13): - e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}') - draft.rev = f'{r:02d}' - draft.save_with_history([e]) + def next_rfc_number(self): + self._last_rfc_num += 1 + return self._last_rfc_num + + def do_draft_test(self, name): + draft = IndividualDraftFactory(name=name, rev='00', create_revisions=range(0,13)) draft = reload_db_objects(draft) received = self.getJson(dict(name=draft.name)) - self.assertEqual(received, dict( - name=draft.name, - rev=draft.rev, - content_url=draft.get_href(), - previous=f'{draft.name}-{(int(draft.rev)-1):02d}' - )) + self.assertEqual( + received, + dict( + name=draft.name, + rev=draft.rev, + content_url=draft.get_href(), + previous=f'{draft.name}-{(int(draft.rev)-1):02d}' + ), + 'Incorrect JSON when draft revision not specified', + ) received = self.getJson(dict(name=draft.name, rev=draft.rev)) - self.assertEqual(received, dict( - name=draft.name, - rev=draft.rev, - content_url=draft.get_href(), - previous=f'{draft.name}-{(int(draft.rev)-1):02d}' - )) + self.assertEqual( + received, + dict( + name=draft.name, + rev=draft.rev, + content_url=draft.get_href(), + previous=f'{draft.name}-{(int(draft.rev)-1):02d}' + ), + 'Incorrect JSON when latest revision specified', + ) received = self.getJson(dict(name=draft.name, rev='10')) - self.assertEqual(received, dict( - name=draft.name, - rev='10', - content_url=draft.history_set.get(rev='10').get_href(), - previous=f'{draft.name}-09' - )) + self.assertEqual( + received, + dict( + name=draft.name, + rev='10', + content_url=draft.history_set.get(rev='10').get_href(), + previous=f'{draft.name}-09' + ), + 'Incorrect JSON when historical revision specified', + ) received = self.getJson(dict(name=draft.name, rev='00')) - self.assertNotIn('previous', received) + self.assertNotIn('previous', received, 'Rev 00 has no previous name when not replacing a draft') replaced = IndividualDraftFactory() RelatedDocument.objects.create(relationship_id='replaces',source=draft,target=replaced.docalias.first()) received = self.getJson(dict(name=draft.name, rev='00')) - self.assertEqual(received['previous'], f'{replaced.name}-{replaced.rev}') + self.assertEqual(received['previous'], f'{replaced.name}-{replaced.rev}', + 'Rev 00 has a previous name when replacing a draft') + def test_draft(self): + # test with typical, straightforward names + self.do_draft_test(name='draft-somebody-did-a-thing') + # try with different potentially problematic names + self.do_draft_test(name='draft-someone-did-something-01-02') + self.do_draft_test(name='draft-someone-did-something-else-02') + self.do_draft_test(name='draft-someone-did-something-02-weird-01') - def test_draft_with_broken_history(self): - draft = IndividualDraftFactory(rev='10') + def do_draft_with_broken_history_test(self, name): + draft = IndividualDraftFactory(name=name, rev='10') received = self.getJson(dict(name=draft.name,rev='09')) self.assertEqual(received['rev'],'09') self.assertEqual(received['previous'], f'{draft.name}-08') self.assertTrue('warning' in received) + def test_draft_with_broken_history(self): + # test with typical, straightforward names + self.do_draft_with_broken_history_test(name='draft-somebody-did-something') + # try with different potentially problematic names + self.do_draft_with_broken_history_test(name='draft-someone-did-something-01-02') + self.do_draft_with_broken_history_test(name='draft-someone-did-something-else-02') + self.do_draft_with_broken_history_test(name='draft-someone-did-something-02-weird-03') - def test_draftname_with_numeric_suffix(self): - draft = IndividualDraftFactory(name='draft-someone-did-something-01-02',rev='00') - for r in range(0,4): - e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}') - draft.rev = f'{r:02d}' - draft.save_with_history([e]) - - received = self.getJson(dict(name=draft.name)) - self.assertEqual(received['rev'],'03') - self.assertIn('01-02-03',received['content_url']) - self.assertIn('01-02-02',received['previous']) - - received = self.getJson(dict(name=draft.name,rev='02')) - self.assertEqual(received['rev'],'02') - self.assertIn('01-02-02',received['content_url']) - - def test_rfc(self): - draft = WgDraftFactory() - for r in range(0,2): - e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}') - draft.rev = f'{r:02d}' - draft.save_with_history([e]) - - draft.docalias.create(name='rfc8000') + def do_rfc_test(self, draft_name): + draft = WgDraftFactory(name=draft_name, create_revisions=range(0,2)) + draft.docalias.create(name=f'rfc{self.next_rfc_number():04}') draft.set_state(State.objects.get(type_id='draft',slug='rfc')) draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub')) draft = reload_db_objects(draft) @@ -2573,26 +2580,47 @@ class RfcdiffSupportTests(TestCase): number = rfc.rfc_number() received = self.getJson(dict(name=number)) - self.assertEqual(received, dict( - content_url=rfc.get_href(), - name=rfc.canonical_name(), - previous=f'{draft.name}-{draft.rev}', - )) + self.assertEqual( + received, + dict( + content_url=rfc.get_href(), + name=rfc.canonical_name(), + previous=f'{draft.name}-{draft.rev}', + ), + 'Can look up an RFC by number', + ) num_received = received received = self.getJson(dict(name=rfc.canonical_name())) - self.assertEqual(num_received, received) + self.assertEqual(num_received, received, 'RFC by canonical name gives same result as by number') received = self.getJson(dict(name=f'RfC {number}')) - self.assertEqual(num_received, received) + self.assertEqual(num_received, received, 'RFC with unusual spacing/caps gives same result as by number') + + received = self.getJson(dict(name=draft.name)) + self.assertEqual(num_received, received, 'RFC by draft name and no rev gives same result as by number') + + received = self.getJson(dict(name=draft.name, rev='01')) + self.assertEqual( + received, + dict( + content_url=draft.history_set.get(rev='01').get_href(), + name=draft.name, + rev='01', + previous=f'{draft.name}-00', + ), + 'RFC by draft name with rev should give draft name, not canonical name' + ) + + def test_rfc(self): + # simple draft name + self.do_rfc_test(draft_name='draft-test-ar-ef-see') + # tricky draft names + self.do_rfc_test(draft_name='draft-whatever-02') + self.do_rfc_test(draft_name='draft-test-me-03-04') def test_rfc_with_tombstone(self): - draft = WgDraftFactory() - for r in range(0,2): - e = NewRevisionDocEventFactory(doc=draft,rev=f'{r:02d}') - draft.rev = f'{r:02d}' - draft.save_with_history([e]) - + draft = WgDraftFactory(create_revisions=range(0,2)) draft.docalias.create(name='rfc3261') # See views_doc.HAS_TOMBSTONE draft.set_state(State.objects.get(type_id='draft',slug='rfc')) draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub')) @@ -2603,4 +2631,46 @@ class RfcdiffSupportTests(TestCase): received = self.getJson(dict(name=rfc.canonical_name())) self.assertTrue(received['previous'].endswith('00')) + def do_rfc_with_broken_history_test(self, draft_name): + draft = WgDraftFactory(rev='10', name=draft_name) + draft.docalias.create(name=f'rfc{self.next_rfc_number():04}') + draft.set_state(State.objects.get(type_id='draft',slug='rfc')) + draft.set_state(State.objects.get(type_id='draft-iesg', slug='pub')) + draft = reload_db_objects(draft) + rfc = draft + received = self.getJson(dict(name=draft.name)) + self.assertEqual( + received, + dict( + content_url=rfc.get_href(), + name=rfc.canonical_name(), + previous=f'{draft.name}-10', + ), + 'RFC by draft name without rev should return canonical RFC name and no rev', + ) + + received = self.getJson(dict(name=draft.name, rev='10')) + self.assertEqual(received['name'], draft.name, 'RFC by draft name with rev should return draft name') + self.assertEqual(received['rev'], '10', 'Requested rev should be returned') + self.assertEqual(received['previous'], f'{draft.name}-09', 'Previous rev is one less than requested') + self.assertIn(f'{draft.name}-10', received['content_url'], 'Returned URL should include requested rev') + self.assertNotIn('warning', received, 'No warning when we have the rev requested') + + received = self.getJson(dict(name=f'{draft.name}-09')) + self.assertEqual(received['name'], draft.name, 'RFC by draft name with rev should return draft name') + self.assertEqual(received['rev'], '09', 'Requested rev should be returned') + self.assertEqual(received['previous'], f'{draft.name}-08', 'Previous rev is one less than requested') + self.assertIn(f'{draft.name}-09', received['content_url'], 'Returned URL should include requested rev') + self.assertEqual( + received['warning'], + 'History for this version not found - these results are speculation', + 'Warning should be issued when requested rev is not found' + ) + + def test_rfc_with_broken_history(self): + # simple draft name + self.do_rfc_with_broken_history_test(draft_name='draft-some-draft') + # tricky draft names + self.do_rfc_with_broken_history_test(draft_name='draft-gizmo-01') + self.do_rfc_with_broken_history_test(draft_name='draft-oh-boy-what-a-draft-02-03') diff --git a/ietf/doc/tests_utils.py b/ietf/doc/tests_utils.py index 165594cf4..d626c9c34 100644 --- a/ietf/doc/tests_utils.py +++ b/ietf/doc/tests_utils.py @@ -8,9 +8,9 @@ from ietf.name.models import DocTagName from ietf.person.factories import PersonFactory from ietf.utils.test_utils import TestCase from ietf.person.models import Person -from ietf.doc.factories import DocumentFactory -from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor -from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors +from ietf.doc.factories import DocumentFactory, WgRfcFactory +from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor, Document +from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors, fuzzy_find_documents class ActionHoldersTests(TestCase): @@ -239,4 +239,49 @@ class MiscTests(TestCase): self.assertIn('cleared country (was "USA")', events[0].desc) docauth = doc.documentauthor_set.first() self.assertEqual(docauth.affiliation, '') - self.assertEqual(docauth.country, '') \ No newline at end of file + self.assertEqual(docauth.country, '') + + def do_fuzzy_find_documents_rfc_test(self, name): + rfc = WgRfcFactory(name=name, create_revisions=(0, 1, 2)) + rfc = Document.objects.get(pk=rfc.pk) # clear out any cached values + + # by canonical name + found = fuzzy_find_documents(rfc.canonical_name(), None) + self.assertCountEqual(found.documents, [rfc]) + self.assertEqual(found.matched_rev, None) + self.assertEqual(found.matched_name, rfc.canonical_name()) + + # by draft name, no rev + found = fuzzy_find_documents(rfc.name, None) + self.assertCountEqual(found.documents, [rfc]) + self.assertEqual(found.matched_rev, None) + self.assertEqual(found.matched_name, rfc.name) + + # by draft name, latest rev + found = fuzzy_find_documents(rfc.name, '02') + self.assertCountEqual(found.documents, [rfc]) + self.assertEqual(found.matched_rev, '02') + self.assertEqual(found.matched_name, rfc.name) + + # by draft name, earlier rev + found = fuzzy_find_documents(rfc.name, '01') + self.assertCountEqual(found.documents, [rfc]) + self.assertEqual(found.matched_rev, '01') + self.assertEqual(found.matched_name, rfc.name) + + # wrong name or revision + found = fuzzy_find_documents(rfc.name + '-incorrect') + self.assertCountEqual(found.documents, [], 'Should not find document that does not match') + found = fuzzy_find_documents(rfc.name + '-incorrect', '02') + self.assertCountEqual(found.documents, [], 'Still should not find document, even with a version') + found = fuzzy_find_documents(rfc.name, '22') + self.assertCountEqual(found.documents, [rfc], + 'Should find document even if rev does not exist') + + + def test_fuzzy_find_documents(self): + # Should add additional tests/test cases for other document types/name formats + self.do_fuzzy_find_documents_rfc_test('draft-normal-name') + self.do_fuzzy_find_documents_rfc_test('draft-name-with-number-01') + self.do_fuzzy_find_documents_rfc_test('draft-name-that-has-two-02-04') + self.do_fuzzy_find_documents_rfc_test('draft-wild-01-numbers-0312') diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 5ebcc8124..d266cdbf9 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -11,7 +11,7 @@ import os import re import textwrap -from collections import defaultdict +from collections import defaultdict, namedtuple from urllib.parse import quote from django.conf import settings @@ -1286,3 +1286,39 @@ def generate_idnits2_rfcs_obsoleted(): obsdict[k] = sorted(obsdict[k]) return render_to_string('doc/idnits2-rfcs-obsoleted.txt', context={'obsitems':sorted(obsdict.items())}) + +def fuzzy_find_documents(name, rev=None): + """Find a document based on name/rev + + Applies heuristics, assuming the inputs were joined by a '-' that may have been misplaced. + If returned documents queryset is empty, matched_rev and and matched_name are meaningless. + The rev input is not validated - it is used to find possible names if the name input does + not match anything, but matched_rev may not correspond to an actual version of the found + document. + """ + # Handle special case name formats + if name.startswith('rfc0'): + name = "rfc" + name[3:].lstrip('0') + if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name): + name = "%s-%s" % (name, rev) + rev = None + if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev): + name = "%s-%s" % (name, rev[:-3]) + rev = rev[-2:] + if re.match("^[0-9]+$", name): + name = f'rfc{name}' + if re.match("^[Rr][Ff][Cc] [0-9]+$",name): + name = f'rfc{name[4:]}' + + # see if we can find a document using this name + docs = Document.objects.filter(docalias__name=name, type_id='draft') + if rev and not docs.exists(): + # No document found, see if the name/rev split has been misidentified. + # Handles some special cases, like draft-ietf-tsvwg-ieee-802-11. + name = '%s-%s' % (name, rev) + docs = Document.objects.filter(docalias__name=name, type_id='draft') + if docs.exists(): + rev = None # found a doc by name with rev = None, so update that + + FoundDocuments = namedtuple('FoundDocuments', 'documents matched_name matched_rev') + return FoundDocuments(docs, name, rev) diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index f255219b9..f2ccb3ec9 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -64,7 +64,7 @@ from ietf.doc.utils import (add_links_in_new_revision_events, augment_events_wit get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus, add_events_message_info, get_unicode_document_content, build_doc_meta_block, augment_docs_and_user_with_user_info, irsg_needed_ballot_positions, add_action_holder_change_event, - build_doc_supermeta_block, build_file_urls, update_documentauthors) + build_doc_supermeta_block, build_file_urls, update_documentauthors, fuzzy_find_documents) from ietf.doc.utils_bofreq import bofreq_editors, bofreq_responsible from ietf.group.models import Role, Group from ietf.group.utils import can_manage_all_groups_of_type, can_manage_materials, group_features_role_filter @@ -721,40 +721,26 @@ def document_main(request, name, rev=None): def document_html(request, name, rev=None): - if name.startswith('rfc0'): - name = "rfc" + name[3:].lstrip('0') - if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name): - name = "%s-%s" % (name, rev) - if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev): - name = "%s-%s" % (name, rev[:-3]) - rev = rev[-2:] - if re.match("^[0-9]+$", name): - return redirect('ietf.doc.views_doc.document_html',name=f'rfc{name}') - if re.match("^[Rr][Ff][Cc] [0-9]+$",name): - return redirect('ietf.doc.views_doc.document_html',name=f'rfc{name[4:]}') - docs = Document.objects.filter(docalias__name=name) - if rev and not docs.exists(): - # handle some special cases, like draft-ietf-tsvwg-ieee-802-11 - name = '%s-%s' % (name, rev) - rev=None - docs = Document.objects.filter(docalias__name=name) - if not docs.exists(): + found = fuzzy_find_documents(name, rev) + num_found = found.documents.count() + if num_found == 0: raise Http404("Document not found: %s" % name) - if docs.count() > 1: + if num_found > 1: raise Http404("Multiple documents matched: %s" % name) - doc = docs.get() + if found.matched_name.startswith('rfc') and name != found.matched_name: + return redirect('ietf.doc.views_doc.document_html', name=found.matched_name) + + doc = found.documents.get() if not os.path.exists(doc.get_file_name()): raise Http404("File not found: %s" % doc.get_file_name()) - if not rev and not name.startswith('rfc'): + if found.matched_rev or found.matched_name.startswith('rfc'): + rev = found.matched_rev + else: rev = doc.rev if rev: - docs = DocHistory.objects.filter(doc=doc, rev=rev) - if docs.exists(): - doc = docs.first() - else: - doc = doc.fake_history_obj(rev) + doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev) if doc.type_id in ['draft',]: doc.supermeta = build_doc_supermeta_block(doc) doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX) @@ -1794,44 +1780,30 @@ def idnits2_state(request, name, rev=None): return render(request, 'doc/idnits2-state.txt', context={'doc':doc}, content_type='text/plain;charset=utf-8') def find_doc_for_rfcdiff(name, rev): - if name.startswith('rfc0'): - name = "rfc" + name[3:].lstrip('0') - if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name): - name = "%s-%s" % (name, rev) - if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev): - name = "%s-%s" % (name, rev[:-3]) - rev = rev[-2:] - if re.match("^[0-9]+$", name): - name = f'rfc{name}' - if re.match("^[Rr][Ff][Cc] [0-9]+$",name): - name = f'rfc{name[4:]}' - - docs = Document.objects.filter(docalias__name=name, type_id='draft') - if rev and not docs.exists(): - # handle some special cases, like draft-ietf-tsvwg-ieee-802-11 - name = '%s-%s' % (name, rev) - rev=None - docs = Document.objects.filter(docalias__name=name, type_id='draft') + """rfcdiff lookup heuristics + Returns a tuple with: + [0] - condition string + [1] - document found (or None) + [2] - historic version + [3] - revision actually found (may differ from :rev: input) + """ + found = fuzzy_find_documents(name, rev) condition = 'no such document' - if not docs.exists() or docs.count() > 1: - return (condition, None, None) - doc = docs.get() - if not rev or (rev and doc.rev==rev): + if found.documents.count() != 1: + return (condition, None, None, rev) + doc = found.documents.get() + if found.matched_rev is None or doc.rev == found.matched_rev: condition = 'current version' - return condition, doc, None + return (condition, doc, None, found.matched_rev) else: - candidate = None - for h in doc.history_set.order_by("-time"): - if rev == h.rev: - candidate = h - break + candidate = doc.history_set.filter(rev=found.matched_rev).order_by("-time").first() if candidate: condition = 'historic version' - return condition, doc, candidate + return (condition, doc, candidate, found.matched_rev) else: condition = 'version dochistory not found' - return condition, doc, None + return (condition, doc, None, found.matched_rev) # This is a proof of concept of a service that would redirect to the current revision # def rfcdiff_latest(request, name, rev=None): @@ -1886,17 +1858,20 @@ HAS_TOMBSTONE = [ def rfcdiff_latest_json(request, name, rev=None): response = dict() - condition, document, history = find_doc_for_rfcdiff(name, rev) + condition, document, history, found_rev = find_doc_for_rfcdiff(name, rev) if condition == 'no such document': raise Http404 elif condition in ('historic version', 'current version'): doc = history if history else document - if not rev and doc.is_rfc(): + if not found_rev and doc.is_rfc(): response['content_url'] = doc.get_href() response['name']=doc.canonical_name() if doc.name != doc.canonical_name(): prev_rev = doc.rev + # not sure what to do if non-numeric values come back, so at least log it + log.assertion('doc.rfc_number().isdigit()') + log.assertion('doc.rev.isdigit()') if int(doc.rfc_number()) in HAS_TOMBSTONE and prev_rev != '00': prev_rev = f'{(int(doc.rev)-1):02d}' response['previous'] = f'{doc.name}-{prev_rev}' @@ -1915,16 +1890,20 @@ def rfcdiff_latest_json(request, name, rev=None): if match and match.group(2): response['previous'] = f'rfc{match.group(2)}' else: + # not sure what to do if non-numeric values come back, so at least log it + log.assertion('doc.rev.isdigit()') response['previous'] = f'{doc.name}-{(int(doc.rev)-1):02d}' elif condition == 'version dochistory not found': response['warning'] = 'History for this version not found - these results are speculation' response['name'] = document.name - response['rev'] = rev - document.rev = rev + response['rev'] = found_rev + document.rev = found_rev document.is_rfc = lambda: False response['content_url'] = document.get_href() - if int(rev)>0: - response['previous'] = f'{document.name}-{(int(rev)-1):02d}' + # not sure what to do if non-numeric values come back, so at least log it + log.assertion('found_rev.isdigit()') + if int(found_rev) > 0: + response['previous'] = f'{document.name}-{(int(found_rev)-1):02d}' else: match = re.search("-(rfc)?([0-9][0-9][0-9]+)bis(-.*)?$", name) if match and match.group(2):