From 266d5bed3c2e0d2004b12e9d02dbc209f8295cad Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 22 Jan 2025 14:19:21 -0400 Subject: [PATCH] feat: cache file investigation results (#8459) * feat: cache result of investigate_fragment * test: test caching --- ietf/doc/tests.py | 37 +++++++++++++++++++++++++++ ietf/doc/utils.py | 65 +++++++++++++++++++++++++++++++---------------- 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index f5af7bb48..d74688f3f 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -5,6 +5,8 @@ import os import datetime import io +from hashlib import sha384 + from django.http import HttpRequest import lxml import bibtexparser @@ -3280,6 +3282,41 @@ class InvestigateTests(TestCase): "draft-this-should-not-be-possible-00.txt", ) + @mock.patch("ietf.doc.utils.caches") + def test_investigate_fragment_cache(self, mock_caches): + """investigate_fragment should cache its result""" + mock_default_cache = mock_caches["default"] + mock_default_cache.get.return_value = None # disable cache + result = investigate_fragment("this-is-active") + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-this-is-active-00.txt" + ) + self.assertTrue(mock_default_cache.get.called) + self.assertTrue(mock_default_cache.set.called) + expected_key = f"investigate_fragment:{sha384(b'this-is-active').hexdigest()}" + self.assertEqual(mock_default_cache.set.call_args.kwargs["key"], expected_key) + cached_value = mock_default_cache.set.call_args.kwargs["value"] # hang on to this + mock_default_cache.reset_mock() + + # Check that a cached value is used + mock_default_cache.get.return_value = cached_value + with mock.patch("ietf.doc.utils.Path") as mock_path: + result = investigate_fragment("this-is-active") + # Check that we got the same results + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-this-is-active-00.txt" + ) + # And that we used the cache + self.assertFalse(mock_path.called) # a proxy for "did the method do any real work" + self.assertTrue(mock_default_cache.get.called) + self.assertEqual(mock_default_cache.get.call_args, mock.call(expected_key)) + def test_investigate_get(self): """GET with no querystring should retrieve the investigate UI""" url = urlreverse("ietf.doc.views_doc.investigate") diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index ff19dfbde..53307526a 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -11,12 +11,14 @@ import textwrap from collections import defaultdict, namedtuple, Counter from dataclasses import dataclass +from hashlib import sha384 from pathlib import Path from typing import Iterator, Optional, Union from zoneinfo import ZoneInfo from django.conf import settings from django.contrib import messages +from django.core.cache import caches from django.db.models import OuterRef from django.forms import ValidationError from django.http import Http404 @@ -1459,29 +1461,48 @@ def get_doc_email_aliases(name: Optional[str] = None): return sorted(aliases, key=lambda a: (a["doc_name"])) -def investigate_fragment(name_fragment): - can_verify = set() - for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]: - can_verify.update(list(Path(root).glob(f"*{name_fragment}*"))) - archive_verifiable_names = set([p.name for p in can_verify]) - # Can also verify drafts in proceedings directories - can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*"))) - - # N.B. This reflects the assumption that the internet draft archive dir is in the - # a directory with other collections (at /a/ietfdata/draft/collections as this is written) - unverifiable_collections = set([ - p for p in - Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*") - if p.name not in archive_verifiable_names - ]) +def investigate_fragment(name_fragment: str): + cache = caches["default"] + # Ensure name_fragment does not interact badly with the cache key handling + name_digest = sha384(name_fragment.encode("utf8")).hexdigest() + cache_key = f"investigate_fragment:{name_digest}" + cached_result = cache.get(cache_key) + if cached_result is not None: + can_verify = cached_result["can_verify"] + unverifiable_collections = cached_result["unverifiable_collections"] + unexpected = cached_result["unexpected"] + else: + can_verify = set() + for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]: + can_verify.update(list(Path(root).glob(f"*{name_fragment}*"))) + archive_verifiable_names = set([p.name for p in can_verify]) + # Can also verify drafts in proceedings directories + can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*"))) - unverifiable_collections.difference_update(can_verify) - - expected_names = set([p.name for p in can_verify.union(unverifiable_collections)]) - maybe_unexpected = list( - Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*") - ) - unexpected = [p for p in maybe_unexpected if p.name not in expected_names] + # N.B. This reflects the assumption that the internet draft archive dir is in the + # a directory with other collections (at /a/ietfdata/draft/collections as this is written) + unverifiable_collections = set([ + p for p in + Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*") + if p.name not in archive_verifiable_names + ]) + + unverifiable_collections.difference_update(can_verify) + + expected_names = set([p.name for p in can_verify.union(unverifiable_collections)]) + maybe_unexpected = list( + Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*") + ) + unexpected = [p for p in maybe_unexpected if p.name not in expected_names] + cache.set( + key=cache_key, + timeout=3600, # 1 hour + value={ + "can_verify": can_verify, + "unverifiable_collections": unverifiable_collections, + "unexpected": unexpected, + } + ) return dict( can_verify=can_verify,