feat: cache file investigation results (#8459)

* feat: cache result of investigate_fragment

* test: test caching
This commit is contained in:
Jennifer Richards 2025-01-22 14:19:21 -04:00 committed by GitHub
parent 9a72cc0d6a
commit 266d5bed3c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 80 additions and 22 deletions

View file

@ -5,6 +5,8 @@
import os import os
import datetime import datetime
import io import io
from hashlib import sha384
from django.http import HttpRequest from django.http import HttpRequest
import lxml import lxml
import bibtexparser import bibtexparser
@ -3280,6 +3282,41 @@ class InvestigateTests(TestCase):
"draft-this-should-not-be-possible-00.txt", "draft-this-should-not-be-possible-00.txt",
) )
@mock.patch("ietf.doc.utils.caches")
def test_investigate_fragment_cache(self, mock_caches):
    """investigate_fragment should store its result in the cache and reuse it

    Phase one runs against a cache that reports a miss and checks that the
    result is written back under the expected key. Phase two feeds that
    stored value back through the mocked cache and checks that the same
    result is returned without Path being used.
    """
    fragment = "this-is-active"
    expected_key = f"investigate_fragment:{sha384(b'this-is-active').hexdigest()}"

    def assert_only_active_draft(found):
        # Exactly one verifiable file and nothing in the other buckets
        self.assertEqual(len(found["can_verify"]), 1)
        self.assertEqual(len(found["unverifiable_collections"]), 0)
        self.assertEqual(len(found["unexpected"]), 0)
        self.assertEqual(
            list(found["can_verify"])[0].name, "draft-this-is-active-00.txt"
        )

    default_cache = mock_caches["default"]

    # Phase one: cache.get() returns None, so the lookup is a miss
    default_cache.get.return_value = None
    assert_only_active_draft(investigate_fragment(fragment))
    self.assertTrue(default_cache.get.called)
    self.assertTrue(default_cache.set.called)
    set_kwargs = default_cache.set.call_args.kwargs
    self.assertEqual(set_kwargs["key"], expected_key)
    stored_value = set_kwargs["value"]  # keep this to replay as a cache hit

    # Phase two: hand the stored value back and confirm it is used
    default_cache.reset_mock()
    default_cache.get.return_value = stored_value
    with mock.patch("ietf.doc.utils.Path") as mock_path:
        replayed = investigate_fragment(fragment)
    assert_only_active_draft(replayed)
    # Path not being called is a proxy for "the method did no real work"
    self.assertFalse(mock_path.called)
    self.assertTrue(default_cache.get.called)
    self.assertEqual(default_cache.get.call_args, mock.call(expected_key))
def test_investigate_get(self): def test_investigate_get(self):
"""GET with no querystring should retrieve the investigate UI""" """GET with no querystring should retrieve the investigate UI"""
url = urlreverse("ietf.doc.views_doc.investigate") url = urlreverse("ietf.doc.views_doc.investigate")

View file

@ -11,12 +11,14 @@ import textwrap
from collections import defaultdict, namedtuple, Counter from collections import defaultdict, namedtuple, Counter
from dataclasses import dataclass from dataclasses import dataclass
from hashlib import sha384
from pathlib import Path from pathlib import Path
from typing import Iterator, Optional, Union from typing import Iterator, Optional, Union
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from django.conf import settings from django.conf import settings
from django.contrib import messages from django.contrib import messages
from django.core.cache import caches
from django.db.models import OuterRef from django.db.models import OuterRef
from django.forms import ValidationError from django.forms import ValidationError
from django.http import Http404 from django.http import Http404
@ -1459,29 +1461,48 @@ def get_doc_email_aliases(name: Optional[str] = None):
return sorted(aliases, key=lambda a: (a["doc_name"])) return sorted(aliases, key=lambda a: (a["doc_name"]))
def investigate_fragment(name_fragment): def investigate_fragment(name_fragment: str):
can_verify = set() cache = caches["default"]
for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]: # Ensure name_fragment does not interact badly with the cache key handling
can_verify.update(list(Path(root).glob(f"*{name_fragment}*"))) name_digest = sha384(name_fragment.encode("utf8")).hexdigest()
archive_verifiable_names = set([p.name for p in can_verify]) cache_key = f"investigate_fragment:{name_digest}"
# Can also verify drafts in proceedings directories cached_result = cache.get(cache_key)
can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*"))) if cached_result is not None:
can_verify = cached_result["can_verify"]
# N.B. This reflects the assumption that the internet draft archive dir is in the unverifiable_collections = cached_result["unverifiable_collections"]
# a directory with other collections (at /a/ietfdata/draft/collections as this is written) unexpected = cached_result["unexpected"]
unverifiable_collections = set([ else:
p for p in can_verify = set()
Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*") for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
if p.name not in archive_verifiable_names can_verify.update(list(Path(root).glob(f"*{name_fragment}*")))
]) archive_verifiable_names = set([p.name for p in can_verify])
# Can also verify drafts in proceedings directories
can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*")))
unverifiable_collections.difference_update(can_verify) # N.B. This reflects the assumption that the internet draft archive dir is in the
# a directory with other collections (at /a/ietfdata/draft/collections as this is written)
expected_names = set([p.name for p in can_verify.union(unverifiable_collections)]) unverifiable_collections = set([
maybe_unexpected = list( p for p in
Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*") Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*")
) if p.name not in archive_verifiable_names
unexpected = [p for p in maybe_unexpected if p.name not in expected_names] ])
unverifiable_collections.difference_update(can_verify)
expected_names = set([p.name for p in can_verify.union(unverifiable_collections)])
maybe_unexpected = list(
Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*")
)
unexpected = [p for p in maybe_unexpected if p.name not in expected_names]
cache.set(
key=cache_key,
timeout=3600, # 1 hour
value={
"can_verify": can_verify,
"unverifiable_collections": unverifiable_collections,
"unexpected": unexpected,
}
)
return dict( return dict(
can_verify=can_verify, can_verify=can_verify,