feat: cache file investigation results (#8459)

* feat: cache result of investigate_fragment

* test: test caching
This commit is contained in:
Jennifer Richards 2025-01-22 14:19:21 -04:00 committed by GitHub
parent 9a72cc0d6a
commit 266d5bed3c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 80 additions and 22 deletions

View file

@ -5,6 +5,8 @@
import os
import datetime
import io
from hashlib import sha384
from django.http import HttpRequest
import lxml
import bibtexparser
@ -3280,6 +3282,41 @@ class InvestigateTests(TestCase):
"draft-this-should-not-be-possible-00.txt",
)
@mock.patch("ietf.doc.utils.caches")
def test_investigate_fragment_cache(self, mock_caches):
"""investigate_fragment should cache its result"""
mock_default_cache = mock_caches["default"]
mock_default_cache.get.return_value = None # disable cache
result = investigate_fragment("this-is-active")
self.assertEqual(len(result["can_verify"]), 1)
self.assertEqual(len(result["unverifiable_collections"]), 0)
self.assertEqual(len(result["unexpected"]), 0)
self.assertEqual(
list(result["can_verify"])[0].name, "draft-this-is-active-00.txt"
)
self.assertTrue(mock_default_cache.get.called)
self.assertTrue(mock_default_cache.set.called)
expected_key = f"investigate_fragment:{sha384(b'this-is-active').hexdigest()}"
self.assertEqual(mock_default_cache.set.call_args.kwargs["key"], expected_key)
cached_value = mock_default_cache.set.call_args.kwargs["value"] # hang on to this
mock_default_cache.reset_mock()
# Check that a cached value is used
mock_default_cache.get.return_value = cached_value
with mock.patch("ietf.doc.utils.Path") as mock_path:
result = investigate_fragment("this-is-active")
# Check that we got the same results
self.assertEqual(len(result["can_verify"]), 1)
self.assertEqual(len(result["unverifiable_collections"]), 0)
self.assertEqual(len(result["unexpected"]), 0)
self.assertEqual(
list(result["can_verify"])[0].name, "draft-this-is-active-00.txt"
)
# And that we used the cache
self.assertFalse(mock_path.called) # a proxy for "did the method do any real work"
self.assertTrue(mock_default_cache.get.called)
self.assertEqual(mock_default_cache.get.call_args, mock.call(expected_key))
def test_investigate_get(self):
"""GET with no querystring should retrieve the investigate UI"""
url = urlreverse("ietf.doc.views_doc.investigate")

View file

@ -11,12 +11,14 @@ import textwrap
from collections import defaultdict, namedtuple, Counter
from dataclasses import dataclass
from hashlib import sha384
from pathlib import Path
from typing import Iterator, Optional, Union
from zoneinfo import ZoneInfo
from django.conf import settings
from django.contrib import messages
from django.core.cache import caches
from django.db.models import OuterRef
from django.forms import ValidationError
from django.http import Http404
@ -1459,29 +1461,48 @@ def get_doc_email_aliases(name: Optional[str] = None):
return sorted(aliases, key=lambda a: (a["doc_name"]))
def investigate_fragment(name_fragment):
can_verify = set()
for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
can_verify.update(list(Path(root).glob(f"*{name_fragment}*")))
archive_verifiable_names = set([p.name for p in can_verify])
# Can also verify drafts in proceedings directories
can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*")))
# N.B. This reflects the assumption that the internet draft archive dir is in the
# a directory with other collections (at /a/ietfdata/draft/collections as this is written)
unverifiable_collections = set([
p for p in
Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*")
if p.name not in archive_verifiable_names
])
def investigate_fragment(name_fragment: str):
cache = caches["default"]
# Ensure name_fragment does not interact badly with the cache key handling
name_digest = sha384(name_fragment.encode("utf8")).hexdigest()
cache_key = f"investigate_fragment:{name_digest}"
cached_result = cache.get(cache_key)
if cached_result is not None:
can_verify = cached_result["can_verify"]
unverifiable_collections = cached_result["unverifiable_collections"]
unexpected = cached_result["unexpected"]
else:
can_verify = set()
for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
can_verify.update(list(Path(root).glob(f"*{name_fragment}*")))
archive_verifiable_names = set([p.name for p in can_verify])
# Can also verify drafts in proceedings directories
can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*")))
unverifiable_collections.difference_update(can_verify)
expected_names = set([p.name for p in can_verify.union(unverifiable_collections)])
maybe_unexpected = list(
Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*")
)
unexpected = [p for p in maybe_unexpected if p.name not in expected_names]
# N.B. This reflects the assumption that the internet draft archive dir is in the
# a directory with other collections (at /a/ietfdata/draft/collections as this is written)
unverifiable_collections = set([
p for p in
Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*")
if p.name not in archive_verifiable_names
])
unverifiable_collections.difference_update(can_verify)
expected_names = set([p.name for p in can_verify.union(unverifiable_collections)])
maybe_unexpected = list(
Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*")
)
unexpected = [p for p in maybe_unexpected if p.name not in expected_names]
cache.set(
key=cache_key,
timeout=3600, # 1 hour
value={
"can_verify": can_verify,
"unverifiable_collections": unverifiable_collections,
"unexpected": unexpected,
}
)
return dict(
can_verify=can_verify,