* feat: basic blobstore infrastructure for dev
* refactor: (broken) attempt to put minio console behind nginx
* feat: initialize blobstore with boto3
* fix: abandon attempt to proxy minio. Use docker compose instead.
* feat: beginning of blob writes
* feat: storage utilities
* feat: test buckets
* chore: black
* chore: remove unused import
* chore: avoid f-string when not needed
* fix: inform all settings files about blobstores
* fix: declare types for some settings
* ci: point to new target base
* ci: adjust test workflow
* fix: give the tests debug environment a blobstore
* fix: "better" name declarations
* ci: use devblobstore container
* chore: identify places to write to blobstorage
* chore: remove unreachable code
* feat: store materials
* feat: store statements
* feat: store status changes
* feat: store liaison attachments
* feat: store agendas provided with Interim session requests
* chore: capture TODOs
* feat: store polls and chatlogs
* chore: remove unneeded TODO
* feat: store drafts on submit and post
* fix: handle storage during doc expiration and resurrection
* fix: mirror an unlink
* chore: add/refine TODOs
* feat: store slide submissions
* fix: structure slide test correctly
* fix: correct sense of existence check
* feat: store some indexes
* feat: BlobShadowFileSystemStorage
* feat: shadow floorplans / host logos to the blob
* chore: remove unused import
* feat: strip path from blob shadow names
* feat: shadow photos / thumbs
* refactor: combine photo and photothumb blob kinds
  The photos / thumbs were already dropped in the same directory, so let's not add a distinction at this point.
* style: whitespace
* refactor: use kwargs consistently
* chore: migrations
* refactor: better deconstruct(); rebuild migrations
* fix: use new class in mock patch
* chore: add TODO
* feat: store group index documents
* chore: identify more TODOs
* feat: store reviews
* fix: repair merge
* chore: remove unnecessary TODO
* feat: StoredObject metadata
* fix: deburr some debugging code
* fix: only set the deleted timestamp once
* chore: correct typo
* fix: get_or_create vs get and test
* fix: avoid the questionable is_seekable helper
* chore: capture future design consideration
* chore: blob store cfg for k8s
* chore: black
* chore: copyright
* ci: bucket name prefix option + run Black
  Adds/uses the DATATRACKER_BLOB_STORE_BUCKET_PREFIX option. Other changes are just Black styling.
* ci: fix typo in bucket name expression
* chore: parameters in app-configure-blobstore
  Allows use with other blob stores.
* ci: remove verify=False option
* fix: don't return value from __init__
* feat: option to log timing of S3Storage calls
* chore: units
* fix: deleted->null when storing a file
* style: Black
* feat: log as JSON; refactor to share code; handle exceptions
* ci: add ietf_log_blob_timing option for k8s
* test: --no-manage-blobstore option for running tests
* test: use blob store settings from env, if set
* test: actually set a couple more storage opts
* feat: offswitch (#8541)
  * feat: offswitch
  * fix: apply ENABLE_BLOBSTORAGE to BlobShadowFileSystemStorage behavior
* chore: log timing of blob reads
* chore: import Config from botocore.config
* chore(deps): import boto3-stubs / botocore
  botocore is implicitly imported, but make it explicit since we refer to it directly.
* chore: drop type annotation that mypy loudly ignores
* refactor: add storage methods via mixin
  Shares code between Document and DocHistory without putting it in the base DocumentInfo class, which lacks the name field. Also makes mypy happy.
* feat: add timeout / retry limit to boto client
* ci: let k8s config the timeouts via env
* chore: repair merge resolution typo
* chore: tweak settings imports
* chore: simplify k8s/settings_local.py imports

---------

Co-authored-by: Jennifer Richards <jennifer@staff.ietf.org>
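Of the commits above, the boto client timeout / retry pair is worth a concrete illustration. A minimal sketch of that configuration, assuming hypothetical environment variable names and default values (the actual k8s config may differ):

    import os
    from botocore.config import Config

    # Hypothetical env-driven knobs; names and defaults are illustrative only.
    boto_config = Config(
        connect_timeout=int(os.environ.get("BLOB_CONNECT_TIMEOUT", "10")),
        read_timeout=int(os.environ.get("BLOB_READ_TIMEOUT", "60")),
        retries={"max_attempts": 2, "mode": "standard"},
    )

The resulting Config object can then be handed to boto3 when the storage backend constructs its client, which is why botocore.config.Config is imported explicitly in the commits above.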
# Copyright The IETF Trust 2025, All Rights Reserved

import debug  # pyflakes:ignore
import json

from contextlib import contextmanager
from hashlib import sha384
from io import BufferedReader
from storages.backends.s3 import S3Storage
from typing import Optional, Union

from django.core.files.base import File

from ietf.doc.models import StoredObject
from ietf.utils.log import log
from ietf.utils.timezone import timezone

@contextmanager
def maybe_log_timing(enabled, op, **kwargs):
    """If enabled, log elapsed time and additional data from kwargs

    Emits log even if an exception occurs
    """
    before = timezone.now()
    exception = None
    try:
        yield
    except Exception as err:
        exception = err
        raise
    finally:
        if enabled:
            dt = timezone.now() - before
            log(
                json.dumps(
                    {
                        "log": "S3Storage_timing",
                        "seconds": dt.total_seconds(),
                        "op": op,
                        "exception": "" if exception is None else repr(exception),
                        **kwargs,
                    }
                )
            )

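# Illustrative use of maybe_log_timing (a hypothetical call site, not code
# from this module). The context manager is a no-op unless `enabled` is true,
# so it can be left in place unconditionally:
#
#     with maybe_log_timing(enabled=True, op="head_object", name="x.txt"):
#         client.head_object(Bucket="dev-bucket", Key="x.txt")
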
# TODO-BLOBSTORE
# Consider overriding save directly so that
# we capture metadata for, e.g., ImageField objects
class CustomS3Storage(S3Storage):

    def __init__(self, **settings):
        self.in_flight_custom_metadata = {}  # type is Dict[str, Dict[str, str]]
        super().__init__(**settings)

    def get_default_settings(self):
        # add a default for the ietf_log_blob_timing boolean
        return super().get_default_settings() | {"ietf_log_blob_timing": False}

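    # Note: django-storages applies get_default_settings() values as instance
    # attributes and lets constructor kwargs override them, so a deployment
    # can switch on timing logs through the OPTIONS mapping of Django's
    # STORAGES setting. A minimal sketch (backend path and storage alias are
    # hypothetical):
    #
    #     STORAGES["blobdb"] = {
    #         "BACKEND": "ietf.utils.storage.CustomS3Storage",
    #         "OPTIONS": {"ietf_log_blob_timing": True},
    #     }
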
    def _save(self, name, content):
        with maybe_log_timing(
            self.ietf_log_blob_timing, "_save", bucket_name=self.bucket_name, name=name
        ):
            return super()._save(name, content)

    def _open(self, name, mode="rb"):
        with maybe_log_timing(
            self.ietf_log_blob_timing,
            "_open",
            bucket_name=self.bucket_name,
            name=name,
            mode=mode,
        ):
            return super()._open(name, mode)

    def delete(self, name):
        with maybe_log_timing(
            self.ietf_log_blob_timing, "delete", bucket_name=self.bucket_name, name=name
        ):
            super().delete(name)

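    # With ietf_log_blob_timing enabled, each wrapper above emits one JSON
    # object per call via log(). An illustrative line (the values shown are
    # made up for the example):
    #
    #     {"log": "S3Storage_timing", "seconds": 0.012, "op": "_save",
    #      "exception": "", "bucket_name": "staging", "name": "draft-foo-00.txt"}
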
    def store_file(
        self,
        kind: str,
        name: str,
        file: Union[File, BufferedReader],
        allow_overwrite: bool = False,
        doc_name: Optional[str] = None,
        doc_rev: Optional[str] = None,
    ):
        is_new = not self.exists_in_storage(kind, name)
        # debug.show('f"Asked to store {name} in {kind}: is_new={is_new}, allow_overwrite={allow_overwrite}"')
        if not allow_overwrite and not is_new:
            log(f"Failed to save {kind}:{name} - name already exists in store")
            debug.show('f"Failed to save {kind}:{name} - name already exists in store"')
            # raise Exception("Not ignoring overwrite attempts while testing")
        else:
            try:
                new_name = self.save(name, file)
                now = timezone.now()
                record, created = StoredObject.objects.get_or_create(
                    store=kind,
                    name=name,
                    defaults=dict(
                        sha384=self.in_flight_custom_metadata[name]["sha384"],
                        len=int(self.in_flight_custom_metadata[name]["len"]),
                        store_created=now,
                        created=now,
                        modified=now,
                        doc_name=doc_name,  # Note that these are assumed to be invariant
                        doc_rev=doc_rev,  # for a given name
                    ),
                )
                if not created:
                    record.sha384 = self.in_flight_custom_metadata[name]["sha384"]
                    record.len = int(self.in_flight_custom_metadata[name]["len"])
                    record.modified = now
                    record.deleted = None
                    record.save()
                if new_name != name:
                    complaint = f"Error encountered saving '{name}' - results stored in '{new_name}' instead."
                    log(complaint)
                    debug.show("complaint")
                    # Note that we are otherwise ignoring this condition - it should become an error later.
            except Exception as e:
                # Log and then swallow the exception while we're learning.
                # Don't let failure pass so quietly when these are the authoritative bits.
                complaint = f"Failed to save {kind}:{name}"
                log(complaint, e)
                debug.show('f"{complaint}: {e}"')
            finally:
                # Use pop() rather than del: if save() failed before
                # _get_write_parameters() ran, there is no entry for this name
                # and del would raise a KeyError from the finally block.
                self.in_flight_custom_metadata.pop(name, None)
        return None

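    # A hypothetical call sequence, assuming one CustomS3Storage instance per
    # blob "kind" (bucket):
    #
    #     store = CustomS3Storage(bucket_name="draft")
    #     with open("draft-foo-00.txt", "rb") as f:
    #         store.store_file("draft", "draft-foo-00.txt", f,
    #                          doc_name="draft-foo", doc_rev="00")
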
    def exists_in_storage(self, kind: str, name: str) -> bool:
        try:
            # open is realized with a HEAD
            # See https://github.com/jschneier/django-storages/blob/b79ea310201e7afd659fe47e2882fe59aae5b517/storages/backends/s3.py#L528
            with self.open(name):
                return True
        except FileNotFoundError:
            return False

    def remove_from_storage(
        self, kind: str, name: str, warn_if_missing: bool = True
    ) -> None:
        now = timezone.now()
        try:
            with self.open(name):
                pass
            self.delete(name)
            # debug.show('f"deleted {name} from {kind} storage"')
        except FileNotFoundError:
            if warn_if_missing:
                complaint = (
                    f"WARNING: Asked to delete non-existent {name} from {kind} storage"
                )
                log(complaint)
                debug.show("complaint")
        existing_record = StoredObject.objects.filter(store=kind, name=name)
        if not existing_record.exists() and warn_if_missing:
            complaint = f"WARNING: Asked to delete {name} from {kind} storage, but there was no matching StoredObject"
            log(complaint)
            debug.show("complaint")
        else:
            # Note that existing_record is a queryset with at most one matching object
            existing_record.filter(deleted__isnull=True).update(deleted=now)

    def _get_write_parameters(self, name, content=None):
        # debug.show('f"getting write parameters for {name}"')
        params = super()._get_write_parameters(name, content)
        if "Metadata" not in params:
            params["Metadata"] = {}
        try:
            content.seek(0)
        except AttributeError:  # TODO-BLOBSTORE
            debug.say("Encountered Non-Seekable content")
            raise NotImplementedError("cannot handle unseekable content")
        content_bytes = content.read()
        if not isinstance(
            content_bytes, bytes
        ):  # TODO-BLOBSTORE: This is sketch-development only - remove before committing
            raise Exception(f"Expected bytes - got {type(content_bytes)}")
        content.seek(0)
        metadata = {
            "len": f"{len(content_bytes)}",
            "sha384": f"{sha384(content_bytes).hexdigest()}",
        }
        params["Metadata"].update(metadata)
        self.in_flight_custom_metadata[name] = metadata
        return params
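
    # How the pieces fit together: _get_write_parameters() runs inside save(),
    # computes the content's length and sha384 digest, attaches them as S3
    # object metadata, and stashes them in in_flight_custom_metadata under the
    # object name; store_file() then consumes that entry to populate the
    # StoredObject record and clears it in its finally block.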