datatracker/ietf/idindex/tasks.py
Robert Sparks 997239a2ea
feat: write objects to blob storage (#8557)
* feat: basic blobstore infrastructure for dev

* refactor: (broken) attempt to put minio console behind nginx

* feat: initialize blobstore with boto3

* fix: abandon attempt to proxy minio. Use docker compose instead.

* feat: beginning of blob writes

* feat: storage utilities

* feat: test buckets

* chore: black

* chore: remove unused import

* chore: avoid f string when not needed

* fix: inform all settings files about blobstores

* fix: declare types for some settings

* ci: point to new target base

* ci: adjust test workflow

* fix: give the tests debug environment a blobstore

* fix: "better" name declarations

* ci: use devblobstore container

* chore: identify places to write to blobstorage

* chore: remove unreachable code

* feat: store materials

* feat: store statements

* feat: store status changes

* feat: store liaison attachments

* feat: store agendas provided with Interim session requests

* chore: capture TODOs

* feat: store polls and chatlogs

* chore: remove unneeded TODO

* feat: store drafts on submit and post

* fix: handle storage during doc expiration and resurrection

* fix: mirror an unlink

* chore: add/refine TODOs

* feat: store slide submissions

* fix: structure slide test correctly

* fix: correct sense of existence check

* feat: store some indexes

* feat: BlobShadowFileSystemStorage

* feat: shadow floorplans / host logos to the blob

* chore: remove unused import

* feat: strip path from blob shadow names

* feat: shadow photos / thumbs

* refactor: combine photo and photothumb blob kinds

The photos / thumbs were already dropped in the same
directory, so let's not add a distinction at this point.

* style: whitespace

* refactor: use kwargs consistently

* chore: migrations

* refactor: better deconstruct(); rebuild migrations

* fix: use new class in mock patch

* chore: add TODO

* feat: store group index documents

* chore: identify more TODO

* feat: store reviews

* fix: repair merge

* chore: remove unnecessary TODO

* feat: StoredObject metadata

* fix: deburr some debugging code

* fix: only set the deleted timestamp once

* chore: correct typo

* fix: get_or_create vs get and test

* fix: avoid the questionable is_seekable helper

* chore: capture future design consideration

* chore: blob store cfg for k8s

* chore: black

* chore: copyright

* ci: bucket name prefix option + run Black

Adds/uses DATATRACKER_BLOB_STORE_BUCKET_PREFIX option. Other changes
are just Black styling.

* ci: fix typo in bucket name expression

* chore: parameters in app-configure-blobstore

Allows use with other blob stores.

* ci: remove verify=False option

* fix: don't return value from __init__

* feat: option to log timing of S3Storage calls

* chore: units

* fix: deleted->null when storing a file

* style: Black

* feat: log as JSON; refactor to share code; handle exceptions

* ci: add ietf_log_blob_timing option for k8s

* test: --no-manage-blobstore option for running tests

* test: use blob store settings from env, if set

* test: actually set a couple more storage opts

* feat: offswitch (#8541)

* feat: offswitch

* fix: apply ENABLE_BLOBSTORAGE to BlobShadowFileSystemStorage behavior

* chore: log timing of blob reads

* chore: import Config from botocore.config

* chore(deps): import boto3-stubs / botocore

botocore is implicitly imported, but make it explicit
since we refer to it directly

* chore: drop type annotation that mypy loudly ignores

* refactor: add storage methods via mixin

Shares code between Document and DocHistory without
putting it in the base DocumentInfo class, which
lacks the name field. Also makes mypy happy.

* feat: add timeout / retry limit to boto client

* ci: let k8s config the timeouts via env

* chore: repair merge resolution typo

* chore: tweak settings imports

* chore: simplify k8s/settings_local.py imports

---------

Co-authored-by: Jennifer Richards <jennifer@staff.ietf.org>
2025-02-19 17:41:10 -06:00

100 lines
4.1 KiB
Python

# Copyright The IETF Trust 2024, All Rights Reserved
#
# Celery task definitions
#
import os
import shutil
from contextlib import AbstractContextManager
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import List, Optional

import debug # pyflakes:ignore
from celery import shared_task
from django.conf import settings

from ietf.doc.storage_utils import store_file
from .index import all_id_txt, all_id2_txt, id_index_txt
class TempFileManager(AbstractContextManager):
    """Context manager that tracks temporary files and removes leftovers on exit.

    Files created with make_temp_file() are remembered in ``cleanup_list``.
    A file that is successfully handed to move_into_place() is dropped from
    that list; anything still listed when the context exits (normally or via
    an exception) is unlinked.
    """

    def __init__(self, tmpdir=None) -> None:
        """Create a manager whose temp files are created in *tmpdir*.

        *tmpdir* is passed straight through as NamedTemporaryFile's ``dir``
        argument, so ``None`` selects the platform default temp directory.
        """
        # Paths of temp files that still need to be removed on cleanup
        self.cleanup_list: set[Path] = set()
        self.dir = tmpdir

    def make_temp_file(self, content: str) -> Path:
        """Write *content* to a new temporary text file and return its path.

        The file is created with ``delete=False`` so that it survives being
        closed; it is registered for cleanup before anything is written, so
        a failed write still gets cleaned up.
        """
        with NamedTemporaryFile(mode="wt", delete=False, dir=self.dir) as tf:
            tf_path = Path(tf.name)
            self.cleanup_list.add(tf_path)
            tf.write(content)
        return tf_path

    def move_into_place(
        self,
        src_path: Path,
        dest_path: Path,
        hardlink_dirs: Optional[List[Path]] = None,
    ):
        """Move a managed temp file to *dest_path* and publish copies of it.

        After the move, *dest_path* is made world-readable (mode 0o644) and
        *src_path* is removed from the cleanup list. For each directory in
        *hardlink_dirs*, any existing entry named like *dest_path* is
        unlinked and replaced by a hard link to *dest_path*. Finally the file
        is passed to store_file() as "indexes" / ``dest_path.name`` with
        overwriting allowed.

        Raises:
            KeyError: if *src_path* is not currently in the cleanup list.
        """
        shutil.move(src_path, dest_path)
        dest_path.chmod(0o644)
        self.cleanup_list.remove(src_path)
        # None (rather than a shared mutable [] default) means "no hardlinks"
        for path in hardlink_dirs or ():
            target = path / dest_path.name
            target.unlink(missing_ok=True)
            # os.link() only until python>=3.10, where Path.hardlink_to() exists
            os.link(dest_path, target)
        with dest_path.open("rb") as f:
            store_file("indexes", dest_path.name, f, allow_overwrite=True)

    def cleanup(self):
        """Unlink every temp file still on the cleanup list (missing files are ignored)."""
        for tf_path in self.cleanup_list:
            tf_path.unlink(missing_ok=True)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clean up leftover temp files; never suppress an in-flight exception."""
        self.cleanup()
        return False  # False: do not suppress the exception
@shared_task
def idindex_update_task():
    """Regenerate the I-D index files and move them into their published locations.

    Each index is generated once and written to one temp file per destination.
    Only after every temp file exists are they moved into place, so that the
    replacement of the published files happens as close together as possible.
    Copies placed in the internet-drafts directory are additionally
    hard-linked into the FTP and all-drafts-archive directories.
    """
    drafts_dir = Path(settings.INTERNET_DRAFT_PATH)
    derived_dir = Path(settings.DERIVED_DIR)
    download_dir = Path(settings.ALL_ID_DOWNLOAD_DIR)
    ftp_dir = Path(settings.FTP_DIR) / "internet-drafts"
    archive_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)

    # One entry per index: (content generator, [(destination, hardlink it?), ...]).
    # Order matters: it fixes both the generation order and the move order.
    index_plan = (
        (
            lambda: all_id_txt(),
            (
                (drafts_dir / "all_id.txt", True),
                (derived_dir / "all_id.txt", False),
                (download_dir / "id-all.txt", False),
            ),
        ),
        (
            lambda: id_index_txt(),
            (
                (drafts_dir / "1id-index.txt", True),
                (derived_dir / "1id-index.txt", False),
                (download_dir / "id-index.txt", False),
            ),
        ),
        (
            lambda: id_index_txt(with_abstracts=True),
            (
                (drafts_dir / "1id-abstracts.txt", True),
                (derived_dir / "1id-abstracts.txt", False),
                (download_dir / "id-abstract.txt", False),
            ),
        ),
        (
            lambda: all_id2_txt(),
            (
                (drafts_dir / "all_id2.txt", True),
                (derived_dir / "all_id2.txt", False),
            ),
        ),
    )

    with TempFileManager() as tmp_mgr:
        # Generate copies of new contents
        staged = []
        for generate, destinations in index_plan:
            content = generate()
            for destination, wants_hardlinks in destinations:
                staged.append(
                    (tmp_mgr.make_temp_file(content), destination, wants_hardlinks)
                )

        # Move temp files as-atomically-as-possible into place
        for tmpfile, destination, wants_hardlinks in staged:
            if wants_hardlinks:
                tmp_mgr.move_into_place(tmpfile, destination, [ftp_dir, archive_dir])
            else:
                tmp_mgr.move_into_place(tmpfile, destination)