datatracker/ietf/doc/tasks.py
Robert Sparks 997239a2ea
feat: write objects to blob storage (#8557)
* feat: basic blobstore infrastructure for dev

* refactor: (broken) attempt to put minio console behind nginx

* feat: initialize blobstore with boto3

* fix: abandon attempt to proxy minio. Use docker compose instead.

* feat: beginning of blob writes

* feat: storage utilities

* feat: test buckets

* chore: black

* chore: remove unused import

* chore: avoid f string when not needed

* fix: inform all settings files about blobstores

* fix: declare types for some settings

* ci: point to new target base

* ci: adjust test workflow

* fix: give the tests debug environment a blobstore

* fix: "better" name declarations

* ci: use devblobstore container

* chore: identify places to write to blobstorage

* chore: remove unreachable code

* feat: store materials

* feat: store statements

* feat: store status changes

* feat: store liaison attachments

* feat: store agendas provided with Interim session requests

* chore: capture TODOs

* feat: store polls and chatlogs

* chore: remove unneeded TODO

* feat: store drafts on submit and post

* fix: handle storage during doc expiration and resurrection

* fix: mirror an unlink

* chore: add/refine TODOs

* feat: store slide submissions

* fix: structure slide test correctly

* fix: correct sense of existence check

* feat: store some indexes

* feat: BlobShadowFileSystemStorage

* feat: shadow floorplans / host logos to the blob

* chore: remove unused import

* feat: strip path from blob shadow names
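
The shadow storage wraps Django's stock FileSystemStorage so existing code keeps
writing files to disk while a copy lands in the blob store. A minimal sketch of
the idea, assuming a hypothetical store_blob() helper (the real write path lives
in the storage utilities added earlier):

    from pathlib import Path
    from django.core.files.storage import FileSystemStorage

    class BlobShadowFileSystemStorage(FileSystemStorage):
        """Save to the filesystem as usual, shadowing each file to the blob store."""

        def __init__(self, *, kind: str = "", **kwargs):
            self.kind = kind  # which bucket the shadow copies belong to
            super().__init__(**kwargs)

        def _save(self, name, content):
            saved_name = super()._save(name, content)
            with self.open(saved_name, "rb") as f:
                # strip the path: shadow under the bare filename only
                store_blob(self.kind, Path(saved_name).name, f)  # hypothetical helper
            return saved_name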

* feat: shadow photos / thumbs

* refactor: combine photo and photothumb blob kinds

The photos / thumbs were already dropped in the same
directory, so let's not add a distinction at this point.

* style: whitespace

* refactor: use kwargs consistently

* chore: migrations

* refactor: better deconstruct(); rebuild migrations

* fix: use new class in mock patch

* chore: add TODO

* feat: store group index documents

* chore: identify more TODOs

* feat: store reviews

* fix: repair merge

* chore: remove unnecessary TODO

* feat: StoredObject metadata

* fix: deburr some debugging code

* fix: only set the deleted timestamp once

* chore: correct typo

* fix: get_or_create vs get and test
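
That is, replace the racy get-then-test pattern with Django's atomic idiom; the
model is the StoredObject added above, though the fields shown are illustrative:

    record, created = StoredObject.objects.get_or_create(
        store=kind,
        name=name,
        defaults={"len": content_length, "sha384": content_digest},
    )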

* fix: avoid the questionable is_seekable helper

* chore: capture future design consideration

* chore: blob store cfg for k8s

* chore: black

* chore: copyright

* ci: bucket name prefix option + run Black

Adds/uses DATATRACKER_BLOB_STORE_BUCKET_PREFIX option. Other changes
are just Black styling.
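
Each deployment derives its bucket names by prepending the prefix, so several
instances can share one object store. A sketch, with a hypothetical helper:

    import os

    def blob_store_bucket_name(kind: str) -> str:  # hypothetical helper
        # e.g. DATATRACKER_BLOB_STORE_BUCKET_PREFIX="test-" -> "test-draft"
        prefix = os.environ.get("DATATRACKER_BLOB_STORE_BUCKET_PREFIX", "")
        return f"{prefix}{kind}"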

* ci: fix typo in bucket name expression

* chore: parameters in app-configure-blobstore

Allows use with other blob stores.

* ci: remove verify=False option

* fix: don't return value from __init__

* feat: option to log timing of S3Storage calls

* chore: units

* fix: deleted->null when storing a file

* style: Black

* feat: log as JSON; refactor to share code; handle exceptions
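
Conceptually, each storage call is wrapped so its duration is emitted as a JSON
log line even when the call raises; names here are illustrative:

    import json
    import time
    from functools import wraps

    from ietf.utils import log

    def log_timing(method):
        """Log the wrapped storage method's duration as a JSON record."""
        @wraps(method)
        def wrapper(self, name, *args, **kwargs):
            start = time.monotonic()
            try:
                return method(self, name, *args, **kwargs)
            finally:
                # runs whether the call returned or raised
                log.log(json.dumps({
                    "method": method.__name__,
                    "name": name,
                    "elapsed_seconds": time.monotonic() - start,
                }))
        return wrapper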

* ci: add ietf_log_blob_timing option for k8s

* test: --no-manage-blobstore option for running tests

* test: use blob store settings from env, if set

* test: actually set a couple more storage opts

* feat: offswitch (#8541)

* feat: offswitch

* fix: apply ENABLE_BLOBSTORAGE to BlobShadowFileSystemStorage behavior
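
The offswitch gates every blob write on a single setting; a sketch of the guard,
with an illustrative write-path function:

    from django.conf import settings

    def store_file(kind, name, file):  # illustrative write path
        if not settings.ENABLE_BLOBSTORAGE:
            return  # blob storage disabled: writes become no-ops
        ...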

* chore: log timing of blob reads

* chore: import Config from botocore.config

* chore(deps): import boto3-stubs / botocore

botocore is implicitly imported, but make it explicit
since we refer to it directly

* chore: drop type annotation that mypy loudly ignores

* refactor: add storage methods via mixin

Shares code between Document and DocHistory without
putting it in the base DocumentInfo class, which
lacks the name field. Also makes mypy happy.
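
A minimal sketch of the shape (class name and method bodies illustrative):

    class StorageMixin:
        def store_bytes(self, kind: str, name: str, content: bytes) -> None:
            # safe here: Document and DocHistory both define .name,
            # which the plain DocumentInfo base class does not
            ...

    class Document(StorageMixin, DocumentInfo):
        ...

    class DocHistory(StorageMixin, DocumentInfo):
        ...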

* feat: add timeout / retry limit to boto client
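
This uses botocore's Config (imported explicitly a few commits back); the values
below are placeholders, supplied from the environment in the next commit:

    import boto3
    from botocore.config import Config

    s3 = boto3.client(
        "s3",
        config=Config(
            connect_timeout=10,  # seconds
            read_timeout=60,
            retries={"max_attempts": 2, "mode": "standard"},
        ),
    )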

* ci: let k8s config the timeouts via env

* chore: repair merge resolution typo

* chore: tweak settings imports

* chore: simplify k8s/settings_local.py imports

---------

Co-authored-by: Jennifer Richards <jennifer@staff.ietf.org>
2025-02-19 17:41:10 -06:00

131 lines
4.4 KiB
Python

# Copyright The IETF Trust 2024, All Rights Reserved
#
# Celery task definitions
#
import datetime
import debug  # pyflakes:ignore

from celery import shared_task
from pathlib import Path

from django.conf import settings
from django.utils import timezone

from ietf.utils import log
from ietf.utils.timezone import datetime_today

from .expire import (
    in_draft_expire_freeze,
    get_expired_drafts,
    expirable_drafts,
    send_expire_notice_for_draft,
    expire_draft,
    clean_up_draft_files,
    get_soon_to_expire_drafts,
    send_expire_warning_for_draft,
)
from .lastcall import get_expired_last_calls, expire_last_call
from .models import Document, NewRevisionDocEvent
from .utils import (
    generate_idnits2_rfc_status,
    generate_idnits2_rfcs_obsoleted,
    update_or_create_draft_bibxml_file,
    ensure_draft_bibxml_path_exists,
    investigate_fragment,
)


@shared_task
def expire_ids_task():
    try:
        if not in_draft_expire_freeze():
            log.log("Expiring drafts ...")
            for doc in get_expired_drafts():
                # verify expirability -- it might have changed after get_expired_drafts() was run
                # (this whole loop took about 2 minutes on 04 Jan 2018)
                # N.B., re-running expirable_drafts() repeatedly is fairly expensive. Where possible,
                # it's much faster to run it once on a superset query of the objects you are going
                # to test and keep its results. That's not desirable here because it would defeat
                # the purpose of double-checking that a document is still expirable when it is actually
                # being marked as expired.
                if expirable_drafts(
                    Document.objects.filter(pk=doc.pk)
                ).exists() and doc.expires < datetime_today() + datetime.timedelta(1):
                    send_expire_notice_for_draft(doc)
                    expire_draft(doc)
                    log.log(f"  Expired draft {doc.name}-{doc.rev}")
        log.log("Cleaning up draft files")
        clean_up_draft_files()
    except Exception as e:
        log.log("Exception in expire-ids: %s" % e)
        raise


@shared_task
def notify_expirations_task(notify_days=14):
    for doc in get_soon_to_expire_drafts(notify_days):
        send_expire_warning_for_draft(doc)


@shared_task
def expire_last_calls_task():
    for doc in get_expired_last_calls():
        try:
            expire_last_call(doc)
        except Exception:
            log.log(f"ERROR: Failed to expire last call for {doc.file_tag()} (id={doc.pk})")
        else:
            log.log(f"Expired last call for {doc.file_tag()} (id={doc.pk})")


@shared_task
def generate_idnits2_rfc_status_task():
    outpath = Path(settings.DERIVED_DIR) / "idnits2-rfc-status"
    blob = generate_idnits2_rfc_status()
    try:
        outpath.write_text(blob, encoding="utf8")  # TODO-BLOBSTORE
    except Exception as e:
        log.log(f"failed to write idnits2-rfc-status: {e}")


@shared_task
def generate_idnits2_rfcs_obsoleted_task():
    outpath = Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted"
    blob = generate_idnits2_rfcs_obsoleted()
    try:
        outpath.write_text(blob, encoding="utf8")  # TODO-BLOBSTORE
    except Exception as e:
        log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")


@shared_task
def generate_draft_bibxml_files_task(days=7, process_all=False):
    """Generate bibxml files for recently updated docs

    If process_all is False (the default), processes only docs with new revisions
    in the last specified number of days.
    """
    if not process_all and days < 1:
        raise ValueError("Must call with days >= 1 or process_all=True")

    ensure_draft_bibxml_path_exists()
    doc_events = NewRevisionDocEvent.objects.filter(
        type="new_revision",
        doc__type_id="draft",
    ).order_by("time")
    if not process_all:
        doc_events = doc_events.filter(time__gte=timezone.now() - datetime.timedelta(days=days))
    for event in doc_events:
        try:
            update_or_create_draft_bibxml_file(event.doc, event.rev)
        except Exception as err:
            log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")


@shared_task(ignore_result=False)
def investigate_fragment_task(name_fragment: str):
    return {
        "name_fragment": name_fragment,
        "results": investigate_fragment(name_fragment),
    }
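
These are ordinary Celery tasks, so they can also be invoked out of band.
investigate_fragment_task is declared with ignore_result=False because its
caller needs the returned dict; for example (the name fragment is illustrative):

    from ietf.doc.tasks import investigate_fragment_task

    result = investigate_fragment_task.delay("draft-ietf-example")  # AsyncResult
    print(result.get(timeout=60)["results"])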