datatracker/ietf/sync/tasks.py
Jennifer Richards 8d12071bf5
refactor: Move cron jobs to celery tasks (#6926)
* refactor: Factor out helper from fetch_meeting_attendance.py

* feat: Define fetch_meeting_attendance_task task

Equivalent to the fetch_meeting_attendance management command

* chore: Disable fetch_meeting_attendance in bin/daily

* feat: Log errors in fetch_meeting_attendance_task

* feat: Enable a result backend for celery

Ignore results by default, but enable the backend so we
can manage tasks

* feat: Define daily task in ietf.utils.tasks

* refactor: Make bin/send-review-reminders into a task

* refactor: Make bin/send-scheduled-mail into a task

* chore: Update copyright years

* refactor: Make bin/rfc-editor-index-updates into a task

* refactor: Accept date type in rfc index update fn

* chore: Update comment

* fix: Annotate param as Optional

* fix: Revert treating skip_older_than_date as str

Misunderstood the comment, "fixed" a non-bug. Oops.

* feat: mgmt command to create periodic tasks

* feat: add summary of tasks to mgmt cmd

* style: black

* fix: Remove debug statements

* feat: Enable/disable tasks

* chore: Disable periodic tasks by default

* chore: Revert changes to daily and every15m

* fix: Call intended function

* chore: Add task descriptions
2024-01-22 12:04:16 -06:00

68 lines
2.5 KiB
Python

# Copyright The IETF Trust 2024, All Rights Reserved
#
# Celery task definitions
#
import datetime
import io
import requests
from celery import shared_task
from django.conf import settings
from ietf.sync.rfceditor import MIN_ERRATA_RESULTS, MIN_INDEX_RESULTS, parse_index, update_docs_from_rfc_index
from ietf.utils import log
from ietf.utils.timezone import date_today
@shared_task
def rfc_editor_index_update_task(full_index=False):
"""Update metadata from the RFC index
Default is to examine only changes in the past 365 days. Call with full_index=True to update
the full RFC index.
According to comments on the original script, a year's worth took about 20s on production as of
August 2022
The original rfc-editor-index-update script had a long-disabled provision for running the
rebuild_reference_relations scripts after the update. That has not been brought over
at all because it should be implemented as its own task if it is needed.
"""
skip_date = None if full_index else date_today() - datetime.timedelta(days=365)
log.log(
"Updating document metadata from RFC index going back to {since}, from {url}".format(
since=skip_date if skip_date is not None else "the beginning",
url=settings.RFC_EDITOR_INDEX_URL,
)
)
try:
response = requests.get(
settings.RFC_EDITOR_INDEX_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log.log(f'GET request timed out retrieving RFC editor index: {exc}')
return # failed
rfc_index_xml = response.text
index_data = parse_index(io.StringIO(rfc_index_xml))
try:
response = requests.get(
settings.RFC_EDITOR_ERRATA_JSON_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log.log(f'GET request timed out retrieving RFC editor errata: {exc}')
return # failed
errata_data = response.json()
if len(index_data) < MIN_INDEX_RESULTS:
log.log("Not enough index entries, only %s" % len(index_data))
return # failed
if len(errata_data) < MIN_ERRATA_RESULTS:
log.log("Not enough errata entries, only %s" % len(errata_data))
return # failed
for rfc_number, changes, doc, rfc_published in update_docs_from_rfc_index(
index_data, errata_data, skip_older_than_date=skip_date
):
for c in changes:
log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c))