refactor: generate I-D bibxml files via celery (#7426)
* refactor: task to generate_draft_bibxml_files
* test: test task/utility methods
* chore: add periodic task
* chore: remove generate_draft_bibxml_files.py
* chore: further prune /bin/hourly
This commit is contained in:
parent
de8b3b5ce3
commit
ffb9eb12ff
18
bin/hourly
18
bin/hourly
|
@ -5,33 +5,15 @@
|
|||
# This script is expected to be triggered by cron from
# /etc/cron.d/datatracker
export LANG=en_US.UTF-8
export PYTHONIOENCODING=utf-8

# Make sure we stop if something goes wrong:
# the ERR trap reports the failing line and exit status, then aborts the run.
program=${0##*/}
trap 'echo "$program($LINENO): Command failed with error code $? ([$$] $0 $*)"; exit 1' ERR

DTDIR=/a/www/ietf-datatracker/web
# Path expansions are quoted throughout so they are never word-split or globbed.
cd "$DTDIR/"

# Set up the virtual environment
source "$DTDIR/env/bin/activate"

logger -p user.info -t cron "Running $DTDIR/bin/hourly"

# Generate some static files
# NOTE(review): ID, DERIVED and DOWNLOAD are assigned but not referenced in the
# visible part of this script -- confirm nothing later relies on them.
ID=/a/ietfdata/doc/draft/repository
DERIVED=/a/ietfdata/derived
DOWNLOAD=/a/www/www6s/download

# Refresh the local copies of the working-group charter listings.
CHARTER=/a/www/ietf-ftp/charter
wget -q https://datatracker.ietf.org/wg/1wg-charters-by-acronym.txt -O "$CHARTER/1wg-charters-by-acronym.txt"
wget -q https://datatracker.ietf.org/wg/1wg-charters.txt -O "$CHARTER/1wg-charters.txt"

# Regenerate the last week of bibxml-ids
"$DTDIR/ietf/manage.py" generate_draft_bibxml_files

# Create and update group wikis
#$DTDIR/ietf/manage.py create_group_wikis

# exit 0
|
||||
|
|
|
@ -1,84 +0,0 @@
|
|||
# Copyright The IETF Trust 2012-2020, All Rights Reserved
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.utils import timezone
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.doc.models import NewRevisionDocEvent
|
||||
from ietf.doc.utils import bibxml_for_draft
|
||||
|
||||
DEFAULT_DAYS = 7
|
||||
|
||||
class Command(BaseCommand):
    """Management command that writes bibxml reference files for draft revisions.

    One file named ``reference.I-D.<name>-<rev>.xml`` is written into the
    ``bibxml-ids`` directory under ``settings.BIBXML_BASE_PATH`` for every
    selected ``new_revision`` event on a draft. A file is only rewritten when
    its content, ignoring leading/trailing whitespace, has changed.
    """

    help = ('Generate draft bibxml files for xml2rfc references, placing them in the '
            'directory configured in settings.BIBXML_BASE_PATH: %s. '
            'By default, generate files as needed for new Internet-Draft revisions from the '
            'last %s days.' % (settings.BIBXML_BASE_PATH, DEFAULT_DAYS))

    def add_arguments(self, parser):
        """Register the ``--all`` and ``--days`` command line options."""
        parser.add_argument('--all', action='store_true', default=False, help="Process all documents, not only recent submissions")
        parser.add_argument('--days', type=int, default=DEFAULT_DAYS, help="Look submissions from the last DAYS days, instead of %s" % DEFAULT_DAYS)

    def _emit(self, msg, threshold):
        """Write ``msg`` and a newline to stdout when verbosity exceeds ``threshold``."""
        if self.verbosity > threshold:
            sys.stdout.write(msg)
            sys.stdout.write('\n')

    def say(self, msg):
        """Print ``msg`` when verbosity is greater than 0."""
        self._emit(msg, 0)

    def note(self, msg):
        """Print ``msg`` when verbosity is greater than 1."""
        self._emit(msg, 1)

    def mutter(self, msg):
        """Print ``msg`` when verbosity is greater than 2."""
        self._emit(msg, 2)

    def write(self, fn, new):
        """Write ``new`` to the file ``fn`` unless the file already holds that content.

        Line endings in ``new`` are normalized to ``\\n`` first. The comparison
        with the existing content ignores leading and trailing whitespace; a
        file that cannot be read is treated as empty.
        """
        # normalize new
        new = re.sub(r'\r\n?', r'\n', new)
        try:
            with io.open(fn, encoding='utf-8') as f:
                old = f.read()
        except IOError:
            old = ""
        if old.strip() != new.strip():
            self.note('Writing %s' % os.path.basename(fn))
            with io.open(fn, "w", encoding='utf-8') as f:
                f.write(new)

    def handle(self, *args, **options):
        """Generate the bibxml file for each selected new-revision event.

        With ``all`` set, every ``new_revision`` event on a draft is processed;
        otherwise only events from the last ``days`` days. A failure on one
        event is reported on stderr and does not stop the remaining events.
        """
        self.verbosity = options.get("verbosity", 1)
        process_all = options.get("all")
        days = options.get("days")

        bibxmldir = os.path.join(settings.BIBXML_BASE_PATH, 'bibxml-ids')
        # exist_ok avoids the check-then-create race of testing os.path.exists() first.
        os.makedirs(bibxmldir, exist_ok=True)

        doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft')
        if not process_all:
            start = timezone.now() - datetime.timedelta(days=days)
            doc_events = doc_events.filter(time__gte=start)
        doc_events = doc_events.order_by('time')

        for e in doc_events:
            doc = None
            try:
                # Resolve the document inside the try so that a lookup failure
                # is reported like any other per-event error.
                doc = e.doc
                self.mutter('%s %s' % (e.time, doc.name))
                bibxml = bibxml_for_draft(doc, e.rev)
                ref_rev_file_name = os.path.join(bibxmldir, 'reference.I-D.%s-%s.xml' % (doc.name, e.rev))
                self.write(ref_rev_file_name, bibxml)
            except Exception as ee:
                # Report the revision of the event being processed (e.rev), not
                # the document's current revision.
                name = doc.name if doc is not None else '(unknown document)'
                sys.stderr.write('\n%s-%s: %s\n' % (name, e.rev, ee))
|
|
@ -9,6 +9,7 @@ from celery import shared_task
|
|||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from ietf.utils import log
|
||||
from ietf.utils.timezone import datetime_today
|
||||
|
@ -24,8 +25,13 @@ from .expire import (
|
|||
send_expire_warning_for_draft,
|
||||
)
|
||||
from .lastcall import get_expired_last_calls, expire_last_call
|
||||
from .models import Document
|
||||
from .utils import generate_idnits2_rfc_status, generate_idnits2_rfcs_obsoleted
|
||||
from .models import Document, NewRevisionDocEvent
|
||||
from .utils import (
|
||||
generate_idnits2_rfc_status,
|
||||
generate_idnits2_rfcs_obsoleted,
|
||||
update_or_create_draft_bibxml_file,
|
||||
ensure_draft_bibxml_path_exists,
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
|
@ -90,3 +96,24 @@ def generate_idnits2_rfcs_obsoleted_task():
|
|||
outpath.write_text(blob, encoding="utf8")
|
||||
except Exception as e:
|
||||
log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")
|
||||
|
||||
|
||||
@shared_task
def generate_draft_bibxml_files_task(days=7, process_all=False):
    """Generate bibxml files for recently updated docs

    If process_all is False (the default), processes only docs with new revisions
    in the last specified number of days.

    Events are handled in order of their time. An exception raised while
    handling one event is logged and the remaining events are still processed.
    """
    ensure_draft_bibxml_path_exists()
    doc_events = (
        NewRevisionDocEvent.objects.filter(
            type="new_revision",
            doc__type_id="draft",
        )
        # The loop reads event.doc for every event; fetch it in the same query
        # instead of issuing one extra query per event.
        .select_related("doc")
        .order_by("time")
    )
    if not process_all:
        earliest = timezone.now() - datetime.timedelta(days=days)
        doc_events = doc_events.filter(time__gte=earliest)
    for event in doc_events:
        try:
            update_or_create_draft_bibxml_file(event.doc, event.rev)
        except Exception as err:
            # Best effort: keep going so one bad draft does not block the rest.
            log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
# Copyright The IETF Trust 2024, All Rights Reserved
|
||||
import datetime
|
||||
import mock
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from ietf.utils.test_utils import TestCase
|
||||
from ietf.utils.timezone import datetime_today
|
||||
|
||||
from .factories import DocumentFactory
|
||||
from .models import Document
|
||||
from .factories import DocumentFactory, NewRevisionDocEventFactory
|
||||
from .models import Document, NewRevisionDocEvent
|
||||
from .tasks import (
|
||||
expire_ids_task,
|
||||
expire_last_calls_task,
|
||||
generate_draft_bibxml_files_task,
|
||||
generate_idnits2_rfcs_obsoleted_task,
|
||||
generate_idnits2_rfc_status_task,
|
||||
notify_expirations_task,
|
||||
|
@ -114,3 +117,86 @@ class TaskTests(TestCase):
|
|||
"dåtå".encode("utf8"),
|
||||
(Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted").read_bytes(),
|
||||
)
|
||||
|
||||
@mock.patch("ietf.doc.tasks.ensure_draft_bibxml_path_exists")
@mock.patch("ietf.doc.tasks.update_or_create_draft_bibxml_file")
def test_generate_draft_bibxml_files_task(self, mock_create, mock_ensure_path):
    """The task selects events by age and keeps going after a failure.

    Both helpers the task calls are patched out, so only the choice of
    (doc, rev) pairs handed to the file writer is checked.
    """
    now = timezone.now()

    def days_ago(count):
        return now - datetime.timedelta(days=count)

    very_old_event = NewRevisionDocEventFactory(time=days_ago(1000), rev="17")
    old_event = NewRevisionDocEventFactory(time=days_ago(8), rev="03")
    young_event = NewRevisionDocEventFactory(time=days_ago(6), rev="06")
    # a couple that should always be ignored
    NewRevisionDocEventFactory(
        time=days_ago(6), rev="09", doc__type_id="rfc"  # not a draft
    )
    NewRevisionDocEventFactory(
        type="changed_document",  # not a "new_revision" type
        time=days_ago(6),
        rev="09",
        doc__type_id="rfc",
    )

    # Get rid of the "00" events created by the factories -- they're just noise for this test
    NewRevisionDocEvent.objects.filter(rev="00").delete()

    young_call = mock.call(young_event.doc, young_event.rev)
    old_call = mock.call(old_event.doc, old_event.rev)
    very_old_call = mock.call(very_old_event.doc, very_old_event.rev)

    scenarios = [
        ({}, [young_call]),  # default args - look back 7 days
        ({"days": 5}, []),  # shorter lookback
        ({"days": 9}, [young_call, old_call]),  # longer lookback
        ({"process_all": True}, [young_call, old_call, very_old_call]),  # everything
    ]
    for task_kwargs, expected_calls in scenarios:
        generate_draft_bibxml_files_task(**task_kwargs)
        self.assertTrue(mock_ensure_path.called)
        self.assertCountEqual(mock_create.call_args_list, expected_calls)
        mock_create.reset_mock()
        mock_ensure_path.reset_mock()

    # everything should still be tried, even if there's an exception
    mock_create.side_effect = RuntimeError
    generate_draft_bibxml_files_task(process_all=True)
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(
        mock_create.call_args_list,
        [young_call, old_call, very_old_call],
    )
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
import datetime
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from unittest.mock import patch
|
||||
from pathlib import Path
|
||||
from unittest.mock import call, patch
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import IntegrityError
|
||||
from django.test.utils import override_settings
|
||||
from django.utils import timezone
|
||||
|
@ -16,7 +18,8 @@ from ietf.person.models import Person
|
|||
from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
|
||||
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor
|
||||
from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
|
||||
fuzzy_find_documents, rebuild_reference_relations, build_file_urls)
|
||||
fuzzy_find_documents, rebuild_reference_relations, build_file_urls,
|
||||
ensure_draft_bibxml_path_exists, update_or_create_draft_bibxml_file)
|
||||
from ietf.utils.draft import Draft, PlaintextDraft
|
||||
from ietf.utils.xmldraft import XMLDraft
|
||||
|
||||
|
@ -484,3 +487,49 @@ class RebuildReferenceRelationsTests(TestCase):
|
|||
(self.updated.name, 'updates'),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
class DraftBibxmlTests(TestCase):
    """Tests for the draft bibxml helpers in ietf.doc.utils."""

    # NOTE(review): presumably makes the test harness point BIBXML_BASE_PATH at
    # a temporary directory for each test -- confirm against TestCase.
    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ["BIBXML_BASE_PATH"]

    def test_ensure_draft_bibxml_path_exists(self):
        """The helper creates the bibxml-ids directory when it is absent."""
        bibxml_ids_dir = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
        self.assertFalse(bibxml_ids_dir.exists())
        ensure_draft_bibxml_path_exists()
        self.assertTrue(bibxml_ids_dir.is_dir())  # false if does not exist or is not dir

    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
    def test_create_draft_bibxml_file(self, mock_bibxml_for_draft):
        """A missing reference file is created with normalized line endings."""
        bibxml_ids_dir = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
        bibxml_ids_dir.mkdir(exist_ok=False)  # expect to start with a clean slate

        doc = DocumentFactory()
        # we're pretending it's rev 26
        reference_file = bibxml_ids_dir / f"reference.I-D.{doc.name}-26.xml"

        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml_for_draft.assert_called_once_with(doc, "26")
        self.assertEqual(reference_file.read_text(), "This\nis\nmy\nbibxml")

    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
    def test_update_draft_bibxml_file(self, mock_bibxml_for_draft):
        """An existing file is replaced only when its stripped content differs."""
        bibxml_ids_dir = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
        bibxml_ids_dir.mkdir(exist_ok=False)  # expect to start with a clean slate

        doc = DocumentFactory()
        # we're pretending it's rev 26
        reference_file = bibxml_ids_dir / f"reference.I-D.{doc.name}-26.xml"
        reference_file.write_text("Old data")

        # should replace it
        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml_for_draft.assert_called_once_with(doc, "26")
        self.assertEqual(reference_file.read_text(), "This\nis\nmy\nbibxml")

        # should leave it alone if it differs only by leading/trailing whitespace
        mock_bibxml_for_draft.reset_mock()
        mock_bibxml_for_draft.return_value = " \n This\nis\nmy\nbibxml "
        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml_for_draft.assert_called_once_with(doc, "26")
        self.assertEqual(reference_file.read_text(), "This\nis\nmy\nbibxml")
|
||||
|
|
|
@ -1413,3 +1413,20 @@ def investigate_fragment(name_fragment):
|
|||
unverifiable_collections=unverifiable_collections,
|
||||
unexpected=unexpected,
|
||||
)
|
||||
|
||||
|
||||
def update_or_create_draft_bibxml_file(doc, rev):
    """Write the bibxml reference file for one draft revision if it changed.

    The bibxml produced by bibxml_for_draft(doc, rev) has its line endings
    normalized to "\\n" and is stored as reference.I-D.<doc.name>-<rev>.xml in
    the "bibxml-ids" directory under settings.BIBXML_BASE_PATH. The file is
    left untouched when it already holds the same content, ignoring leading
    and trailing whitespace. A file that cannot be read is treated as empty.
    """
    # NOTE(review): the check is passed as a string; log.assertion presumably
    # evaluates it against this function's locals, so `doc` must stay a local
    # name here -- confirm.
    log.assertion("doc.type_id == 'draft'")
    fresh_bibxml = re.sub(r"\r\n?", r"\n", bibxml_for_draft(doc, rev))
    target = (
        Path(settings.BIBXML_BASE_PATH)
        / "bibxml-ids"
        / f"reference.I-D.{doc.name}-{rev}.xml"
    )
    try:
        current_bibxml = target.read_text(encoding="utf8")
    except IOError:
        current_bibxml = ""
    if fresh_bibxml.strip() == current_bibxml.strip():
        return  # nothing new to write
    log.log(f"Writing {target}")
    target.write_text(fresh_bibxml, encoding="utf8")
|
||||
|
||||
|
||||
def ensure_draft_bibxml_path_exists():
    """Create the "bibxml-ids" directory under settings.BIBXML_BASE_PATH if needed.

    Missing parent directories are created as well, so this also works when
    BIBXML_BASE_PATH itself does not exist yet. An already existing directory
    is left as it is.
    """
    bibxml_ids_dir = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
    # parents=True: without it mkdir raises FileNotFoundError when the base
    # path is missing.
    bibxml_ids_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
@ -221,6 +221,16 @@ class Command(BaseCommand):
|
|||
),
|
||||
)
|
||||
|
||||
PeriodicTask.objects.get_or_create(
|
||||
name="Generate I-D bibxml files",
|
||||
task="ietf.doc.tasks.generate_draft_bibxml_files_task",
|
||||
defaults=dict(
|
||||
enabled=False,
|
||||
crontab=self.crontabs["hourly"],
|
||||
description="Generate draft bibxml files for the last week's drafts",
|
||||
),
|
||||
)
|
||||
|
||||
def show_tasks(self):
|
||||
for label, crontab in self.crontabs.items():
|
||||
tasks = PeriodicTask.objects.filter(crontab=crontab).order_by(
|
||||
|
|
Loading…
Reference in a new issue