refactor: generate I-D bibxml files via celery (#7426)
* refactor: task to generate_draft_bibxml_files
* test: test task/utility methods
* chore: add periodic task
* chore: remove generate_draft_bibxml_files.py
* chore: further prune /bin/hourly
This commit is contained in:
parent de8b3b5ce3
commit ffb9eb12ff
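For reviewers who want to exercise the new task by hand, a minimal sketch (not part of this commit; it assumes a configured datatracker environment and uses only the standard Celery task API):

# Hypothetical smoke test for the new task, not included in this PR.
# A direct call runs the task body synchronously; .delay() is Celery's standard
# way to enqueue it and needs a running worker.
from ietf.doc.tasks import generate_draft_bibxml_files_task

generate_draft_bibxml_files_task(days=7)        # run inline, default one-week lookback
generate_draft_bibxml_files_task.delay(days=7)  # queue for a celery worker instead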
bin/hourly | 18
bin/hourly
@@ -5,33 +5,15 @@
 # This script is expected to be triggered by cron from
 # /etc/cron.d/datatracker
 export LANG=en_US.UTF-8
-export PYTHONIOENCODING=utf-8
 
 # Make sure we stop if something goes wrong:
 program=${0##*/}
 trap 'echo "$program($LINENO): Command failed with error code $? ([$$] $0 $*)"; exit 1' ERR
 
-DTDIR=/a/www/ietf-datatracker/web
-cd $DTDIR/
-
-# Set up the virtual environment
-source $DTDIR/env/bin/activate
-
 logger -p user.info -t cron "Running $DTDIR/bin/hourly"
 
-# Generate some static files
-ID=/a/ietfdata/doc/draft/repository
-DERIVED=/a/ietfdata/derived
-DOWNLOAD=/a/www/www6s/download
-
 CHARTER=/a/www/ietf-ftp/charter
 wget -q https://datatracker.ietf.org/wg/1wg-charters-by-acronym.txt -O $CHARTER/1wg-charters-by-acronym.txt
 wget -q https://datatracker.ietf.org/wg/1wg-charters.txt -O $CHARTER/1wg-charters.txt
 
-# Regenerate the last week of bibxml-ids
-$DTDIR/ietf/manage.py generate_draft_bibxml_files
-
-# Create and update group wikis
-#$DTDIR/ietf/manage.py create_group_wikis
-
 # exit 0
generate_draft_bibxml_files.py (deleted)
@@ -1,84 +0,0 @@
-# Copyright The IETF Trust 2012-2020, All Rights Reserved
-# -*- coding: utf-8 -*-
-
-
-import datetime
-import io
-import os
-import re
-import sys
-
-from django.conf import settings
-from django.core.management.base import BaseCommand
-from django.utils import timezone
-
-import debug    # pyflakes:ignore
-
-from ietf.doc.models import NewRevisionDocEvent
-from ietf.doc.utils import bibxml_for_draft
-
-DEFAULT_DAYS = 7
-
-class Command(BaseCommand):
-    help = ('Generate draft bibxml files for xml2rfc references, placing them in the '
-            'directory configured in settings.BIBXML_BASE_PATH: %s. '
-            'By default, generate files as needed for new Internet-Draft revisions from the '
-            'last %s days.' % (settings.BIBXML_BASE_PATH, DEFAULT_DAYS))
-
-    def add_arguments(self, parser):
-        parser.add_argument('--all', action='store_true', default=False, help="Process all documents, not only recent submissions")
-        parser.add_argument('--days', type=int, default=DEFAULT_DAYS, help="Look submissions from the last DAYS days, instead of %s" % DEFAULT_DAYS)
-
-    def say(self, msg):
-        if self.verbosity > 0:
-            sys.stdout.write(msg)
-            sys.stdout.write('\n')
-
-    def note(self, msg):
-        if self.verbosity > 1:
-            sys.stdout.write(msg)
-            sys.stdout.write('\n')
-
-    def mutter(self, msg):
-        if self.verbosity > 2:
-            sys.stdout.write(msg)
-            sys.stdout.write('\n')
-
-    def write(self, fn, new):
-        # normalize new
-        new = re.sub(r'\r\n?', r'\n', new)
-        try:
-            with io.open(fn, encoding='utf-8') as f:
-                old = f.read()
-        except IOError:
-            old = ""
-        if old.strip() != new.strip():
-            self.note('Writing %s' % os.path.basename(fn))
-            with io.open(fn, "w", encoding='utf-8') as f:
-                f.write(new)
-
-    def handle(self, *args, **options):
-        self.verbosity = options.get("verbosity", 1)
-        process_all = options.get("all")
-        days = options.get("days")
-        #
-        bibxmldir = os.path.join(settings.BIBXML_BASE_PATH, 'bibxml-ids')
-        if not os.path.exists(bibxmldir):
-            os.makedirs(bibxmldir)
-        #
-        if process_all:
-            doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft')
-        else:
-            start = timezone.now() - datetime.timedelta(days=days)
-            doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft', time__gte=start)
-        doc_events = doc_events.order_by('time')
-
-        for e in doc_events:
-            self.mutter('%s %s' % (e.time, e.doc.name))
-            try:
-                doc = e.doc
-                bibxml = bibxml_for_draft(doc, e.rev)
-                ref_rev_file_name = os.path.join(bibxmldir, 'reference.I-D.%s-%s.xml' % (doc.name, e.rev))
-                self.write(ref_rev_file_name, bibxml)
-            except Exception as ee:
-                sys.stderr.write('\n%s-%s: %s\n' % (doc.name, doc.rev, ee))
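The removed command's options map onto the new task's keyword arguments. A rough correspondence, sketched here for reference (the manage.py flags come from the deleted add_arguments above; the task signature appears in the next hunks):

# Illustrative mapping from the deleted management command to the new Celery task.
from ietf.doc.tasks import generate_draft_bibxml_files_task

generate_draft_bibxml_files_task()                  # was: ietf/manage.py generate_draft_bibxml_files
generate_draft_bibxml_files_task(days=14)           # was: ... generate_draft_bibxml_files --days 14
generate_draft_bibxml_files_task(process_all=True)  # was: ... generate_draft_bibxml_files --all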
@@ -9,6 +9,7 @@ from celery import shared_task
 from pathlib import Path
 
 from django.conf import settings
+from django.utils import timezone
 
 from ietf.utils import log
 from ietf.utils.timezone import datetime_today
@@ -24,8 +25,13 @@ from .expire import (
     send_expire_warning_for_draft,
 )
 from .lastcall import get_expired_last_calls, expire_last_call
-from .models import Document
-from .utils import generate_idnits2_rfc_status, generate_idnits2_rfcs_obsoleted
+from .models import Document, NewRevisionDocEvent
+from .utils import (
+    generate_idnits2_rfc_status,
+    generate_idnits2_rfcs_obsoleted,
+    update_or_create_draft_bibxml_file,
+    ensure_draft_bibxml_path_exists,
+)
 
 
 @shared_task
@@ -90,3 +96,24 @@ def generate_idnits2_rfcs_obsoleted_task():
         outpath.write_text(blob, encoding="utf8")
     except Exception as e:
         log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")
+
+
+@shared_task
+def generate_draft_bibxml_files_task(days=7, process_all=False):
+    """Generate bibxml files for recently updated docs
+
+    If process_all is False (the default), processes only docs with new revisions
+    in the last specified number of days.
+    """
+    ensure_draft_bibxml_path_exists()
+    doc_events = NewRevisionDocEvent.objects.filter(
+        type="new_revision",
+        doc__type_id="draft",
+    ).order_by("time")
+    if not process_all:
+        doc_events = doc_events.filter(time__gte=timezone.now() - datetime.timedelta(days=days))
+    for event in doc_events:
+        try:
+            update_or_create_draft_bibxml_file(event.doc, event.rev)
+        except Exception as err:
+            log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
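The dotted task string used by the periodic-task entry later in this diff ("ietf.doc.tasks.generate_draft_bibxml_files_task") should match the name Celery registers for this function. A quick sanity check, assuming Celery's default shared_task naming (module path plus function name):

# Hypothetical cross-check between the task and the beat entry added below.
from ietf.doc.tasks import generate_draft_bibxml_files_task

assert generate_draft_bibxml_files_task.name == "ietf.doc.tasks.generate_draft_bibxml_files_task"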
@@ -1,18 +1,21 @@
 # Copyright The IETF Trust 2024, All Rights Reserved
+import datetime
 import mock
 
 from pathlib import Path
 
 from django.conf import settings
+from django.utils import timezone
 
 from ietf.utils.test_utils import TestCase
 from ietf.utils.timezone import datetime_today
 
-from .factories import DocumentFactory
-from .models import Document
+from .factories import DocumentFactory, NewRevisionDocEventFactory
+from .models import Document, NewRevisionDocEvent
 from .tasks import (
     expire_ids_task,
     expire_last_calls_task,
+    generate_draft_bibxml_files_task,
     generate_idnits2_rfcs_obsoleted_task,
     generate_idnits2_rfc_status_task,
     notify_expirations_task,
@@ -114,3 +117,86 @@ class TaskTests(TestCase):
             "dåtå".encode("utf8"),
             (Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted").read_bytes(),
         )
+
+    @mock.patch("ietf.doc.tasks.ensure_draft_bibxml_path_exists")
+    @mock.patch("ietf.doc.tasks.update_or_create_draft_bibxml_file")
+    def test_generate_draft_bibxml_files_task(self, mock_create, mock_ensure_path):
+        now = timezone.now()
+        very_old_event = NewRevisionDocEventFactory(
+            time=now - datetime.timedelta(days=1000), rev="17"
+        )
+        old_event = NewRevisionDocEventFactory(
+            time=now - datetime.timedelta(days=8), rev="03"
+        )
+        young_event = NewRevisionDocEventFactory(
+            time=now - datetime.timedelta(days=6), rev="06"
+        )
+        # a couple that should always be ignored
+        NewRevisionDocEventFactory(
+            time=now - datetime.timedelta(days=6), rev="09", doc__type_id="rfc"  # not a draft
+        )
+        NewRevisionDocEventFactory(
+            type="changed_document",  # not a "new_revision" type
+            time=now - datetime.timedelta(days=6),
+            rev="09",
+            doc__type_id="rfc",
+        )
+
+        # Get rid of the "00" events created by the factories -- they're just noise for this test
+        NewRevisionDocEvent.objects.filter(rev="00").delete()
+
+        # default args - look back 7 days
+        generate_draft_bibxml_files_task()
+        self.assertTrue(mock_ensure_path.called)
+        self.assertCountEqual(
+            mock_create.call_args_list, [mock.call(young_event.doc, young_event.rev)]
+        )
+        mock_create.reset_mock()
+        mock_ensure_path.reset_mock()
+
+        # shorter lookback
+        generate_draft_bibxml_files_task(days=5)
+        self.assertTrue(mock_ensure_path.called)
+        self.assertCountEqual(mock_create.call_args_list, [])
+        mock_create.reset_mock()
+        mock_ensure_path.reset_mock()
+
+        # longer lookback
+        generate_draft_bibxml_files_task(days=9)
+        self.assertTrue(mock_ensure_path.called)
+        self.assertCountEqual(
+            mock_create.call_args_list,
+            [
+                mock.call(young_event.doc, young_event.rev),
+                mock.call(old_event.doc, old_event.rev),
+            ],
+        )
+        mock_create.reset_mock()
+        mock_ensure_path.reset_mock()
+
+        # everything
+        generate_draft_bibxml_files_task(process_all=True)
+        self.assertTrue(mock_ensure_path.called)
+        self.assertCountEqual(
+            mock_create.call_args_list,
+            [
+                mock.call(young_event.doc, young_event.rev),
+                mock.call(old_event.doc, old_event.rev),
+                mock.call(very_old_event.doc, very_old_event.rev),
+            ],
+        )
+        mock_create.reset_mock()
+        mock_ensure_path.reset_mock()
+
+        # everything should still be tried, even if there's an exception
+        mock_create.side_effect = RuntimeError
+        generate_draft_bibxml_files_task(process_all=True)
+        self.assertTrue(mock_ensure_path.called)
+        self.assertCountEqual(
+            mock_create.call_args_list,
+            [
+                mock.call(young_event.doc, young_event.rev),
+                mock.call(old_event.doc, old_event.rev),
+                mock.call(very_old_event.doc, very_old_event.rev),
+            ],
+        )
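A note on the assertions above: assertCountEqual treats mock_create.call_args_list as an unordered multiset, which is the right comparison here since the task walks events oldest-first but the test only cares which (doc, rev) pairs were written. A tiny self-contained illustration of the pattern (not datatracker code):

# Standalone illustration of assertCountEqual over a mock's call_args_list.
import unittest
from unittest import mock


class CallOrderDemo(unittest.TestCase):
    def test_order_does_not_matter(self):
        m = mock.Mock()
        m("doc-a", "01")
        m("doc-b", "02")
        # Reversed expected order still passes; a missing or extra call would not.
        self.assertCountEqual(
            m.call_args_list,
            [mock.call("doc-b", "02"), mock.call("doc-a", "01")],
        )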
@@ -2,8 +2,10 @@
 import datetime
 import debug    # pyflakes:ignore
 
-from unittest.mock import patch
+from pathlib import Path
+from unittest.mock import call, patch
 
+from django.conf import settings
 from django.db import IntegrityError
 from django.test.utils import override_settings
 from django.utils import timezone
@@ -16,7 +18,8 @@ from ietf.person.models import Person
 from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
 from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor
 from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
-                            fuzzy_find_documents, rebuild_reference_relations, build_file_urls)
+                            fuzzy_find_documents, rebuild_reference_relations, build_file_urls,
+                            ensure_draft_bibxml_path_exists, update_or_create_draft_bibxml_file)
 from ietf.utils.draft import Draft, PlaintextDraft
 from ietf.utils.xmldraft import XMLDraft
@@ -484,3 +487,49 @@ class RebuildReferenceRelationsTests(TestCase):
                 (self.updated.name, 'updates'),
             ]
         )
+
+
+class DraftBibxmlTests(TestCase):
+    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ["BIBXML_BASE_PATH"]
+
+    def test_ensure_draft_bibxml_path_exists(self):
+        expected = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
+        self.assertFalse(expected.exists())
+        ensure_draft_bibxml_path_exists()
+        self.assertTrue(expected.is_dir())  # false if does not exist or is not dir
+
+    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
+    def test_create_draft_bibxml_file(self, mock):
+        bibxml_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
+        bibxml_path.mkdir(exist_ok=False)  # expect to start with a clean slate
+
+        doc = DocumentFactory()
+        ref_path = bibxml_path / f"reference.I-D.{doc.name}-26.xml"  # we're pretending it's rev 26
+
+        update_or_create_draft_bibxml_file(doc, "26")
+        self.assertEqual(mock.call_count, 1)
+        self.assertEqual(mock.call_args, call(doc, "26"))
+        self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")
+
+    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
+    def test_update_draft_bibxml_file(self, mock):
+        bibxml_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
+        bibxml_path.mkdir(exist_ok=False)  # expect to start with a clean slate
+
+        doc = DocumentFactory()
+        ref_path = bibxml_path / f"reference.I-D.{doc.name}-26.xml"  # we're pretending it's rev 26
+        ref_path.write_text("Old data")
+
+        # should replace it
+        update_or_create_draft_bibxml_file(doc, "26")
+        self.assertEqual(mock.call_count, 1)
+        self.assertEqual(mock.call_args, call(doc, "26"))
+        self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")
+
+        # should leave it alone if it differs only by leading/trailing whitespace
+        mock.reset_mock()
+        mock.return_value = " \n This\nis\nmy\nbibxml "
+        update_or_create_draft_bibxml_file(doc, "26")
+        self.assertEqual(mock.call_count, 1)
+        self.assertEqual(mock.call_args, call(doc, "26"))
+        self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")
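The mocked return value "This\ris\nmy\r\nbibxml" pins down the line-ending normalization that update_or_create_draft_bibxml_file performs; the regex is the one added in the utils hunk below, shown here in isolation:

# The CR/CRLF normalization the tests expect, using the same regex as utils.py.
import re

raw = "This\ris\nmy\r\nbibxml"
assert re.sub(r"\r\n?", r"\n", raw) == "This\nis\nmy\nbibxml"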
@@ -1413,3 +1413,20 @@ def investigate_fragment(name_fragment):
         unverifiable_collections=unverifiable_collections,
         unexpected=unexpected,
     )
+
+
+def update_or_create_draft_bibxml_file(doc, rev):
+    log.assertion("doc.type_id == 'draft'")
+    normalized_bibxml = re.sub(r"\r\n?", r"\n", bibxml_for_draft(doc, rev))
+    ref_rev_file_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids" / f"reference.I-D.{doc.name}-{rev}.xml"
+    try:
+        existing_bibxml = ref_rev_file_path.read_text(encoding="utf8")
+    except IOError:
+        existing_bibxml = ""
+    if normalized_bibxml.strip() != existing_bibxml.strip():
+        log.log(f"Writing {ref_rev_file_path}")
+        ref_rev_file_path.write_text(normalized_bibxml, encoding="utf8")
+
+
+def ensure_draft_bibxml_path_exists():
+    (Path(settings.BIBXML_BASE_PATH) / "bibxml-ids").mkdir(exist_ok=True)
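Typical call order for the two new helpers, as the task uses them. A sketch assuming a writable BIBXML_BASE_PATH and at least one draft in the database:

# Usage sketch for the new helpers (illustrative, not part of this commit).
from ietf.doc.models import Document
from ietf.doc.utils import (
    ensure_draft_bibxml_path_exists,
    update_or_create_draft_bibxml_file,
)

ensure_draft_bibxml_path_exists()  # idempotent mkdir of <BIBXML_BASE_PATH>/bibxml-ids
doc = Document.objects.filter(type_id="draft").first()
if doc is not None:
    # Rewrites reference.I-D.<name>-<rev>.xml only if the normalized content changed.
    update_or_create_draft_bibxml_file(doc, doc.rev)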
@@ -221,6 +221,16 @@ class Command(BaseCommand):
             ),
         )
 
+        PeriodicTask.objects.get_or_create(
+            name="Generate I-D bibxml files",
+            task="ietf.doc.tasks.generate_draft_bibxml_files_task",
+            defaults=dict(
+                enabled=False,
+                crontab=self.crontabs["hourly"],
+                description="Generate draft bibxml files for the last week's drafts",
+            ),
+        )
+
     def show_tasks(self):
         for label, crontab in self.crontabs.items():
             tasks = PeriodicTask.objects.filter(crontab=crontab).order_by(
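The entry is created disabled, so nothing runs until an operator turns it on. A sketch of doing that from a Django shell, using the django-celery-beat model and the fields set above:

# Enabling the new beat entry after deployment (hypothetical operator step).
from django_celery_beat.models import PeriodicTask

task = PeriodicTask.objects.get(name="Generate I-D bibxml files")
task.enabled = True
task.save()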