# This script is expected to be triggered by cron from
# /etc/cron.d/datatracker
export LANG=en_US.UTF-8

# Make sure we stop if something goes wrong:
program=${0##*/}
trap 'echo "$program($LINENO): Command failed with error code $? ([$$] $0 $*)"; exit 1' ERR

# Record the start of the run in syslog. DTDIR is no longer defined in this
# script, so log the path the script was invoked as; "$DTDIR/bin/hourly" would
# expand to the misleading "/bin/hourly".
logger -p user.info -t cron "Running $0"

# Fetch the charter listings from the datatracker into $CHARTER.
CHARTER=/a/www/ietf-ftp/charter
wget -q https://datatracker.ietf.org/wg/1wg-charters-by-acronym.txt -O "$CHARTER/1wg-charters-by-acronym.txt"
wget -q https://datatracker.ietf.org/wg/1wg-charters.txt -O "$CHARTER/1wg-charters.txt"

# exit 0
settings.BIBXML_BASE_PATH: %s. ' - 'By default, generate files as needed for new Internet-Draft revisions from the ' - 'last %s days.' % (settings.BIBXML_BASE_PATH, DEFAULT_DAYS)) - - def add_arguments(self, parser): - parser.add_argument('--all', action='store_true', default=False, help="Process all documents, not only recent submissions") - parser.add_argument('--days', type=int, default=DEFAULT_DAYS, help="Look submissions from the last DAYS days, instead of %s" % DEFAULT_DAYS) - - def say(self, msg): - if self.verbosity > 0: - sys.stdout.write(msg) - sys.stdout.write('\n') - - def note(self, msg): - if self.verbosity > 1: - sys.stdout.write(msg) - sys.stdout.write('\n') - - def mutter(self, msg): - if self.verbosity > 2: - sys.stdout.write(msg) - sys.stdout.write('\n') - - def write(self, fn, new): - # normalize new - new = re.sub(r'\r\n?', r'\n', new) - try: - with io.open(fn, encoding='utf-8') as f: - old = f.read() - except IOError: - old = "" - if old.strip() != new.strip(): - self.note('Writing %s' % os.path.basename(fn)) - with io.open(fn, "w", encoding='utf-8') as f: - f.write(new) - - def handle(self, *args, **options): - self.verbosity = options.get("verbosity", 1) - process_all = options.get("all") - days = options.get("days") - # - bibxmldir = os.path.join(settings.BIBXML_BASE_PATH, 'bibxml-ids') - if not os.path.exists(bibxmldir): - os.makedirs(bibxmldir) - # - if process_all: - doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft') - else: - start = timezone.now() - datetime.timedelta(days=days) - doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft', time__gte=start) - doc_events = doc_events.order_by('time') - - for e in doc_events: - self.mutter('%s %s' % (e.time, e.doc.name)) - try: - doc = e.doc - bibxml = bibxml_for_draft(doc, e.rev) - ref_rev_file_name = os.path.join(bibxmldir, 'reference.I-D.%s-%s.xml' % (doc.name, e.rev)) - self.write(ref_rev_file_name, bibxml) - 
@shared_task
def generate_draft_bibxml_files_task(days=7, process_all=False):
    """Refresh bibxml reference files for draft revisions.

    Walks the "new_revision" events of draft documents in ascending time
    order and calls update_or_create_draft_bibxml_file() for each one.

    :param days: how many days back to look when process_all is false
    :param process_all: when true, ignore ``days`` and handle every event

    An exception raised for one revision is logged and the remaining
    revisions are still attempted.
    """
    ensure_draft_bibxml_path_exists()
    criteria = {"type": "new_revision", "doc__type_id": "draft"}
    if not process_all:
        # Restrict to events no older than the lookback window.
        criteria["time__gte"] = timezone.now() - datetime.timedelta(days=days)
    recent_revisions = NewRevisionDocEvent.objects.filter(**criteria).order_by("time")
    for event in recent_revisions:
        try:
            update_or_create_draft_bibxml_file(event.doc, event.rev)
        except Exception as err:
            # Best effort: report the failure and move on to the next revision.
            log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
always be ignored + NewRevisionDocEventFactory( + time=now - datetime.timedelta(days=6), rev="09", doc__type_id="rfc" # not a draft + ) + NewRevisionDocEventFactory( + type="changed_document", # not a "new_revision" type + time=now - datetime.timedelta(days=6), + rev="09", + doc__type_id="rfc", + ) + + # Get rid of the "00" events created by the factories -- they're just noise for this test + NewRevisionDocEvent.objects.filter(rev="00").delete() + + # default args - look back 7 days + generate_draft_bibxml_files_task() + self.assertTrue(mock_ensure_path.called) + self.assertCountEqual( + mock_create.call_args_list, [mock.call(young_event.doc, young_event.rev)] + ) + mock_create.reset_mock() + mock_ensure_path.reset_mock() + + # shorter lookback + generate_draft_bibxml_files_task(days=5) + self.assertTrue(mock_ensure_path.called) + self.assertCountEqual(mock_create.call_args_list, []) + mock_create.reset_mock() + mock_ensure_path.reset_mock() + + # longer lookback + generate_draft_bibxml_files_task(days=9) + self.assertTrue(mock_ensure_path.called) + self.assertCountEqual( + mock_create.call_args_list, + [ + mock.call(young_event.doc, young_event.rev), + mock.call(old_event.doc, old_event.rev), + ], + ) + mock_create.reset_mock() + mock_ensure_path.reset_mock() + + # everything + generate_draft_bibxml_files_task(process_all=True) + self.assertTrue(mock_ensure_path.called) + self.assertCountEqual( + mock_create.call_args_list, + [ + mock.call(young_event.doc, young_event.rev), + mock.call(old_event.doc, old_event.rev), + mock.call(very_old_event.doc, very_old_event.rev), + ], + ) + mock_create.reset_mock() + mock_ensure_path.reset_mock() + + # everything should still be tried, even if there's an exception + mock_create.side_effect = RuntimeError + generate_draft_bibxml_files_task(process_all=True) + self.assertTrue(mock_ensure_path.called) + self.assertCountEqual( + mock_create.call_args_list, + [ + mock.call(young_event.doc, young_event.rev), + 
mock.call(old_event.doc, old_event.rev), + mock.call(very_old_event.doc, very_old_event.rev), + ], + ) diff --git a/ietf/doc/tests_utils.py b/ietf/doc/tests_utils.py index 248ac345a..f610fe3d7 100644 --- a/ietf/doc/tests_utils.py +++ b/ietf/doc/tests_utils.py @@ -2,8 +2,10 @@ import datetime import debug # pyflakes:ignore -from unittest.mock import patch +from pathlib import Path +from unittest.mock import call, patch +from django.conf import settings from django.db import IntegrityError from django.test.utils import override_settings from django.utils import timezone @@ -16,7 +18,8 @@ from ietf.person.models import Person from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors, - fuzzy_find_documents, rebuild_reference_relations, build_file_urls) + fuzzy_find_documents, rebuild_reference_relations, build_file_urls, + ensure_draft_bibxml_path_exists, update_or_create_draft_bibxml_file) from ietf.utils.draft import Draft, PlaintextDraft from ietf.utils.xmldraft import XMLDraft @@ -484,3 +487,49 @@ class RebuildReferenceRelationsTests(TestCase): (self.updated.name, 'updates'), ] ) + + +class DraftBibxmlTests(TestCase): + settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ["BIBXML_BASE_PATH"] + + def test_ensure_draft_bibxml_path_exists(self): + expected = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids" + self.assertFalse(expected.exists()) + ensure_draft_bibxml_path_exists() + self.assertTrue(expected.is_dir()) # false if does not exist or is not dir + + @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml") + def test_create_draft_bibxml_file(self, mock): + bibxml_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids" + bibxml_path.mkdir(exist_ok=False) # expect to start with a clean slate + + doc = DocumentFactory() + ref_path = 
def update_or_create_draft_bibxml_file(doc, rev):
    """Write the bibxml reference file for one revision of a draft, if needed.

    The content is whatever bibxml_for_draft(doc, rev) returns, with CRLF and
    bare CR line endings converted to LF. It is stored as
    ``reference.I-D.<doc.name>-<rev>.xml`` in the ``bibxml-ids`` directory
    under settings.BIBXML_BASE_PATH.

    The file is only (re)written when the new content differs from what is
    already on disk by more than leading/trailing whitespace. A file that
    cannot be read (for example because it does not exist) is treated as
    empty. The target directory is not created here; see
    ensure_draft_bibxml_path_exists().

    :param doc: the document to generate bibxml for
    :param rev: the revision string used in the content and the file name
    """
    # NOTE(review): presumably a soft assertion that only reports when the
    # expression is false -- confirm against ietf.utils.log.
    log.assertion("doc.type_id == 'draft'")
    normalized_bibxml = re.sub(r"\r\n?", r"\n", bibxml_for_draft(doc, rev))
    ref_rev_file_path = (
        Path(settings.BIBXML_BASE_PATH)
        / "bibxml-ids"
        / f"reference.I-D.{doc.name}-{rev}.xml"
    )
    try:
        existing_bibxml = ref_rev_file_path.read_text(encoding="utf8")
    except IOError:
        existing_bibxml = ""
    # Comparing stripped text avoids rewriting files that differ only by
    # surrounding whitespace.
    if normalized_bibxml.strip() != existing_bibxml.strip():
        log.log(f"Writing {ref_rev_file_path}")
        ref_rev_file_path.write_text(normalized_bibxml, encoding="utf8")


def ensure_draft_bibxml_path_exists():
    """Create the ``bibxml-ids`` directory under settings.BIBXML_BASE_PATH.

    Missing parent directories are created as well, matching the
    os.makedirs() call of the management command this helper replaces.
    Nothing happens if the directory already exists.
    """
    (Path(settings.BIBXML_BASE_PATH) / "bibxml-ids").mkdir(parents=True, exist_ok=True)