From 46a00acefc249ac879a4c1cb876c9a1b5fd2ed98 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 May 2024 20:56:14 -0300 Subject: [PATCH] refactor: sync to RFC Editor queue via celery (#7415) * feat: rfc_editor_queue_updates_task * refactor: use rfc_editor_queue_updates_task() * chore: remove now-unused scripts * test: test new task * chore: de-lint --- ietf/bin/rfc-editor-index-updates | 110 ------------------------------ ietf/bin/rfc-editor-queue-updates | 44 ------------ ietf/sync/tasks.py | 28 ++++++++ ietf/sync/tests.py | 30 ++++++++ ietf/sync/views.py | 19 +----- 5 files changed, 61 insertions(+), 170 deletions(-) delete mode 100755 ietf/bin/rfc-editor-index-updates delete mode 100755 ietf/bin/rfc-editor-queue-updates diff --git a/ietf/bin/rfc-editor-index-updates b/ietf/bin/rfc-editor-index-updates deleted file mode 100755 index c3e8f1f46..000000000 --- a/ietf/bin/rfc-editor-index-updates +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python - -# This script requires that the proper virtual python environment has been -# invoked before start - -import datetime -import io -import os -import requests -import sys -import syslog -import traceback - -# boilerplate -basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) -sys.path = [ basedir ] + sys.path -os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings" - -# Before invoking django -syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER) - -import django -django.setup() - -from django.conf import settings -from optparse import OptionParser -from django.core.mail import mail_admins - -from ietf.doc.utils import rebuild_reference_relations -from ietf.utils.log import log -from ietf.utils.pipe import pipe -from ietf.utils.timezone import date_today - -import ietf.sync.rfceditor - - -parser = OptionParser() -parser.add_option("-d", dest="skip_date", - help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD") - -options, args = parser.parse_args() - -skip_date = date_today() - datetime.timedelta(days=365) -if options.skip_date: - skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date() - -log("Updating document metadata from RFC index going back to %s, from %s" % (skip_date, settings.RFC_EDITOR_INDEX_URL)) - - -try: - response = requests.get( - settings.RFC_EDITOR_INDEX_URL, - timeout=30, # seconds - ) -except requests.Timeout as exc: - log(f'GET request timed out retrieving RFC editor index: {exc}') - sys.exit(1) - - -rfc_index_xml = response.text -index_data = ietf.sync.rfceditor.parse_index(io.StringIO(rfc_index_xml)) - -try: - response = requests.get( - settings.RFC_EDITOR_ERRATA_JSON_URL, - timeout=30, # seconds - ) -except requests.Timeout as exc: - log(f'GET request timed out retrieving RFC editor errata: {exc}') - sys.exit(1) -errata_data = response.json() - -if len(index_data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS: - log("Not enough index entries, only %s" % len(index_data)) - sys.exit(1) - -if len(errata_data) < ietf.sync.rfceditor.MIN_ERRATA_RESULTS: - log("Not enough errata entries, only %s" % len(errata_data)) - sys.exit(1) - -new_rfcs = [] -for rfc_number, changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_index(index_data, errata_data, skip_older_than_date=skip_date): - if rfc_published: - new_rfcs.append(doc) - - for c in changes: - log("RFC%s, %s: %s" % (rfc_number, doc.name, c)) - -sys.exit(0) - -# This can be called while processing a notifying POST from the RFC Editor -# Spawn a child to sync the rfcs and calculate new reference relationships -# so that the POST - -newpid = os.fork() - -if newpid == 0: - try: - pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH)) - for rfc in new_rfcs: - rebuild_reference_relations(rfc) - log("Updated references for %s"%rfc.name) - except: - subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1]) - msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2])) - mail_admins(subject,msg,fail_silently=True) - log(subject) - os._exit(0) -else: - sys.exit(0) diff --git a/ietf/bin/rfc-editor-queue-updates b/ietf/bin/rfc-editor-queue-updates deleted file mode 100755 index b441e50eb..000000000 --- a/ietf/bin/rfc-editor-queue-updates +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -import io -import os -import requests -import sys - -# boilerplate -basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) -sys.path = [ basedir ] + sys.path -os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings" - -import django -django.setup() - -from django.conf import settings - -from ietf.sync.rfceditor import parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue -from ietf.utils.log import log - -log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL) - -try: - response = requests.get( - settings.RFC_EDITOR_QUEUE_URL, - timeout=30, # seconds - ) -except requests.Timeout as exc: - log(f'GET request timed out retrieving RFC editor queue: {exc}') - sys.exit(1) -drafts, warnings = parse_queue(io.StringIO(response.text)) -for w in warnings: - log(u"Warning: %s" % w) - -if len(drafts) < MIN_QUEUE_RESULTS: - log("Not enough results, only %s" % len(drafts)) - sys.exit(1) - -changed, warnings = update_drafts_from_queue(drafts) -for w in warnings: - log(u"Warning: %s" % w) - -for c in changed: - log(u"Updated %s" % c) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index bc1218601..53e23d791 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -13,6 +13,7 @@ from django.utils import timezone from ietf.sync import iana from ietf.sync import rfceditor +from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue from ietf.utils import log from ietf.utils.timezone import date_today @@ -70,6 +71,33 @@ def rfc_editor_index_update_task(full_index=False): log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c)) +@shared_task +def rfc_editor_queue_updates_task(): + log.log(f"Updating RFC Editor queue states from {settings.RFC_EDITOR_QUEUE_URL}") + try: + response = requests.get( + settings.RFC_EDITOR_QUEUE_URL, + timeout=30, # seconds + ) + except requests.Timeout as exc: + log.log(f"GET request timed out retrieving RFC editor queue: {exc}") + return # failed + drafts, warnings = parse_queue(io.StringIO(response.text)) + for w in warnings: + log.log(f"Warning: {w}") + + if len(drafts) < MIN_QUEUE_RESULTS: + log.log("Not enough results, only %s" % len(drafts)) + return # failed + + changed, warnings = update_drafts_from_queue(drafts) + for w in warnings: + log.log(f"Warning: {w}") + + for c in changed: + log.log(f"Updated {c}") + + @shared_task def iana_changes_update_task(): # compensate to avoid we ask for something that happened now and then diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index db5619095..b0cdf863f 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -886,6 +886,36 @@ class TaskTests(TestCase): tasks.rfc_editor_index_update_task(full_index=False) self.assertFalse(update_docs_mock.called) + @override_settings(RFC_EDITOR_QUEUE_URL="https://rfc-editor.example.com/queue/") + @mock.patch("ietf.sync.tasks.update_drafts_from_queue") + @mock.patch("ietf.sync.tasks.parse_queue") + def test_rfc_editor_queue_updates_task(self, mock_parse, mock_update): + # test a request timeout + self.requests_mock.get("https://rfc-editor.example.com/queue/", exc=requests.exceptions.Timeout) + tasks.rfc_editor_queue_updates_task() + self.assertFalse(mock_parse.called) + self.assertFalse(mock_update.called) + + # now return a value rather than an exception + self.requests_mock.get("https://rfc-editor.example.com/queue/", text="the response") + + # mock returning < MIN_QUEUE_RESULTS values - treated as an error, so no update takes place + mock_parse.return_value = ([n for n in range(rfceditor.MIN_QUEUE_RESULTS - 1)], ["a warning"]) + tasks.rfc_editor_queue_updates_task() + self.assertEqual(mock_parse.call_count, 1) + self.assertEqual(mock_parse.call_args[0][0].read(), "the response") + self.assertFalse(mock_update.called) + mock_parse.reset_mock() + + # mock returning +. MIN_QUEUE_RESULTS - should succeed + mock_parse.return_value = ([n for n in range(rfceditor.MIN_QUEUE_RESULTS)], ["a warning"]) + mock_update.return_value = ([1,2,3], ["another warning"]) + tasks.rfc_editor_queue_updates_task() + self.assertEqual(mock_parse.call_count, 1) + self.assertEqual(mock_parse.call_args[0][0].read(), "the response") + self.assertEqual(mock_update.call_count, 1) + self.assertEqual(mock_update.call_args, mock.call([n for n in range(rfceditor.MIN_QUEUE_RESULTS)])) + @override_settings(IANA_SYNC_CHANGES_URL="https://iana.example.com/sync/") @mock.patch("ietf.sync.tasks.iana.update_history_with_changes") @mock.patch("ietf.sync.tasks.iana.parse_changes_json") diff --git a/ietf/sync/views.py b/ietf/sync/views.py index 788e982f7..da407e1ef 100644 --- a/ietf/sync/views.py +++ b/ietf/sync/views.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import datetime -import subprocess import os import json @@ -79,30 +78,18 @@ def notify(request, org, notification): raise Http404 if request.method == "POST": - def runscript(name): - python = os.path.join(os.path.dirname(settings.BASE_DIR), "env", "bin", "python") - cmd = [python, os.path.join(SYNC_BIN_PATH, name)] - cmdstring = " ".join(cmd) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - out = out.decode('utf-8') - err = err.decode('utf-8') - if p.returncode: - log("Subprocess error %s when running '%s': %s %s" % (p.returncode, cmd, err, out)) - raise subprocess.CalledProcessError(p.returncode, cmdstring, "\n".join([err, out])) - if notification == "index": log("Queuing RFC Editor index sync from notify view POST") tasks.rfc_editor_index_update_task.delay() + elif notification == "queue": + log("Queuing RFC Editor queue sync from notify view POST") + tasks.rfc_editor_queue_updates_task.delay() elif notification == "changes": log("Queuing IANA changes sync from notify view POST") tasks.iana_changes_update_task.delay() elif notification == "protocols": log("Queuing IANA protocols sync from notify view POST") tasks.iana_protocols_update_task.delay() - elif notification == "queue": - log("Running sync script from notify view POST") - runscript("rfc-editor-queue-updates") return HttpResponse("OK", content_type="text/plain; charset=%s"%settings.DEFAULT_CHARSET)