refactor: sync to RFC Editor queue via celery (#7415)

* feat: rfc_editor_queue_updates_task

* refactor: use rfc_editor_queue_updates_task()

* chore: remove now-unused scripts

* test: test new task

* chore: de-lint
This commit is contained in:
Jennifer Richards 2024-05-14 20:56:14 -03:00 committed by GitHub
parent a4e0354090
commit 46a00acefc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 61 additions and 170 deletions

View file

@ -1,110 +0,0 @@
#!/usr/bin/env python
# This script requires that the proper virtual python environment has been
# invoked before start
import datetime
import io
import os
import requests
import sys
import syslog
import traceback
# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path
os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"
# Before invoking django
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
import django
django.setup()
from django.conf import settings
from optparse import OptionParser
from django.core.mail import mail_admins
from ietf.doc.utils import rebuild_reference_relations
from ietf.utils.log import log
from ietf.utils.pipe import pipe
from ietf.utils.timezone import date_today
import ietf.sync.rfceditor
parser = OptionParser()
parser.add_option("-d", dest="skip_date",
help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
options, args = parser.parse_args()
skip_date = date_today() - datetime.timedelta(days=365)
if options.skip_date:
skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()
log("Updating document metadata from RFC index going back to %s, from %s" % (skip_date, settings.RFC_EDITOR_INDEX_URL))
try:
response = requests.get(
settings.RFC_EDITOR_INDEX_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log(f'GET request timed out retrieving RFC editor index: {exc}')
sys.exit(1)
rfc_index_xml = response.text
index_data = ietf.sync.rfceditor.parse_index(io.StringIO(rfc_index_xml))
try:
response = requests.get(
settings.RFC_EDITOR_ERRATA_JSON_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log(f'GET request timed out retrieving RFC editor errata: {exc}')
sys.exit(1)
errata_data = response.json()
if len(index_data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
log("Not enough index entries, only %s" % len(index_data))
sys.exit(1)
if len(errata_data) < ietf.sync.rfceditor.MIN_ERRATA_RESULTS:
log("Not enough errata entries, only %s" % len(errata_data))
sys.exit(1)
new_rfcs = []
for rfc_number, changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_index(index_data, errata_data, skip_older_than_date=skip_date):
if rfc_published:
new_rfcs.append(doc)
for c in changes:
log("RFC%s, %s: %s" % (rfc_number, doc.name, c))
sys.exit(0)
# This can be called while processing a notifying POST from the RFC Editor
# Spawn a child to sync the rfcs and calculate new reference relationships
# so that the POST
newpid = os.fork()
if newpid == 0:
try:
pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
for rfc in new_rfcs:
rebuild_reference_relations(rfc)
log("Updated references for %s"%rfc.name)
except:
subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2]))
mail_admins(subject,msg,fail_silently=True)
log(subject)
os._exit(0)
else:
sys.exit(0)

View file

@ -1,44 +0,0 @@
#!/usr/bin/env python
import io
import os
import requests
import sys
# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path
os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"
import django
django.setup()
from django.conf import settings
from ietf.sync.rfceditor import parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue
from ietf.utils.log import log
log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
try:
response = requests.get(
settings.RFC_EDITOR_QUEUE_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log(f'GET request timed out retrieving RFC editor queue: {exc}')
sys.exit(1)
drafts, warnings = parse_queue(io.StringIO(response.text))
for w in warnings:
log(u"Warning: %s" % w)
if len(drafts) < MIN_QUEUE_RESULTS:
log("Not enough results, only %s" % len(drafts))
sys.exit(1)
changed, warnings = update_drafts_from_queue(drafts)
for w in warnings:
log(u"Warning: %s" % w)
for c in changed:
log(u"Updated %s" % c)

View file

@ -13,6 +13,7 @@ from django.utils import timezone
from ietf.sync import iana
from ietf.sync import rfceditor
from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue
from ietf.utils import log
from ietf.utils.timezone import date_today
@ -70,6 +71,33 @@ def rfc_editor_index_update_task(full_index=False):
log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c))
@shared_task
def rfc_editor_queue_updates_task():
log.log(f"Updating RFC Editor queue states from {settings.RFC_EDITOR_QUEUE_URL}")
try:
response = requests.get(
settings.RFC_EDITOR_QUEUE_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log.log(f"GET request timed out retrieving RFC editor queue: {exc}")
return # failed
drafts, warnings = parse_queue(io.StringIO(response.text))
for w in warnings:
log.log(f"Warning: {w}")
if len(drafts) < MIN_QUEUE_RESULTS:
log.log("Not enough results, only %s" % len(drafts))
return # failed
changed, warnings = update_drafts_from_queue(drafts)
for w in warnings:
log.log(f"Warning: {w}")
for c in changed:
log.log(f"Updated {c}")
@shared_task
def iana_changes_update_task():
# compensate to avoid we ask for something that happened now and then

View file

@ -886,6 +886,36 @@ class TaskTests(TestCase):
tasks.rfc_editor_index_update_task(full_index=False)
self.assertFalse(update_docs_mock.called)
@override_settings(RFC_EDITOR_QUEUE_URL="https://rfc-editor.example.com/queue/")
@mock.patch("ietf.sync.tasks.update_drafts_from_queue")
@mock.patch("ietf.sync.tasks.parse_queue")
def test_rfc_editor_queue_updates_task(self, mock_parse, mock_update):
# test a request timeout
self.requests_mock.get("https://rfc-editor.example.com/queue/", exc=requests.exceptions.Timeout)
tasks.rfc_editor_queue_updates_task()
self.assertFalse(mock_parse.called)
self.assertFalse(mock_update.called)
# now return a value rather than an exception
self.requests_mock.get("https://rfc-editor.example.com/queue/", text="the response")
# mock returning < MIN_QUEUE_RESULTS values - treated as an error, so no update takes place
mock_parse.return_value = ([n for n in range(rfceditor.MIN_QUEUE_RESULTS - 1)], ["a warning"])
tasks.rfc_editor_queue_updates_task()
self.assertEqual(mock_parse.call_count, 1)
self.assertEqual(mock_parse.call_args[0][0].read(), "the response")
self.assertFalse(mock_update.called)
mock_parse.reset_mock()
# mock returning +. MIN_QUEUE_RESULTS - should succeed
mock_parse.return_value = ([n for n in range(rfceditor.MIN_QUEUE_RESULTS)], ["a warning"])
mock_update.return_value = ([1,2,3], ["another warning"])
tasks.rfc_editor_queue_updates_task()
self.assertEqual(mock_parse.call_count, 1)
self.assertEqual(mock_parse.call_args[0][0].read(), "the response")
self.assertEqual(mock_update.call_count, 1)
self.assertEqual(mock_update.call_args, mock.call([n for n in range(rfceditor.MIN_QUEUE_RESULTS)]))
@override_settings(IANA_SYNC_CHANGES_URL="https://iana.example.com/sync/")
@mock.patch("ietf.sync.tasks.iana.update_history_with_changes")
@mock.patch("ietf.sync.tasks.iana.parse_changes_json")

View file

@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
import datetime
import subprocess
import os
import json
@ -79,30 +78,18 @@ def notify(request, org, notification):
raise Http404
if request.method == "POST":
def runscript(name):
python = os.path.join(os.path.dirname(settings.BASE_DIR), "env", "bin", "python")
cmd = [python, os.path.join(SYNC_BIN_PATH, name)]
cmdstring = " ".join(cmd)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
out = out.decode('utf-8')
err = err.decode('utf-8')
if p.returncode:
log("Subprocess error %s when running '%s': %s %s" % (p.returncode, cmd, err, out))
raise subprocess.CalledProcessError(p.returncode, cmdstring, "\n".join([err, out]))
if notification == "index":
log("Queuing RFC Editor index sync from notify view POST")
tasks.rfc_editor_index_update_task.delay()
elif notification == "queue":
log("Queuing RFC Editor queue sync from notify view POST")
tasks.rfc_editor_queue_updates_task.delay()
elif notification == "changes":
log("Queuing IANA changes sync from notify view POST")
tasks.iana_changes_update_task.delay()
elif notification == "protocols":
log("Queuing IANA protocols sync from notify view POST")
tasks.iana_protocols_update_task.delay()
elif notification == "queue":
log("Running sync script from notify view POST")
runscript("rfc-editor-queue-updates")
return HttpResponse("OK", content_type="text/plain; charset=%s"%settings.DEFAULT_CHARSET)