datatracker/ietf/bin/rfc-editor-index-updates
2023-08-18 10:59:55 -05:00

111 lines
3.3 KiB
Python
Executable file

#!/usr/bin/env python
# This script requires that the proper virtual python environment has been
# invoked before start
import datetime
import io
import os
import requests
import sys
import syslog
import traceback
# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path
os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"
# Before invoking django
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
import django
django.setup()
from django.conf import settings
from optparse import OptionParser
from django.core.mail import mail_admins
from ietf.doc.utils import rebuild_reference_relations
from ietf.utils.log import log
from ietf.utils.pipe import pipe
from ietf.utils.timezone import date_today
import ietf.sync.rfceditor
parser = OptionParser()
parser.add_option("-d", dest="skip_date",
help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
options, args = parser.parse_args()
skip_date = date_today() - datetime.timedelta(days=365)
if options.skip_date:
skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()
log("Updating document metadata from RFC index going back to %s, from %s" % (skip_date, settings.RFC_EDITOR_INDEX_URL))
try:
response = requests.get(
settings.RFC_EDITOR_INDEX_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log(f'GET request timed out retrieving RFC editor index: {exc}')
sys.exit(1)
rfc_index_xml = response.text
index_data = ietf.sync.rfceditor.parse_index(io.StringIO(rfc_index_xml))
try:
response = requests.get(
settings.RFC_EDITOR_ERRATA_JSON_URL,
timeout=30, # seconds
)
except requests.Timeout as exc:
log(f'GET request timed out retrieving RFC editor errata: {exc}')
sys.exit(1)
errata_data = response.json()
if len(index_data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
log("Not enough index entries, only %s" % len(index_data))
sys.exit(1)
if len(errata_data) < ietf.sync.rfceditor.MIN_ERRATA_RESULTS:
log("Not enough errata entries, only %s" % len(errata_data))
sys.exit(1)
new_rfcs = []
for changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_index(index_data, errata_data, skip_older_than_date=skip_date):
if rfc_published:
new_rfcs.append(doc)
for c in changes:
log("RFC%s, %s: %s" % (doc.rfc_number, doc.name, c))
sys.exit(0)
# This can be called while processing a notifying POST from the RFC Editor
# Spawn a child to sync the rfcs and calculate new reference relationships
# so that the POST
newpid = os.fork()
if newpid == 0:
try:
pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
for rfc in new_rfcs:
rebuild_reference_relations(rfc)
log("Updated references for %s"%rfc.name)
except:
subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2]))
mail_admins(subject,msg,fail_silently=True)
log(subject)
os._exit(0)
else:
sys.exit(0)