Added exception logging for ietf/bin/rfc-editor-queue-updates. Changed the logging in the ietf/bin/rfc-editor-* scripts to use ietf.utils.log.log(). Reordered some imports.

- Legacy-Id: 15318
This commit is contained in:
Henrik Levkowetz 2018-07-12 12:52:58 +00:00
parent 85c24b8450
commit d6565f0450
3 changed files with 152 additions and 143 deletions

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python
import os, sys, datetime
import syslog
import traceback
# boilerplate
@ -13,8 +12,6 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
if os.path.exists(virtualenv_activation):
execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
import django
django.setup()
@ -22,6 +19,13 @@ from django.conf import settings
from optparse import OptionParser
from django.core.mail import mail_admins
from ietf.doc.utils import rebuild_reference_relations
from ietf.utils.log import log
from ietf.utils.pipe import pipe
import ietf.sync.rfceditor
parser = OptionParser()
parser.add_option("-d", dest="skip_date",
help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
@ -32,17 +36,13 @@ skip_date = datetime.date.today() - datetime.timedelta(days=365)
if options.skip_date:
skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()
from ietf.utils.pipe import pipe
from ietf.doc.utils import rebuild_reference_relations
import ietf.sync.rfceditor
syslog.syslog("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
log("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
response = ietf.sync.rfceditor.fetch_index_xml(settings.RFC_EDITOR_INDEX_URL)
data = ietf.sync.rfceditor.parse_index(response)
if len(data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
syslog.syslog("Not enough results, only %s" % len(data))
log("Not enough results, only %s" % len(data))
sys.exit(1)
new_rfcs = []
@ -51,8 +51,7 @@ for changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_inde
new_rfcs.append(doc)
for c in changes:
syslog.syslog("%s: %s" % (doc.name, c))
print "%s: %s" % (doc.name, c)
log("%s: %s" % (doc.name, c))
sys.exit(0)
@ -67,12 +66,12 @@ if newpid == 0:
pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
for rfc in new_rfcs:
rebuild_reference_relations(rfc)
syslog.syslog("Updated references for %s"%rfc.canonical_name())
log("Updated references for %s"%rfc.canonical_name())
except:
subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2]))
mail_admins(subject,msg,fail_silently=True)
syslog.syslog(subject)
log(subject)
os._exit(0)
else:
sys.exit(0)

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python
import os, sys, re, json, datetime
import syslog
import os, sys
# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
@ -12,28 +11,28 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
if os.path.exists(virtualenv_activation):
execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
import django
django.setup()
from django.conf import settings
from ietf.sync.rfceditor import *
syslog.syslog("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
from ietf.sync.rfceditor import fetch_queue_xml, parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue
from ietf.utils.log import log
log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
response = fetch_queue_xml(settings.RFC_EDITOR_QUEUE_URL)
drafts, warnings = parse_queue(response)
for w in warnings:
syslog.syslog(u"WARNING: %s" % w)
log(u"Warning: %s" % w)
if len(drafts) < MIN_QUEUE_RESULTS:
syslog.syslog("Not enough results, only %s" % len(drafts))
log("Not enough results, only %s" % len(drafts))
sys.exit(1)
changed, warnings = update_drafts_from_queue(drafts)
for w in warnings:
syslog.syslog(u"WARNING: %s" % w)
log(u"Warning: %s" % w)
for c in changed:
syslog.syslog(u"Updated %s" % c)
log(u"Updated %s" % c)

View file

@ -8,6 +8,8 @@ from xml.dom import pulldom, Node
from django.conf import settings
import debug # pyflakes:ignore
from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName,
DocTagName, DocTypeName, RelatedDocument )
from ietf.doc.expire import move_draft_files_to_archive
@ -45,68 +47,73 @@ def parse_queue(response):
stream = None
for event, node in events:
if event == pulldom.START_ELEMENT and node.tagName == "entry":
events.expandNode(node)
node.normalize()
draft_name = get_child_text(node, "draft").strip()
draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
date_received = get_child_text(node, "date-received")
try:
if event == pulldom.START_ELEMENT and node.tagName == "entry":
events.expandNode(node)
node.normalize()
draft_name = get_child_text(node, "draft").strip()
draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
date_received = get_child_text(node, "date-received")
state = ""
tags = []
missref_generation = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
state = child.firstChild.data
# state has some extra annotations encoded, parse
# them out
if '*R' in state:
tags.append("ref")
state = state.replace("*R", "")
if '*A' in state:
tags.append("iana")
state = state.replace("*A", "")
m = re.search(r"\(([0-9]+)G\)", state)
if m:
missref_generation = m.group(1)
state = state.replace("(%sG)" % missref_generation, "")
# AUTH48 link
auth48 = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
auth48 = child.firstChild.data
# cluster link (if it ever gets implemented)
cluster = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
cluster = child.firstChild.data
refs = []
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
ref_name = get_child_text(child, "ref-name")
ref_state = get_child_text(child, "ref-state")
in_queue = ref_state.startswith("IN-QUEUE")
refs.append((ref_name, ref_state, in_queue))
drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
elif event == pulldom.START_ELEMENT and node.tagName == "section":
name = node.getAttribute('name')
if name.startswith("IETF"):
stream = "ietf"
elif name.startswith("IAB"):
stream = "iab"
elif name.startswith("IRTF"):
stream = "irtf"
elif name.startswith("INDEPENDENT"):
stream = "ise"
else:
stream = None
warnings.append("unrecognized section " + name)
except Exception as e:
log("Exception when processing an RFC queue entry: %s" % e)
log("node: %s" % node)
raise
state = ""
tags = []
missref_generation = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
state = child.firstChild.data
# state has some extra annotations encoded, parse
# them out
if '*R' in state:
tags.append("ref")
state = state.replace("*R", "")
if '*A' in state:
tags.append("iana")
state = state.replace("*A", "")
m = re.search(r"\(([0-9]+)G\)", state)
if m:
missref_generation = m.group(1)
state = state.replace("(%sG)" % missref_generation, "")
# AUTH48 link
auth48 = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
auth48 = child.firstChild.data
# cluster link (if it ever gets implemented)
cluster = ""
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
cluster = child.firstChild.data
refs = []
for child in node.childNodes:
if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
ref_name = get_child_text(child, "ref-name")
ref_state = get_child_text(child, "ref-state")
in_queue = ref_state.startswith("IN-QUEUE")
refs.append((ref_name, ref_state, in_queue))
drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
elif event == pulldom.START_ELEMENT and node.tagName == "section":
name = node.getAttribute('name')
if name.startswith("IETF"):
stream = "ietf"
elif name.startswith("IAB"):
stream = "iab"
elif name.startswith("IRTF"):
stream = "irtf"
elif name.startswith("INDEPENDENT"):
stream = "ise"
else:
stream = None
warnings.append("unrecognized section " + name)
return drafts, warnings
def update_drafts_from_queue(drafts):
@ -243,67 +250,71 @@ def parse_index(response):
data = []
events = pulldom.parse(response)
for event, node in events:
if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
events.expandNode(node)
node.normalize()
bcpid = normalize_std_name(get_child_text(node, "doc-id"))
doclist = extract_doc_list(node, "is-also")
for docid in doclist:
if docid in also_list:
also_list[docid].append(bcpid)
try:
if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
events.expandNode(node)
node.normalize()
bcpid = normalize_std_name(get_child_text(node, "doc-id"))
doclist = extract_doc_list(node, "is-also")
for docid in doclist:
if docid in also_list:
also_list[docid].append(bcpid)
else:
also_list[docid] = [bcpid]
elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
events.expandNode(node)
node.normalize()
rfc_number = int(get_child_text(node, "doc-id")[3:])
title = get_child_text(node, "title")
authors = []
for author in node.getElementsByTagName("author"):
authors.append(get_child_text(author, "name"))
d = node.getElementsByTagName("date")[0]
year = int(get_child_text(d, "year"))
month = get_child_text(d, "month")
month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
rfc_published_date = datetime.date(year, month, 1)
current_status = get_child_text(node, "current-status").title()
updates = extract_doc_list(node, "updates")
updated_by = extract_doc_list(node, "updated-by")
obsoletes = extract_doc_list(node, "obsoletes")
obsoleted_by = extract_doc_list(node, "obsoleted-by")
stream = get_child_text(node, "stream")
wg = get_child_text(node, "wg_acronym")
if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
wg = None
l = []
pages = ""
for fmt in node.getElementsByTagName("format"):
l.append(get_child_text(fmt, "file-format"))
if get_child_text(fmt, "file-format") == "ASCII":
pages = get_child_text(fmt, "page-count")
file_formats = (",".join(l)).lower()
abstract = ""
for abstract in node.getElementsByTagName("abstract"):
abstract = get_child_text(abstract, "p")
draft = get_child_text(node, "draft")
if draft and re.search("-\d\d$", draft):
draft = draft[0:-3]
if len(node.getElementsByTagName("errata-url")) > 0:
has_errata = 1
else:
also_list[docid] = [bcpid]
elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
events.expandNode(node)
node.normalize()
rfc_number = int(get_child_text(node, "doc-id")[3:])
title = get_child_text(node, "title")
authors = []
for author in node.getElementsByTagName("author"):
authors.append(get_child_text(author, "name"))
d = node.getElementsByTagName("date")[0]
year = int(get_child_text(d, "year"))
month = get_child_text(d, "month")
month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
rfc_published_date = datetime.date(year, month, 1)
current_status = get_child_text(node, "current-status").title()
updates = extract_doc_list(node, "updates")
updated_by = extract_doc_list(node, "updated-by")
obsoletes = extract_doc_list(node, "obsoletes")
obsoleted_by = extract_doc_list(node, "obsoleted-by")
stream = get_child_text(node, "stream")
wg = get_child_text(node, "wg_acronym")
if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
wg = None
l = []
pages = ""
for fmt in node.getElementsByTagName("format"):
l.append(get_child_text(fmt, "file-format"))
if get_child_text(fmt, "file-format") == "ASCII":
pages = get_child_text(fmt, "page-count")
file_formats = (",".join(l)).lower()
abstract = ""
for abstract in node.getElementsByTagName("abstract"):
abstract = get_child_text(abstract, "p")
draft = get_child_text(node, "draft")
if draft and re.search("-\d\d$", draft):
draft = draft[0:-3]
if len(node.getElementsByTagName("errata-url")) > 0:
has_errata = 1
else:
has_errata = 0
data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
has_errata = 0
data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
except Exception as e:
log("Exception when processing an RFC index entry: %s" % e)
log("node: %s" % node)
raise
for d in data:
k = "RFC%04d" % d[0]
if k in also_list: