From d6565f045013624291cfd9eca04fa6324143a931 Mon Sep 17 00:00:00 2001
From: Henrik Levkowetz
Date: Thu, 12 Jul 2018 12:52:58 +0000
Subject: [PATCH] Added exception logging for ietf/bin/rfc-editor-queue-updates.
 Changed the logging in the ietf/bin/rfc-editor-* scripts to use
 ietf.utils.log.log(). Reordered some imports.
 - Legacy-Id: 15318
---
 ietf/bin/rfc-editor-index-updates |  25 ++-
 ietf/bin/rfc-editor-queue-updates |  19 ++-
 ietf/sync/rfceditor.py            | 251 ++++++++++++++++--------------
 3 files changed, 152 insertions(+), 143 deletions(-)

diff --git a/ietf/bin/rfc-editor-index-updates b/ietf/bin/rfc-editor-index-updates
index 2af69219f..6354229da 100755
--- a/ietf/bin/rfc-editor-index-updates
+++ b/ietf/bin/rfc-editor-index-updates
@@ -1,7 +1,6 @@
 #!/usr/bin/env python

 import os, sys, datetime
-import syslog
 import traceback

 # boilerplate
@@ -13,8 +12,6 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
 if os.path.exists(virtualenv_activation):
     execfile(virtualenv_activation, dict(__file__=virtualenv_activation))

-syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
-
 import django
 django.setup()

@@ -22,6 +19,13 @@ from django.conf import settings
 from optparse import OptionParser
 from django.core.mail import mail_admins

+from ietf.doc.utils import rebuild_reference_relations
+from ietf.utils.log import log
+from ietf.utils.pipe import pipe
+
+import ietf.sync.rfceditor
+
+
 parser = OptionParser()
 parser.add_option("-d", dest="skip_date",
                   help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
@@ -32,17 +36,13 @@ skip_date = datetime.date.today() - datetime.timedelta(days=365)
 if options.skip_date:
     skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()

-from ietf.utils.pipe import pipe
-from ietf.doc.utils import rebuild_reference_relations
-import ietf.sync.rfceditor
-
-syslog.syslog("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
+log("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)

 response = ietf.sync.rfceditor.fetch_index_xml(settings.RFC_EDITOR_INDEX_URL)
 data = ietf.sync.rfceditor.parse_index(response)

 if len(data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
-    syslog.syslog("Not enough results, only %s" % len(data))
+    log("Not enough results, only %s" % len(data))
     sys.exit(1)

 new_rfcs = []
@@ -51,8 +51,7 @@ for changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_inde
         new_rfcs.append(doc)

     for c in changes:
-        syslog.syslog("%s: %s" % (doc.name, c))
-        print "%s: %s" % (doc.name, c)
+        log("%s: %s" % (doc.name, c))

 sys.exit(0)
@@ -67,12 +66,12 @@ if newpid == 0:
         pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
         for rfc in new_rfcs:
             rebuild_reference_relations(rfc)
-            syslog.syslog("Updated references for %s"%rfc.canonical_name())
+            log("Updated references for %s"%rfc.canonical_name())
     except:
         subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
         msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2]))
         mail_admins(subject,msg,fail_silently=True)
-        syslog.syslog(subject)
+        log(subject)
     os._exit(0)
 else:
     sys.exit(0)
diff --git a/ietf/bin/rfc-editor-queue-updates b/ietf/bin/rfc-editor-queue-updates
index b545dae31..8b3707018 100755
--- a/ietf/bin/rfc-editor-queue-updates
+++ b/ietf/bin/rfc-editor-queue-updates
@@ -1,7 +1,6 @@
 #!/usr/bin/env python

-import os, sys, re, json, datetime
-import syslog
+import os, sys

 # boilerplate
 basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
 sys.path.insert(0, basedir)
@@ -12,28 +11,28 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
 if os.path.exists(virtualenv_activation):
     execfile(virtualenv_activation, dict(__file__=virtualenv_activation))

-syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
-
 import django
 django.setup()

 from django.conf import settings
-from ietf.sync.rfceditor import *

-syslog.syslog("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
+from ietf.sync.rfceditor import fetch_queue_xml, parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue
+from ietf.utils.log import log
+
+log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)

 response = fetch_queue_xml(settings.RFC_EDITOR_QUEUE_URL)
 drafts, warnings = parse_queue(response)
 for w in warnings:
-    syslog.syslog(u"WARNING: %s" % w)
+    log(u"Warning: %s" % w)

 if len(drafts) < MIN_QUEUE_RESULTS:
-    syslog.syslog("Not enough results, only %s" % len(drafts))
+    log("Not enough results, only %s" % len(drafts))
     sys.exit(1)

 changed, warnings = update_drafts_from_queue(drafts)
 for w in warnings:
-    syslog.syslog(u"WARNING: %s" % w)
+    log(u"Warning: %s" % w)

 for c in changed:
-    syslog.syslog(u"Updated %s" % c)
+    log(u"Updated %s" % c)
diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py
index 8e29c669f..7389d946c 100644
--- a/ietf/sync/rfceditor.py
+++ b/ietf/sync/rfceditor.py
@@ -8,6 +8,8 @@ from xml.dom import pulldom, Node

 from django.conf import settings

+import debug # pyflakes:ignore
+
 from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName,
     DocTagName, DocTypeName, RelatedDocument )
 from ietf.doc.expire import move_draft_files_to_archive
@@ -45,68 +47,73 @@ def parse_queue(response):
     stream = None

     for event, node in events:
-        if event == pulldom.START_ELEMENT and node.tagName == "entry":
-            events.expandNode(node)
-            node.normalize()
-            draft_name = get_child_text(node, "draft").strip()
-            draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
-            date_received = get_child_text(node, "date-received")
+        try:
+            if event == pulldom.START_ELEMENT and node.tagName == "entry":
+                events.expandNode(node)
+                node.normalize()
+                draft_name = get_child_text(node, "draft").strip()
+                draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
+                date_received = get_child_text(node, "date-received")
+
+                state = ""
+                tags = []
+                missref_generation = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
+                        state = child.firstChild.data
+                        # state has some extra annotations encoded, parse
+                        # them out
+                        if '*R' in state:
+                            tags.append("ref")
+                            state = state.replace("*R", "")
+                        if '*A' in state:
+                            tags.append("iana")
+                            state = state.replace("*A", "")
+                        m = re.search(r"\(([0-9]+)G\)", state)
+                        if m:
+                            missref_generation = m.group(1)
+                            state = state.replace("(%sG)" % missref_generation, "")
+
+                # AUTH48 link
+                auth48 = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
+                        auth48 = child.firstChild.data
+
+                # cluster link (if it ever gets implemented)
+                cluster = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
+                        cluster = child.firstChild.data
+
+                refs = []
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
+                        ref_name = get_child_text(child, "ref-name")
+                        ref_state = get_child_text(child, "ref-state")
+                        in_queue = ref_state.startswith("IN-QUEUE")
+                        refs.append((ref_name, ref_state, in_queue))
+
+                drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
+
+            elif event == pulldom.START_ELEMENT and node.tagName == "section":
+                name = node.getAttribute('name')
+                if name.startswith("IETF"):
+                    stream = "ietf"
+                elif name.startswith("IAB"):
+                    stream = "iab"
+                elif name.startswith("IRTF"):
+                    stream = "irtf"
+                elif name.startswith("INDEPENDENT"):
+                    stream = "ise"
+                else:
+                    stream = None
+                    warnings.append("unrecognized section " + name)
+        except Exception as e:
+            log("Exception when processing an RFC queue entry: %s" % e)
+            log("node: %s" % node)
+            raise
-            state = ""
-            tags = []
-            missref_generation = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
-                    state = child.firstChild.data
-                    # state has some extra annotations encoded, parse
-                    # them out
-                    if '*R' in state:
-                        tags.append("ref")
-                        state = state.replace("*R", "")
-                    if '*A' in state:
-                        tags.append("iana")
-                        state = state.replace("*A", "")
-                    m = re.search(r"\(([0-9]+)G\)", state)
-                    if m:
-                        missref_generation = m.group(1)
-                        state = state.replace("(%sG)" % missref_generation, "")
-
-            # AUTH48 link
-            auth48 = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
-                    auth48 = child.firstChild.data
-
-            # cluster link (if it ever gets implemented)
-            cluster = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
-                    cluster = child.firstChild.data
-
-            refs = []
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
-                    ref_name = get_child_text(child, "ref-name")
-                    ref_state = get_child_text(child, "ref-state")
-                    in_queue = ref_state.startswith("IN-QUEUE")
-                    refs.append((ref_name, ref_state, in_queue))
-
-            drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
-
-        elif event == pulldom.START_ELEMENT and node.tagName == "section":
-            name = node.getAttribute('name')
-            if name.startswith("IETF"):
-                stream = "ietf"
-            elif name.startswith("IAB"):
-                stream = "iab"
-            elif name.startswith("IRTF"):
-                stream = "irtf"
-            elif name.startswith("INDEPENDENT"):
-                stream = "ise"
-            else:
-                stream = None
-                warnings.append("unrecognized section " + name)
-
     return drafts, warnings

 def update_drafts_from_queue(drafts):
@@ -243,67 +250,71 @@ def parse_index(response):
     data = []
     events = pulldom.parse(response)
     for event, node in events:
-        if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
-            events.expandNode(node)
-            node.normalize()
-            bcpid = normalize_std_name(get_child_text(node, "doc-id"))
-            doclist = extract_doc_list(node, "is-also")
-            for docid in doclist:
-                if docid in also_list:
-                    also_list[docid].append(bcpid)
+        try:
+            if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
+                events.expandNode(node)
+                node.normalize()
+                bcpid = normalize_std_name(get_child_text(node, "doc-id"))
+                doclist = extract_doc_list(node, "is-also")
+                for docid in doclist:
+                    if docid in also_list:
+                        also_list[docid].append(bcpid)
+                    else:
+                        also_list[docid] = [bcpid]
+
+            elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
+                events.expandNode(node)
+                node.normalize()
+                rfc_number = int(get_child_text(node, "doc-id")[3:])
+                title = get_child_text(node, "title")
+
+                authors = []
+                for author in node.getElementsByTagName("author"):
+                    authors.append(get_child_text(author, "name"))
+
+                d = node.getElementsByTagName("date")[0]
+                year = int(get_child_text(d, "year"))
+                month = get_child_text(d, "month")
+                month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
+                rfc_published_date = datetime.date(year, month, 1)
+
+                current_status = get_child_text(node, "current-status").title()
+
+                updates = extract_doc_list(node, "updates")
+                updated_by = extract_doc_list(node, "updated-by")
+                obsoletes = extract_doc_list(node, "obsoletes")
+                obsoleted_by = extract_doc_list(node, "obsoleted-by")
+                stream = get_child_text(node, "stream")
+                wg = get_child_text(node, "wg_acronym")
+                if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
+                    wg = None
+
+                l = []
+                pages = ""
+                for fmt in node.getElementsByTagName("format"):
+                    l.append(get_child_text(fmt, "file-format"))
+                    if get_child_text(fmt, "file-format") == "ASCII":
+                        pages = get_child_text(fmt, "page-count")
+                file_formats = (",".join(l)).lower()
+
+                abstract = ""
+                for abstract in node.getElementsByTagName("abstract"):
+                    abstract = get_child_text(abstract, "p")
+
+                draft = get_child_text(node, "draft")
+                if draft and re.search("-\d\d$", draft):
+                    draft = draft[0:-3]
+
+                if len(node.getElementsByTagName("errata-url")) > 0:
+                    has_errata = 1
                 else:
-                    also_list[docid] = [bcpid]
-
-        elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
-            events.expandNode(node)
-            node.normalize()
-            rfc_number = int(get_child_text(node, "doc-id")[3:])
-            title = get_child_text(node, "title")
-
-            authors = []
-            for author in node.getElementsByTagName("author"):
-                authors.append(get_child_text(author, "name"))
-
-            d = node.getElementsByTagName("date")[0]
-            year = int(get_child_text(d, "year"))
-            month = get_child_text(d, "month")
-            month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
-            rfc_published_date = datetime.date(year, month, 1)
-
-            current_status = get_child_text(node, "current-status").title()
-
-            updates = extract_doc_list(node, "updates")
-            updated_by = extract_doc_list(node, "updated-by")
-            obsoletes = extract_doc_list(node, "obsoletes")
-            obsoleted_by = extract_doc_list(node, "obsoleted-by")
-            stream = get_child_text(node, "stream")
-            wg = get_child_text(node, "wg_acronym")
-            if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
-                wg = None
-
-            l = []
-            pages = ""
-            for fmt in node.getElementsByTagName("format"):
-                l.append(get_child_text(fmt, "file-format"))
-                if get_child_text(fmt, "file-format") == "ASCII":
-                    pages = get_child_text(fmt, "page-count")
-            file_formats = (",".join(l)).lower()
-
-            abstract = ""
-            for abstract in node.getElementsByTagName("abstract"):
-                abstract = get_child_text(abstract, "p")
-
-            draft = get_child_text(node, "draft")
-            if draft and re.search("-\d\d$", draft):
-                draft = draft[0:-3]
-
-            if len(node.getElementsByTagName("errata-url")) > 0:
-                has_errata = 1
-            else:
-                has_errata = 0
-
-            data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
+                    has_errata = 0
+
+                data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
+        except Exception as e:
+            log("Exception when processing an RFC index entry: %s" % e)
+            log("node: %s" % node)
+            raise

     for d in data:
         k = "RFC%04d" % d[0]
         if k in also_list:
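
The ietf/sync/rfceditor.py hunks above all apply the same pattern: the body of
each pulldom event loop is wrapped in a try/except that logs the exception and
the offending node via ietf.utils.log.log() before re-raising, so a malformed
entry is identifiable from the logs while the script still fails loudly.
Below is a minimal, self-contained sketch of that pattern. The sample XML,
collect_drafts(), the simplified get_child_text(), and the local log() helper
(standing in for ietf.utils.log.log()) are illustrative only, not datatracker
code:

    import sys
    from xml.dom import pulldom

    def log(msg):
        # Stand-in for ietf.utils.log.log(); the real helper writes to
        # syslog and/or stderr depending on settings.
        sys.stderr.write(msg + "\n")

    # Hypothetical sample input; the second entry is malformed (an empty
    # <draft/>), so processing it raises an AttributeError.
    SAMPLE = """<queue>
      <entry><draft>draft-example-foo-03.txt</draft></entry>
      <entry><draft/></entry>
    </queue>"""

    def get_child_text(node, name):
        # Simplified version of the helper used in ietf/sync/rfceditor.py.
        child = node.getElementsByTagName(name)[0]
        return child.firstChild.data

    def collect_drafts(xml):
        drafts = []
        events = pulldom.parseString(xml)
        for event, node in events:
            try:
                if event == pulldom.START_ELEMENT and node.tagName == "entry":
                    events.expandNode(node)
                    node.normalize()
                    drafts.append(get_child_text(node, "draft").strip())
            except Exception as e:
                # Log enough context to identify the bad entry, then
                # re-raise so the caller still sees the failure.
                log("Exception when processing an entry: %s" % e)
                log("node: %s" % node)
                raise
        return drafts

    if __name__ == "__main__":
        try:
            collect_drafts(SAMPLE)
        except AttributeError:
            pass    # the failure has already been logged above

Logging before re-raising keeps the existing failure behaviour of the cron
scripts (non-zero exit, traceback mailed to admins) while recording which
queue or index entry broke the run.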