Add sync scripts for reading from the IANA changes API, reading from the
protocols page (to see when references to newly published RFCs have
been updated), and parsing IANA review emails to be included as
comments
 - Legacy-Id: 4850
This commit is contained in:
Ole Laursen 2012-09-17 15:54:22 +00:00
parent 9fdcbc38ab
commit 5282bd1d07
4 changed files with 388 additions and 0 deletions

67
ietf/bin/iana-changes-updates Executable file
View file

@ -0,0 +1,67 @@
#!/usr/bin/env python
"""Fetch document state changes from the IANA changes API and add
them to the history log.

Without options the period covered is roughly the last 23 hours; use
--from / --to (format YYYY-MM-DD HH:MM:SS) to process another period.
Progress and warnings are written to syslog.
"""
import os, sys, re, json, datetime, optparse
import syslog

# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path

from ietf import settings
from django.core import management
management.setup_environ(settings)

from optparse import OptionParser

parser = OptionParser()
parser.add_option("-f", "--from", dest="start",
                  help="Start time, defaults to a little less than 23 hours ago", metavar="YYYY-MM-DD HH:MM:SS")
parser.add_option("-t", "--to", dest="end",
                  help="End time, defaults to 23 hours later than from", metavar="YYYY-MM-DD HH:MM:SS")
parser.add_option("", "--no-email", dest="send_email", default=True, action="store_false",
                  help="Skip sending emails")
options, args = parser.parse_args()

# compensate to avoid we ask for something that happened now and then
# don't get it back because our request interval is slightly off
CLOCK_SKEW_COMPENSATION = 5 # seconds

# actually the interface accepts 24 hours, but then we get into
# trouble with daylights savings - meh
MAX_INTERVAL_ACCEPTED_BY_IANA = datetime.timedelta(hours=23)

# the compensation is a plain number of seconds, so it has to be
# wrapped in a timedelta before it can be added to a datetime
start = (datetime.datetime.now() - datetime.timedelta(hours=23)
         + datetime.timedelta(seconds=CLOCK_SKEW_COMPENSATION))
if options.start:
    start = datetime.datetime.strptime(options.start, "%Y-%m-%d %H:%M:%S")

end = start + datetime.timedelta(hours=23)
if options.end:
    end = datetime.datetime.strptime(options.end, "%Y-%m-%d %H:%M:%S")

syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

from ietf.sync.iana import *

syslog.syslog("Updating history log with new changes from IANA from %s, period %s - %s" % (CHANGES_URL, start, end))

t = start
while t < end:
    # the IANA server doesn't allow us to fetch more than a certain
    # period, so loop over the requested period and make multiple
    # requests if necessary

    text = fetch_changes_json(CHANGES_URL, t, min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA))
    changes = parse_changes_json(text)
    added_events, warnings = update_history_with_changes(changes, send_email=options.send_email)

    for e in added_events:
        syslog.syslog("Added event for %s %s: %s" % (e.doc_id, e.time, e.desc))

    for w in warnings:
        syslog.syslog("WARNING: %s" % w)

    # advance to the next window; without this the loop never ends
    t += MAX_INTERVAL_ACCEPTED_BY_IANA

34
ietf/bin/iana-protocols-updates Executable file
View file

@ -0,0 +1,34 @@
#!/usr/bin/env python
# Script: add history log entries for RFCs that are referenced from the
# IANA protocols page.
import os, sys, re, json, datetime
import syslog
# log to syslog under this script's file name, including the PID
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)
# boilerplate: put the directory two levels above this script first on
# sys.path so that the ietf package below can be imported, then hand
# the settings module to Django
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path
from ietf import settings
from django.core import management
management.setup_environ(settings)
# the star import provides the names used by the rest of this script
# (PROTOCOLS_URL, fetch_protocol_page, parse_protocol_page, ...)
from ietf.sync.iana import *
def chunks(l, n):
    """Split list l up in chunks of max size n.

    Returns a generator producing the consecutive slices l[0:n],
    l[n:2*n], ...; the last slice may be shorter than n and an empty l
    produces nothing. n must be non-zero (a zero step is rejected with
    ValueError when the generator is created).
    """
    # range() instead of xrange(): same slices, and the helper is then
    # not tied to interpreters that still provide xrange
    return (l[i:i+n] for i in range(0, len(l), n))
# FIXME: this needs to be the date where this tool is first deployed
rfc_must_published_later_than = datetime.datetime(2012, 8, 30)

syslog.syslog("Updating history log with new RFC entries from IANA protocols page %s" % PROTOCOLS_URL)

# download the page and pull out the rfcXXXX names it mentions
rfc_names = parse_protocol_page(fetch_protocol_page(PROTOCOLS_URL))

# hand the names over in batches of at most 100 per call
for batch in chunks(rfc_names, 100):
    for doc in update_rfc_log_from_protocol_page(batch, rfc_must_published_later_than):
        syslog.syslog("Added history entry for %s" % doc.display_name())

27
ietf/bin/iana-review-email Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# Script: read one IANA review email from stdin and store its comment
# as an event on the document it is about.
import os, sys, re, json, datetime, optparse
import syslog

# boilerplate: make the ietf package importable and set up Django
script_dir = os.path.dirname(__file__)
basedir = os.path.abspath(os.path.join(script_dir, "../.."))
sys.path.insert(0, basedir)

from ietf import settings
from django.core import management
management.setup_environ(settings)

syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

from ietf.sync.iana import *

raw_email = sys.stdin.read()

syslog.syslog("Reading IANA review email")

doc_name, review_time, reviewer, comment = parse_review_email(raw_email)
add_review_comment(doc_name, review_time, reviewer, comment)

# parse_review_email falls back to the "(System)" person when the
# sender could not be matched to somebody with an IANA role
if reviewer.name == "(System)":
    syslog.syslog("WARNING: person responsible for email does not have a IANA role")

260
ietf/sync/iana.py Normal file
View file

@ -0,0 +1,260 @@
import re, urllib2, json, email
from django.utils.http import urlquote
from ietf.doc.models import *
from ietf.doc.utils import add_state_change_event
from ietf.person.models import *
from ietf.idrfc.mails import email_owner, email_state_changed, email_authors
from ietf.utils.timezone import *
# page that is scanned for "RFC NNNN" references (see parse_protocol_page)
PROTOCOLS_URL = "http://www.iana.org/protocols/"
# base address of the IANA changes API; fetch_changes_json appends the
# start/end query parameters to the URL it is given
# NOTE(review): the host name looks like a development server - confirm
# this is the address intended for production use
CHANGES_URL = "http://datatracker.dev.icann.org:8080/data-tracker/changes"
def fetch_protocol_page(url):
    """Download the page at url and return its content unparsed.

    url -- address of the protocols page to fetch

    Errors raised by urllib2 while opening or reading propagate to
    the caller.
    """
    # open the address we were handed rather than the module default,
    # so that callers can actually point this at another page
    f = urllib2.urlopen(url)
    try:
        return f.read()
    finally:
        # release the connection even if reading fails
        f.close()
def parse_protocol_page(text):
    """Parse IANA protocols page to extract referenced RFCs (as
    rfcXXXX document names).

    Every "RFC <digits>" occurrence in text contributes one name;
    duplicates are collapsed and the order of the returned list is
    unspecified."""
    prefix = "RFC "
    names = set("rfc" + hit[len(prefix):]
                for hit in re.findall('RFC [0-9]+', text))
    return list(names)
def update_rfc_log_from_protocol_page(rfc_names, rfc_must_published_later_than):
    """Add notices to RFC history log that IANA is now referencing the RFC.

    rfc_names -- document alias names to look at
    rfc_must_published_later_than -- cutoff time; only documents with a
    "published_rfc" event at or after it are considered

    Documents that already have an "rfc_in_iana_registry" event are
    left alone. Returns the list of documents that got a new event."""
    system_person = Person.objects.get(name="(System)")

    candidates = Document.objects.filter(docalias__name__in=rfc_names)
    candidates = candidates.exclude(docevent__type="rfc_in_iana_registry")
    # only take those that were published after cutoff since we
    # have a big bunch of old RFCs that we unfortunately don't have data for
    candidates = candidates.filter(docevent__type="published_rfc",
                                   docevent__time__gte=rfc_must_published_later_than)
    candidates = candidates.distinct()

    touched = []
    for rfc in candidates:
        notice = DocEvent(doc=rfc)
        notice.by = system_person
        notice.type = "rfc_in_iana_registry"
        notice.desc = "IANA registries were updated to include %s" % rfc.display_name()
        notice.save()

        touched.append(rfc)

    return touched
def fetch_changes_json(url, start, end):
    """Fetch the changes recorded between start and end and return the
    response body unparsed.

    url -- base address of the changes API; the "start" and "end"
    query parameters are appended to it
    start, end -- datetimes delimiting the period; each is passed
    through local_timezone_to_utc, formatted as "%Y-%m-%d %H:%M:%S"
    and URL-quoted

    Errors raised by urllib2 while opening or reading propagate to
    the caller.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    url += "?start=%s&end=%s" % (
        urlquote(local_timezone_to_utc(start).strftime(time_format)),
        urlquote(local_timezone_to_utc(end).strftime(time_format)))

    f = urllib2.urlopen(url)
    try:
        return f.read()
    finally:
        # release the connection even if reading fails
        f.close()
def parse_changes_json(text):
    """Decode the JSON text of a changes response.

    Raises Exception if the response has an "error" member or if a
    change lacks one of the fields doc, type or time. The "doc" value
    of each change is stripped of surrounding whitespace and of a
    leading internet-drafts URL. Returns the list of changes sorted
    by their "time" value."""
    draft_url_prefix = "http://www.ietf.org/internet-drafts/"
    required_fields = ['doc', 'type', 'time']

    response = json.loads(text)
    if "error" in response:
        raise Exception("IANA server returned error: %s" % response["error"])

    changes = response["changes"]

    for change in changes:
        # do some rudimentary validation
        missing = [field for field in required_fields if field not in change]
        if missing:
            raise Exception('Error in response: Field %s missing in input: %s - %s' % (missing[0], json.dumps(change), json.dumps(changes)))

        # a little bit of cleaning
        name = change["doc"].strip()
        if name.startswith(draft_url_prefix):
            name = name[len(draft_url_prefix):]
        change["doc"] = name

    # make sure we process oldest entries first
    changes.sort(key=lambda change: change["time"])

    return changes
def update_history_with_changes(changes, send_email=True):
    """Take parsed changes from IANA and apply them. Note that we
    expect to get these in chronologically sorted, otherwise the
    change descriptions generated may not be right.

    changes -- list of dicts with at least "doc", "type" and "time";
    entries of type "iana_state" and "iana_review" are processed (their
    "state" value is looked up), entries of other types are ignored
    send_email -- when true, mails are sent for changes that end up
    updating the current state of a document

    Returns (added_events, warnings): the events that were created and
    a list of warning strings describing the entries that were skipped."""

    # build up state lookup
    states = {}

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(type=StateType.objects.get(slug="draft-iana-action")))
    states["action"] = {
        "": slookup["newdoc"],
        "In Progress": slookup["inprog"],
        "Open": slookup["inprog"],
        "pre-approval In Progress": slookup["inprog"],
        "Waiting on Authors": slookup["waitauth"],
        "Author": slookup["waitauth"],
        "Waiting on ADs": slookup["waitad"],
        "Waiting on AD": slookup["waitad"],
        "AD": slookup["waitad"],
        "Waiting on WGC": slookup["waitwgc"],
        "WGC": slookup["waitwgc"],
        "Waiting on RFC-Editor": slookup["waitrfc"],
        "Waiting on RFC Editor": slookup["waitrfc"],
        "RFC-Editor": slookup["waitrfc"],
        "RFC-Ed-ACK": slookup["rfcedack"],
        "RFC-Editor-ACK": slookup["rfcedack"],
        "Completed": slookup["rfcedack"],
        "On Hold": slookup["onhold"],
        "No IC": slookup["noic"],
    }

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(type=StateType.objects.get(slug="draft-iana-review")))
    states["review"] = {
        "IANA Review Needed": slookup["need-rev"],
        "IANA OK - Actions Needed": slookup["ok-act"],
        "IANA OK - No Actions Needed": slookup["ok-noact"],
        "IANA Not OK": slookup["not-ok"],
        "Version Changed - Review Needed": slookup["changed"],
    }

    # so it turns out IANA has made a mistake and are including some
    # wrong states, we'll have to skip those
    wrong_action_states = ("Waiting on Reviewer", "Review Complete", "Last Call",
                           "Last Call - Questions", "Evaluation", "Evaluation - Questions",
                           "With Reviewer", "IESG Notification Received", "Watiing on Last Call",
                           "IANA Comments Submitted", "Waiting on Last Call")

    system = Person.objects.get(name="(System)")

    added_events = []
    warnings = []

    for c in changes:
        docname = c['doc']
        timestamp = datetime.datetime.strptime(c["time"], "%Y-%m-%d %H:%M:%S")
        timestamp = utc_to_local_timezone(timestamp) # timestamps are in UTC

        if c['type'] not in ("iana_state", "iana_review"):
            continue

        if c['type'] == "iana_state":
            kind = "action"

            if c["state"] in wrong_action_states:
                warnings.append("Wrong action state '%s' encountered in changes from IANA" % c["state"])
                continue
        else:
            kind = "review"

        if c["state"] not in states[kind]:
            warnings.append("Unknown IANA %s state %s (%s)" % (kind, c["state"], timestamp))
            # single parenthesised argument: same output whether print
            # is a statement or a function
            print("Unknown IANA %s state %s" % (kind, c["state"]))
            continue

        state = states[kind][c["state"]]
        state_type = "draft-iana-%s" % kind

        try:
            doc = Document.objects.get(docalias__name=docname)
        except Document.DoesNotExist:
            warnings.append("Document %s not found" % docname)
            continue

        # skip changes we have already recorded; the check has to be
        # limited to this document, otherwise the same state change at
        # the same time on another document would hide this one
        if StateDocEvent.objects.filter(doc=doc, type="changed_state", time=timestamp,
                                        state_type=state_type, state=state):
            continue

        # the naive way of extracting prev_state here means
        # that we assume these changes are chronologically
        # applied
        prev_state = doc.get_state(state_type)
        e = add_state_change_event(doc, system, prev_state, state, timestamp)

        added_events.append(e)

        # only update the document itself when no later change of this
        # state type has been recorded for it
        if not StateDocEvent.objects.filter(doc=doc, time__gt=timestamp, state_type=state_type):
            save_document_in_history(doc)
            doc.set_state(state)

            if send_email:
                email_state_changed(None, doc, "IANA %s state changed to %s" % (kind, state.name))
                email_owner(None, doc, doc.ad, system, "IANA %s state changed to %s" % (kind, state.name))

        if doc.time < timestamp:
            doc.time = timestamp
            doc.save()

    return added_events, warnings
def _doc_name_from_subject(subject):
    """Return the lower-cased name found between <...> in subject, without
    a three-character file extension and two-digit revision, or "" if
    there is none (subject may be None)."""
    m = re.search(r"<([^>]+)>", subject or "")
    if not m:
        return ""

    doc_name = m.group(1).lower()
    if re.search(r"\.\w{3}$", doc_name): # strip off extension
        doc_name = doc_name[:-4]

    if re.search(r"-\d{2}$", doc_name): # strip off revision
        doc_name = doc_name[:-3]

    return doc_name

def _extract_review_comment(body):
    """Return the text between the IANA last call comment markers in body,
    without a leading "IESG:" and a trailing "Thanks," plus signature."""
    begin_marker = "(BEGIN IANA LAST CALL COMMENTS)"
    end_marker = "(END IANA LAST CALL COMMENTS)"

    # find() returns -1 for a missing marker; using that as a slice
    # index would silently cut the text at an arbitrary position, so
    # fall back to the start/end of the body instead
    b = body.find(begin_marker)
    start = b + len(begin_marker) if b != -1 else 0
    e = body.find(end_marker)
    end = e if e != -1 else len(body)

    comment = body[start:end].strip()

    # strip leading IESG:
    if comment.startswith("IESG:"):
        comment = comment[len("IESG:"):].lstrip()

    # strip ending Thanks, followed by signature
    m = re.compile(r"^Thanks,\n\n", re.MULTILINE).search(comment)
    if m:
        comment = comment[:m.start()].rstrip()

    return comment

def parse_review_email(text):
    """Parse the text of an IANA review email.

    Returns (doc_name, review_time, by, comment):
    doc_name -- name taken from the <...> part of the Subject header,
    "" if there is none
    review_time -- converted Date header, or the current time if the
    mail has no Date header
    by -- Person matching the quoted sender name with a role in the
    "iana" group, otherwise the "(System)" person
    comment -- text between the IANA last call comment markers

    A missing Subject or From header is treated like an empty one."""
    msg = email.message_from_string(text)

    # doc
    doc_name = _doc_name_from_subject(msg["Subject"])

    # date
    review_time = datetime.datetime.now()
    if "Date" in msg:
        review_time = email_time_to_local_timezone(msg["Date"])

    # by
    by = None
    m = re.search(r"\"(.*)\"", msg["From"] or "")
    if m:
        name = m.group(1).strip()
        if name.endswith(" via RT"):
            name = name[:-len(" via RT")]

        try:
            by = Person.objects.get(alias__name=name, role__group__acronym="iana")
        except Person.DoesNotExist:
            # unknown sender, fall back to the system person below
            pass

    if not by:
        by = Person.objects.get(name="(System)")

    # comment
    body = msg.get_payload().decode('quoted-printable').replace("\r", "")
    comment = _extract_review_comment(body)

    return doc_name, review_time, by, comment
def add_review_comment(doc_name, review_time, by, comment):
    """Store comment as an "iana_review" event on the document doc_name.

    If an event of that type already exists for the document at
    review_time it is updated, otherwise a new one is created. In both
    cases the event is saved with by and comment as its author and
    description."""
    lookup = dict(doc__name=doc_name, time=review_time, type="iana_review")
    try:
        event = DocEvent.objects.get(**lookup)
    except DocEvent.DoesNotExist:
        event = DocEvent(doc=Document.objects.get(name=doc_name),
                         time=review_time, type="iana_review")

    event.desc = comment
    event.by = by
    event.save()