datatracker/ietf/sync/iana.py
Ole Laursen e1f0917659 Summary: Add new document saving API, Document.save_with_history(events).
The new API requires at least one event and will automatically save a
snapshot of the document and related state. Document.save() will now
throw an exception if called directly, as the new API is intended to
ensure that documents are saved with both an appropriate snapsnot and
relevant history log, both of which are easily defeated by just
calling .save() directly.

To simplify things, the snapshot is generated after the changes to a
document have been made (in anticipation of coming changes), instead
of before as was usual.

While revising the existing code to work with this API, a couple of
missing events was discovered:

- In draft expiry, a "Document has expired" event was only generated
  in case an IESG process had started on the document - now it's
  always generated, as the document changes its state in any case

- Synchronization updates like title and abstract amendmends from the
  RFC Editor were silently (except for RFC publication) applied and
  not accompanied by a descriptive event - they now are

- do_replace in the Secretariat tools now adds an event

- Proceedings post_process in the Secretariat tools now adds an event

- do_withdraw in the Secretariat tools now adds an event

A migration is needed for snapshotting all documents, takes a while to
run. It turns out that a single document had a bad foreign key so the
migration fixes that too.
 - Legacy-Id: 10101
2015-09-28 14:01:03 +00:00

306 lines
11 KiB
Python

import base64
import datetime
import email
import json
import re
import urllib2
from django.utils.http import urlquote
from django.conf import settings
from ietf.doc.mails import email_ad, email_state_changed
from ietf.doc.models import Document, DocEvent, State, StateDocEvent, StateType
from ietf.doc.utils import add_state_change_event
from ietf.person.models import Person
from ietf.utils.timezone import local_timezone_to_utc, email_time_to_local_timezone, utc_to_local_timezone
#PROTOCOLS_URL = "https://www.iana.org/protocols/"
#CHANGES_URL = "https://datatracker.dev.icann.org:8080/data-tracker/changes"
def fetch_protocol_page(url):
f = urllib2.urlopen(settings.IANA_SYNC_PROTOCOLS_URL)
text = f.read()
f.close()
return text
def parse_protocol_page(text):
"""Parse IANA protocols page to extract referenced RFCs (as
rfcXXXX document names)."""
matches = re.findall('RFC [0-9]+', text)
res = set()
for m in matches:
res.add("rfc" + m[len("RFC "):])
return list(res)
def update_rfc_log_from_protocol_page(rfc_names, rfc_must_published_later_than):
"""Add notices to RFC history log that IANA is now referencing the RFC."""
system = Person.objects.get(name="(System)")
updated = []
docs = Document.objects.filter(docalias__name__in=rfc_names).exclude(
docevent__type="rfc_in_iana_registry").filter(
# only take those that were published after cutoff since we
# have a big bunch of old RFCs that we unfortunately don't have data for
docevent__type="published_rfc", docevent__time__gte=rfc_must_published_later_than
).distinct()
for d in docs:
e = DocEvent(doc=d)
e.by = system
e.type = "rfc_in_iana_registry"
e.desc = "IANA registries were updated to include %s" % d.display_name()
e.save()
updated.append(d)
return updated
def fetch_changes_json(url, start, end):
url += "?start=%s&end=%s" % (urlquote(local_timezone_to_utc(start).strftime("%Y-%m-%d %H:%M:%S")),
urlquote(local_timezone_to_utc(end).strftime("%Y-%m-%d %H:%M:%S")))
request = urllib2.Request(url)
# HTTP basic auth
username = "ietfsync"
password = settings.IANA_SYNC_PASSWORD
request.add_header("Authorization", "Basic %s" % base64.encodestring("%s:%s" % (username, password)).replace("\n", ""))
f = urllib2.urlopen(request)
text = f.read()
f.close()
return text
def parse_changes_json(text):
response = json.loads(text)
if "error" in response:
raise Exception("IANA server returned error: %s" % response["error"])
changes = response["changes"]
# do some rudimentary validation
for i in changes:
for f in ['doc', 'type', 'time']:
if f not in i:
raise Exception('Error in response: Field %s missing in input: %s - %s' % (f, json.dumps(i), json.dumps(changes)))
# a little bit of cleaning
i["doc"] = i["doc"].strip()
if i["doc"].startswith("https://www.ietf.org/internet-drafts/"):
i["doc"] = i["doc"][len("https://www.ietf.org/internet-drafts/"):]
# make sure we process oldest entries first
changes.sort(key=lambda c: c["time"])
return changes
def update_history_with_changes(changes, send_email=True):
"""Take parsed changes from IANA and apply them. Note that we
expect to get these chronologically sorted, otherwise the change
descriptions generated may not be right."""
# build up state lookup
states = {}
slookup = dict((s.slug, s)
for s in State.objects.filter(used=True, type=StateType.objects.get(slug="draft-iana-action")))
states["action"] = {
"": slookup["newdoc"],
"In Progress": slookup["inprog"],
"Open": slookup["inprog"],
"pre-approval In Progress": slookup["inprog"],
"Waiting on Authors": slookup["waitauth"],
"Author": slookup["waitauth"],
"Waiting on ADs": slookup["waitad"],
"Waiting on AD": slookup["waitad"],
"AD": slookup["waitad"],
"Waiting on WGC": slookup["waitwgc"],
"WGC": slookup["waitwgc"],
"Waiting on RFC-Editor": slookup["waitrfc"],
"Waiting on RFC Editor": slookup["waitrfc"],
"RFC-Editor": slookup["waitrfc"],
"RFC-Ed-ACK": slookup["rfcedack"],
"RFC-Editor-ACK": slookup["rfcedack"],
"Completed": slookup["rfcedack"],
"On Hold": slookup["onhold"],
"No IC": slookup["noic"],
}
slookup = dict((s.slug, s)
for s in State.objects.filter(used=True, type=StateType.objects.get(slug="draft-iana-review")))
states["review"] = {
"IANA Review Needed": slookup["need-rev"],
"IANA - Review Needed": slookup["need-rev"],
"IANA OK - Actions Needed": slookup["ok-act"],
"IANA OK - No Actions Needed": slookup["ok-noact"],
"IANA Not OK": slookup["not-ok"],
"IANA - Not OK": slookup["not-ok"],
"Version Changed - Review Needed": slookup["changed"],
}
# so it turns out IANA has made a mistake and are including some
# wrong states, we'll have to skip those
wrong_action_states = ("Waiting on Reviewer", "Review Complete", "Last Call",
"Last Call - Questions", "Evaluation", "Evaluation - Questions",
"With Reviewer", "IESG Notification Received", "Watiing on Last Call",
"IANA Comments Submitted", "Waiting on Last Call")
system = Person.objects.get(name="(System)")
added_events = []
warnings = []
for c in changes:
docname = c['doc']
timestamp = datetime.datetime.strptime(c["time"], "%Y-%m-%d %H:%M:%S")
timestamp = utc_to_local_timezone(timestamp) # timestamps are in UTC
if c['type'] in ("iana_state", "iana_review"):
if c['type'] == "iana_state":
kind = "action"
if c["state"] in wrong_action_states:
warnings.append("Wrong action state '%s' encountered in changes from IANA" % c["state"])
continue
else:
kind = "review"
if c["state"] not in states[kind]:
warnings.append("Unknown IANA %s state %s (%s)" % (kind, c["state"], timestamp))
continue
state = states[kind][c["state"]]
state_type = "draft-iana-%s" % kind
if state.slug in ("need-rev", "changed"):
# the Datatracker is the ultimate source of these
# states, so skip them
continue
e = StateDocEvent.objects.filter(type="changed_state", time=timestamp,
state_type=state_type, state=state)
if not e:
try:
doc = Document.objects.get(docalias__name=docname)
except Document.DoesNotExist:
warnings.append("Document %s not found" % docname)
continue
# the naive way of extracting prev_state here means
# that we assume these changes are cronologically
# applied
prev_state = doc.get_state(state_type)
e = add_state_change_event(doc, system, prev_state, state, timestamp=timestamp)
if e:
# for logging purposes
e.json = c
added_events.append(e)
if not StateDocEvent.objects.filter(doc=doc, time__gt=timestamp, state_type=state_type):
doc.set_state(state)
if e:
doc.save_with_history([e])
if send_email and (state != prev_state):
email_state_changed(None, doc, "IANA %s state changed to %s" % (kind, state.name))
email_ad(None, doc, doc.ad, system, "IANA %s state changed to %s" % (kind, state.name))
return added_events, warnings
def find_document_name(text):
prefixes = ['draft','conflict-review','status-change','charter']
leading_delimiter_re = '(?<![-a-zA-Z0-9])'
prefix_re = '(%s)' % '|'.join(prefixes)
tail_re = '(-[a-z0-9]+)+?(-\d\d\.txt)?'
trailing_delimiter_re = '((?![-a-zA-Z0-9])|$)'
name_re = '%s(%s%s)%s' % (leading_delimiter_re, prefix_re, tail_re, trailing_delimiter_re)
m = re.search(name_re,text)
return m and m.group(0).lower()
def strip_version_extension(text):
if re.search(r"\.\w{3}$", text): # strip off extension
text = text[:-4]
if re.search(r"-\d{2}$", text): # strip off revision
text = text[:-3]
return text
def parse_review_email(text):
msg = email.message_from_string(text)
# doc
doc_name = find_document_name(msg["Subject"]) or ""
doc_name = strip_version_extension(doc_name)
# date
review_time = datetime.datetime.now()
if "Date" in msg:
review_time = email_time_to_local_timezone(msg["Date"])
# by
by = None
m = re.search(r"\"(.*)\"", msg["From"])
if m:
name = m.group(1).strip()
if name.endswith(" via RT"):
name = name[:-len(" via RT")]
try:
by = Person.objects.get(alias__name=name, role__group__acronym="iana")
except Person.DoesNotExist:
pass
if not by:
by = Person.objects.get(name="(System)")
# comment
body = msg.get_payload().decode('quoted-printable').replace("\r", "")
begin_search = re.search('\(BEGIN\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?(\s*:\s*[a-zA-Z0-9-\.]*)?\s*\)',body)
end_search = re.search('\(END\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?\)',body)
if begin_search and end_search:
begin_string = begin_search.group(0)
end_string = end_search.group(0)
b = body.find(begin_string)
e = body.find(end_string)
comment = body[b + len(begin_string):e].strip()
embedded_name = strip_version_extension(find_document_name(begin_string) or "")
if embedded_name:
doc_name = embedded_name
else:
comment = ""
# strip leading IESG:
if comment.startswith("IESG:"):
comment = comment[len("IESG:"):].lstrip()
# strip ending Thanks, followed by signature
m = re.compile(r"^Thanks,\n\n", re.MULTILINE).search(comment)
if m:
comment = comment[:m.start()].rstrip()
m = re.search(r"<(.*)>", msg["From"])
if m:
comment = '(Via %s): %s' % ( m.group(1).strip() , comment )
return doc_name, review_time, by, comment
def add_review_comment(doc_name, review_time, by, comment):
if comment:
try:
e = DocEvent.objects.get(doc__name=doc_name, time=review_time, type="iana_review")
except DocEvent.DoesNotExist:
doc = Document.objects.get(name=doc_name)
e = DocEvent(doc=doc, time=review_time, type="iana_review")
e.desc = comment
e.by = by
e.save()