# Copyright The IETF Trust 2012-2020, All Rights Reserved
# -*- coding: utf-8 -*-

import base64
import datetime
import re
import requests

from urllib.parse import urlencode
from xml.dom import pulldom, Node

from django.conf import settings
from django.utils import timezone
from django.utils.encoding import smart_bytes, force_str

import debug  # pyflakes:ignore

from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName,
    DocTagName, DocTypeName, RelatedDocument )
from ietf.doc.expire import move_draft_files_to_archive
from ietf.doc.utils import add_state_change_event, prettify_std_name, update_action_holders
from ietf.group.models import Group
from ietf.name.models import StdLevelName, StreamName
from ietf.person.models import Person
from ietf.utils.log import log
from ietf.utils.mail import send_mail_text
from ietf.utils.timezone import datetime_from_date, RPC_TZINFO

#QUEUE_URL = "https://www.rfc-editor.org/queue2.xml"
#INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml"
#POST_APPROVED_DRAFT_URL = "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php"

MIN_ERRATA_RESULTS = 5000
MIN_INDEX_RESULTS = 8000
MIN_QUEUE_RESULTS = 10


def get_child_text(parent_node, tag_name):
    text = []
    for node in parent_node.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and node.localName == tag_name:
            text.append(node.firstChild.data)
    return '\n\n'.join(text)

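# Illustrative sketch of the RFC Editor queue XML that parse_queue() below expects,
# based only on the element names handled in this module (not an authoritative
# schema; the draft names and URL are hypothetical):
#
#   <section name="IETF STREAM: WORKING GROUP STANDARDS TRACK">
#     <entry>
#       <draft>draft-ietf-example-protocol-07.txt</draft>
#       <date-received>2020-01-15</date-received>
#       <state>EDIT*R</state>
#       <auth48-url>https://www.rfc-editor.org/auth48/...</auth48-url>
#       <normRef>
#         <ref-name>draft-ietf-example-dependency</ref-name>
#         <ref-state>IN-QUEUE</ref-state>
#       </normRef>
#     </entry>
#   </section>
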
def parse_queue(response):
    """Parse RFC Editor queue XML into a bunch of tuples + warnings."""

    events = pulldom.parse(response)
    drafts = []
    warnings = []

    stream = None
    for event, node in events:
        try:
            if event == pulldom.START_ELEMENT and node.tagName == "entry":
                events.expandNode(node)
                node.normalize()
                draft_name = get_child_text(node, "draft").strip()
                draft_name = re.sub(r"(-\d\d)?(.txt){1,2}$", "", draft_name)
                date_received = get_child_text(node, "date-received")

                state = ""
                tags = []
                missref_generation = ""
                for child in node.childNodes:
                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
                        state = child.firstChild.data
                        # state has some extra annotations encoded, parse
                        # them out
                        if '*R' in state:
                            tags.append("ref")
                            state = state.replace("*R", "")
                        if '*A' in state:
                            tags.append("iana")
                            state = state.replace("*A", "")
                        m = re.search(r"\(([0-9]+)G\)", state)
                        if m:
                            missref_generation = m.group(1)
                            state = state.replace("(%sG)" % missref_generation, "")

                # AUTH48 link
                auth48 = ""
                for child in node.childNodes:
                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
                        auth48 = child.firstChild.data

                # cluster link (if it ever gets implemented)
                cluster = ""
                for child in node.childNodes:
                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
                        cluster = child.firstChild.data

                refs = []
                for child in node.childNodes:
                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
                        ref_name = get_child_text(child, "ref-name")
                        ref_state = get_child_text(child, "ref-state")
                        in_queue = ref_state.startswith("IN-QUEUE")
                        refs.append((ref_name, ref_state, in_queue))

                drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))

            elif event == pulldom.START_ELEMENT and node.tagName == "section":
                name = node.getAttribute('name')
                if name.startswith("IETF"):
                    stream = "ietf"
                elif name.startswith("IAB"):
                    stream = "iab"
                elif name.startswith("IRTF"):
                    stream = "irtf"
                elif name.startswith("INDEPENDENT"):
                    stream = "ise"
                else:
                    stream = None
                    warnings.append("unrecognized section " + name)
        except Exception as e:
            log("Exception when processing an RFC queue entry: %s" % e)
            log("node: %s" % node)
            raise

    return drafts, warnings

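# Each element of the "drafts" list consumed by update_drafts_from_queue() below is a
# tuple produced by parse_queue() above, in the order (draft_name, date_received,
# state, tags, missref_generation, stream, auth48, cluster, refs), e.g. (illustrative
# values, hypothetical draft names):
#
#   ("draft-ietf-example-protocol", "2020-01-15", "EDIT", ["ref"], "",
#    "ietf", "https://www.rfc-editor.org/auth48/...", "",
#    [("draft-ietf-example-dependency", "IN-QUEUE", True)])
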
def update_drafts_from_queue(drafts):
    """Given a list of parsed drafts from the RFC Editor queue, update the
    documents in the database. Return those that were changed."""

    tag_mapping = {
        'IANA': DocTagName.objects.get(slug='iana'),
        'REF': DocTagName.objects.get(slug='ref')
    }

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(used=True, type=StateType.objects.get(slug="draft-rfceditor")))
    state_mapping = {
        'AUTH': slookup['auth'],
        'AUTH48': slookup['auth48'],
        'AUTH48-DONE': slookup['auth48-done'],
        'EDIT': slookup['edit'],
        'IANA': slookup['iana'],
        'IESG': slookup['iesg'],
        'ISR': slookup['isr'],
        'ISR-AUTH': slookup['isr-auth'],
        'REF': slookup['ref'],
        'RFC-EDITOR': slookup['rfc-edit'],
        'TI': slookup['tooling-issue'],
        'TO': slookup['timeout'],
        'MISSREF': slookup['missref'],
    }

    system = Person.objects.get(name="(System)")

    warnings = []

    names = [t[0] for t in drafts]

    drafts_in_db = dict((d.name, d)
                        for d in Document.objects.filter(type="draft", docalias__name__in=names))

    changed = set()

    for name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs in drafts:
        if name not in drafts_in_db:
            warnings.append("unknown document %s" % name)
            continue

        if not state or state not in state_mapping:
            warnings.append("unknown state '%s' for %s" % (state, name))
            continue

        d = drafts_in_db[name]

        prev_state = d.get_state("draft-rfceditor")
        next_state = state_mapping[state]
        events = []

        # check if we've noted it's been received
        if d.get_state_slug("draft-iesg") == "ann" and not prev_state and not d.latest_event(DocEvent, type="rfc_editor_received_announcement"):
            e = DocEvent(doc=d, rev=d.rev, by=system, type="rfc_editor_received_announcement")
            e.desc = "Announcement was received by RFC Editor"
            e.save()
            send_mail_text(None, "iesg-secretary@ietf.org", None,
                           '%s in RFC Editor queue' % d.name,
                           'The announcement for %s has been received by the RFC Editor.' % d.name)

            # change draft-iesg state to RFC Ed Queue
            prev_iesg_state = State.objects.get(used=True, type="draft-iesg", slug="ann")
            next_iesg_state = State.objects.get(used=True, type="draft-iesg", slug="rfcqueue")

            d.set_state(next_iesg_state)
            e = add_state_change_event(d, system, prev_iesg_state, next_iesg_state)
            if e:
                events.append(e)
            e = update_action_holders(d, prev_iesg_state, next_iesg_state)
            if e:
                events.append(e)
            changed.add(name)

        # check draft-rfceditor state
        if prev_state != next_state:
            d.set_state(next_state)

            e = add_state_change_event(d, system, prev_state, next_state)

            if auth48:
                e.desc = re.sub(r"(<b>.*</b>)", "<a href=\"%s\">\\1</a>" % auth48, e.desc)
                e.save()
                # Create or update the auth48 URL whether or not this is a state expected to have one.
                d.documenturl_set.update_or_create(
                    tag_id='auth48',  # look up existing based on this field
                    defaults=dict(url=auth48)  # create or update with this field
                )
            else:
                # Remove any existing auth48 URL when an update does not have one.
                d.documenturl_set.filter(tag_id='auth48').delete()

            if e:
                events.append(e)

            changed.add(name)

        t = DocTagName.objects.filter(slug__in=tags)
        if set(t) != set(d.tags.all()):
            d.tags.clear()
            d.tags.set(t)
            changed.add(name)

        if events:
            d.save_with_history(events)

    # remove tags and states for those not in the queue anymore
    for d in Document.objects.exclude(docalias__name__in=names).filter(states__type="draft-rfceditor").distinct():
        d.tags.remove(*list(tag_mapping.values()))
        d.unset_state("draft-rfceditor")
        # we do not add a history entry here - most likely we already
        # have something that explains what happened
        changed.add(name)

    return changed, warnings


def parse_index(response):
    """Parse RFC Editor index XML into a bunch of tuples."""

    def normalize_std_name(std_name):
        # remove zero padding
        prefix = std_name[:3]
        if prefix in ("RFC", "FYI", "BCP", "STD"):
            try:
                return prefix + str(int(std_name[3:]))
            except ValueError:
                pass
        return std_name

    def extract_doc_list(parentNode, tagName):
        l = []
        for u in parentNode.getElementsByTagName(tagName):
            for d in u.getElementsByTagName("doc-id"):
                l.append(normalize_std_name(d.firstChild.data))
        return l

    also_list = {}
    data = []
    events = pulldom.parse(response)
    for event, node in events:
        try:
            if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
                events.expandNode(node)
                node.normalize()
                bcpid = normalize_std_name(get_child_text(node, "doc-id"))
                doclist = extract_doc_list(node, "is-also")
                for docid in doclist:
                    if docid in also_list:
                        also_list[docid].append(bcpid)
                    else:
                        also_list[docid] = [bcpid]

            elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
                events.expandNode(node)
                node.normalize()
                rfc_number = int(get_child_text(node, "doc-id")[3:])
                title = get_child_text(node, "title")

                authors = []
                for author in node.getElementsByTagName("author"):
                    authors.append(get_child_text(author, "name"))

                d = node.getElementsByTagName("date")[0]
                year = int(get_child_text(d, "year"))
                month = get_child_text(d, "month")
                month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
                rfc_published_date = datetime.date(year, month, 1)

                current_status = get_child_text(node, "current-status").title()

                updates = extract_doc_list(node, "updates")
                updated_by = extract_doc_list(node, "updated-by")
                obsoletes = extract_doc_list(node, "obsoletes")
                obsoleted_by = extract_doc_list(node, "obsoleted-by")
                pages = get_child_text(node, "page-count")
                stream = get_child_text(node, "stream")
                wg = get_child_text(node, "wg_acronym")
                if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
                    wg = None

                l = []
                for fmt in node.getElementsByTagName("format"):
                    l.append(get_child_text(fmt, "file-format"))
                file_formats = (",".join(l)).lower()

                abstract = ""
                for abstract in node.getElementsByTagName("abstract"):
                    abstract = get_child_text(abstract, "p")

                draft = get_child_text(node, "draft")
                if draft and re.search(r"-\d\d$", draft):
                    draft = draft[0:-3]

                if len(node.getElementsByTagName("errata-url")) > 0:
                    has_errata = 1
                else:
                    has_errata = 0

                data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
        except Exception as e:
            log("Exception when processing an RFC index entry: %s" % e)
            log("node: %s" % node)
            raise

    for d in data:
        k = "RFC%04d" % d[0]
        if k in also_list:
            d[9].extend(also_list[k])

    return data

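# index_data passed to update_docs_from_rfc_index() below is the list of tuples
# produced by parse_index() above, unpacked field-for-field in the loop inside the
# function. errata_data is a list of dicts of which only the 'doc-id' and
# 'errata_status_code' fields are read here, e.g. (illustrative sketch):
#
#   [{"doc-id": "RFC1234", "errata_status_code": "Verified"}, ...]
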
def update_docs_from_rfc_index(index_data, errata_data, skip_older_than_date=None):
    """Given parsed data from the RFC Editor index, update the documents in the
    database. Yields a list of change descriptions for each document, if any.

    The skip_older_than_date is a bare date, not a datetime.
    """

    errata = {}
    for item in errata_data:
        name = item['doc-id']
        if not name in errata:
            errata[name] = []
        errata[name].append(item)

    std_level_mapping = {
        "Standard": StdLevelName.objects.get(slug="std"),
        "Internet Standard": StdLevelName.objects.get(slug="std"),
        "Draft Standard": StdLevelName.objects.get(slug="ds"),
        "Proposed Standard": StdLevelName.objects.get(slug="ps"),
        "Informational": StdLevelName.objects.get(slug="inf"),
        "Experimental": StdLevelName.objects.get(slug="exp"),
        "Best Current Practice": StdLevelName.objects.get(slug="bcp"),
        "Historic": StdLevelName.objects.get(slug="hist"),
        "Unknown": StdLevelName.objects.get(slug="unkn"),
    }

    stream_mapping = {
        "IETF": StreamName.objects.get(slug="ietf"),
        "INDEPENDENT": StreamName.objects.get(slug="ise"),
        "IRTF": StreamName.objects.get(slug="irtf"),
        "IAB": StreamName.objects.get(slug="iab"),
        "Legacy": StreamName.objects.get(slug="legacy"),
    }

    tag_has_errata = DocTagName.objects.get(slug='errata')
    tag_has_verified_errata = DocTagName.objects.get(slug='verified-errata')
    relationship_obsoletes = DocRelationshipName.objects.get(slug="obs")
    relationship_updates = DocRelationshipName.objects.get(slug="updates")

    system = Person.objects.get(name="(System)")

    for rfc_number, title, authors, rfc_published_date, current_status, updates, updated_by, obsoletes, obsoleted_by, also, draft, has_errata, stream, wg, file_formats, pages, abstract in index_data:

        if skip_older_than_date and rfc_published_date < skip_older_than_date:
            # speed up the process by skipping old entries
            continue

        # we assume two things can happen: we get a new RFC, or an
        # attribute has been updated at the RFC Editor (RFC Editor
        # attributes take precedence over our local attributes)
        events = []
        changes = []
        rfc_published = False

        # make sure we got the document and alias
        doc = None
        name = "rfc%s" % rfc_number
        a = DocAlias.objects.filter(name=name)
        if a:
            doc = a[0].document
        else:
            if draft:
                try:
                    doc = Document.objects.get(name=draft)
                except Document.DoesNotExist:
                    pass

            if not doc:
                changes.append("created document %s" % prettify_std_name(name))
                doc = Document.objects.create(name=name, type=DocTypeName.objects.get(slug="draft"))

            # add alias
            alias, __ = DocAlias.objects.get_or_create(name=name)
            alias.docs.add(doc)
            changes.append("created alias %s" % prettify_std_name(name))

        # check attributes
        if title != doc.title:
            doc.title = title
            changes.append("changed title to '%s'" % doc.title)

        if abstract and abstract != doc.abstract:
            doc.abstract = abstract
            changes.append("changed abstract to '%s'" % doc.abstract)

        if pages and int(pages) != doc.pages:
            doc.pages = int(pages)
            changes.append("changed pages to %s" % doc.pages)

        if std_level_mapping[current_status] != doc.std_level:
            doc.std_level = std_level_mapping[current_status]
            changes.append("changed standardization level to %s" % doc.std_level)

        if doc.get_state_slug() != "rfc":
            doc.set_state(State.objects.get(used=True, type="draft", slug="rfc"))
            move_draft_files_to_archive(doc, doc.rev)
            changes.append("changed state to %s" % doc.get_state())

        if doc.stream != stream_mapping[stream]:
            doc.stream = stream_mapping[stream]
            changes.append("changed stream to %s" % doc.stream)

        if not doc.group:
            # if we have no group assigned, check if RFC Editor has a suggestion
            if wg:
                doc.group = Group.objects.get(acronym=wg)
                changes.append("set group to %s" % doc.group)
            else:
                doc.group = Group.objects.get(type="individ")  # fallback for newly created doc

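        # Worked example of the date synthesis below (editorial note, not part of the
        # index data): the index only carries month/year, so rfc_published_date always
        # has day 1. If this sync runs within the publication month, "synthesized"
        # keeps the current timestamp; if it runs shortly before or after that month,
        # the timestamp is walked one day at a time until it falls inside the
        # publication month; if it runs more than 60 days away, the first of the
        # publication month is used as-is.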
        if not doc.latest_event(type="published_rfc"):
            e = DocEvent(doc=doc, rev=doc.rev, type="published_rfc")

            # unfortunately, rfc_published_date doesn't include the correct day
            # at the moment because the data only has month/year, so
            # try to deduce it
            #
            # Note: This is done in PST8PDT to preserve compatibility with events created when
            # USE_TZ was False. The published_rfc event was created with a timestamp whose
            # server-local datetime (PST8PDT) matched the publication date from the RFC index.
            # When switching to USE_TZ=True, the timestamps were migrated so they still
            # matched the publication date in PST8PDT. When interpreting the event timestamp
            # as a publication date, you must treat it in the PST8PDT time zone. The
            # RPC_TZINFO constant in ietf.utils.timezone is defined for this purpose.
            d = datetime_from_date(rfc_published_date, RPC_TZINFO)
            synthesized = timezone.now().astimezone(RPC_TZINFO)
            if abs(d - synthesized) > datetime.timedelta(days=60):
                synthesized = d
            else:
                direction = -1 if (d - synthesized).total_seconds() < 0 else +1
                while synthesized.month != d.month or synthesized.year != d.year:
                    synthesized += datetime.timedelta(days=direction)

            e.time = synthesized
            e.by = system
            e.desc = "RFC published"
            e.save()
            events.append(e)

            changes.append("added RFC published event at %s" % e.time.strftime("%Y-%m-%d"))
            rfc_published = True

        for t in ("draft-iesg", "draft-stream-iab", "draft-stream-irtf", "draft-stream-ise"):
            prev_state = doc.get_state(t)
            if prev_state is not None:
                if prev_state.slug not in ("pub", "idexists"):
                    new_state = State.objects.select_related("type").get(used=True, type=t, slug="pub")
                    doc.set_state(new_state)
                    changes.append("changed %s to %s" % (new_state.type.label, new_state))
                    e = update_action_holders(doc, prev_state, new_state)
                    if e:
                        events.append(e)
            elif t == 'draft-iesg':
                doc.set_state(State.objects.get(type_id='draft-iesg', slug='idexists'))

        def parse_relation_list(l):
            res = []
            for x in l:
                if x[:3] in ("NIC", "IEN", "STD", "RTR"):
                    # try translating this to RFCs that we can handle
                    # sensibly; otherwise we'll have to ignore them
                    l = DocAlias.objects.filter(name__startswith="rfc", docs__docalias__name=x.lower())
                else:
                    l = DocAlias.objects.filter(name=x.lower())

                for a in l:
                    if a not in res:
                        res.append(a)
            return res

        for x in parse_relation_list(obsoletes):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_obsoletes):
                r = RelatedDocument.objects.create(source=doc, target=x, relationship=relationship_obsoletes)
                changes.append("created %s relation between %s and %s" % (r.relationship.name.lower(), prettify_std_name(r.source.name), prettify_std_name(r.target.name)))

        for x in parse_relation_list(updates):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_updates):
                r = RelatedDocument.objects.create(source=doc, target=x, relationship=relationship_updates)
                changes.append("created %s relation between %s and %s" % (r.relationship.name.lower(), prettify_std_name(r.source.name), prettify_std_name(r.target.name)))

        if also:
            for a in also:
                a = a.lower()
                if not DocAlias.objects.filter(name=a):
                    DocAlias.objects.create(name=a).docs.add(doc)
                    changes.append("created alias %s" % prettify_std_name(a))

        doc_errata = errata.get('RFC%04d'%rfc_number, [])
        all_rejected = doc_errata and all( er['errata_status_code']=='Rejected' for er in doc_errata )
        if has_errata and not all_rejected:
            if not doc.tags.filter(pk=tag_has_errata.pk).exists():
                doc.tags.add(tag_has_errata)
                changes.append("added Errata tag")
            has_verified_errata = any([ er['errata_status_code']=='Verified' for er in doc_errata ])
            if has_verified_errata and not doc.tags.filter(pk=tag_has_verified_errata.pk).exists():
                doc.tags.add(tag_has_verified_errata)
                changes.append("added Verified Errata tag")
        else:
            if doc.tags.filter(pk=tag_has_errata.pk):
                doc.tags.remove(tag_has_errata)
                if all_rejected:
                    changes.append("removed Errata tag (all errata rejected)")
                else:
                    changes.append("removed Errata tag")
            if doc.tags.filter(pk=tag_has_verified_errata.pk):
                doc.tags.remove(tag_has_verified_errata)
                changes.append("removed Verified Errata tag")

        if changes:
            events.append(DocEvent.objects.create(
                doc=doc,
                rev=doc.rev,
                by=system,
                type="sync_from_rfc_editor",
                desc="Received changes through RFC Editor sync (%s)" % ", ".join(changes),
            ))

            doc.save_with_history(events)

        if changes:
            yield changes, doc, rfc_published

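# Illustrative sketch of the exchange post_approved_draft() performs, derived only
# from the request/response handling below (the server-side behaviour is the RFC
# Editor's, and the draft name is hypothetical): it POSTs an urlencoded body such as
#
#   draft=draft-ietf-example-protocol-07
#
# with HTTP Basic auth to the given URL, and treats anything other than an HTTP 200
# response whose body is exactly "OK" as an error.
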
def post_approved_draft(url, name):
    """Post an approved draft to the RFC Editor so they can retrieve
    the data from the Datatracker and start processing it. Returns
    response and error (empty string if no error)."""

    # HTTP basic auth
    username = "dtracksync"
    password = settings.RFC_EDITOR_SYNC_PASSWORD
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
        "Authorization": "Basic %s" % force_str(base64.encodebytes(smart_bytes("%s:%s" % (username, password)))).replace("\n", ""),
    }

    log("Posting RFC-Editor notification of approved Internet-Draft '%s' to '%s'" % (name, url))
    text = error = ""

    try:
        r = requests.post(
            url,
            headers=headers,
            data=smart_bytes(urlencode({ 'draft': name })),
            timeout=settings.DEFAULT_REQUESTS_TIMEOUT,
        )

        log("RFC-Editor notification result for Internet-Draft '%s': %s:'%s'" % (name, r.status_code, r.text))

        if r.status_code != 200:
            raise RuntimeError("Status code is not 200 OK (it's %s)." % r.status_code)

        if force_str(r.text) != "OK":
            raise RuntimeError('Response is not "OK" (it\'s "%s").' % r.text)

    except Exception as e:
        # catch everything so we don't leak exceptions, convert them
        # into string instead
        msg = "Exception on RFC-Editor notification for Internet-Draft '%s': %s: %s" % (name, type(e), str(e))
        log(msg)
        if settings.SERVER_MODE == 'test':
            debug.say(msg)
        error = str(e)

    return text, error
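
# Example call site (illustrative only; the real URL comes from configuration, cf. the
# commented-out POST_APPROVED_DRAFT_URL at the top of this module, and the draft name
# is hypothetical):
#
#     text, error = post_approved_draft(
#         "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php",
#         "draft-ietf-example-protocol-07",
#     )
#     if error:
#         log("RFC-Editor notification failed: %s" % error)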