Revamp sync from RFC Editor to include more info, fix a couple of bugs,
and email the Secretariat when a draft sent to the RFC Editor ends up in the queue; split the code up so it's easier to test. Also moved the binaries to bin/ - Legacy-Id: 4848
This commit is contained in:
parent 8cbdc0b03d
commit 09e6203f18
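With the fetch/parse/update steps split out into ietf.sync.rfceditor, each step can be exercised on its own. A minimal sketch of what a test might look like, assuming a hand-written sample in the shape of queue2.xml (the sample XML and the assertions are illustrative, not part of this commit):

# Illustrative only: parse_queue() accepts any file-like object, so a
# test can feed it canned XML instead of fetching from the RFC Editor.
from StringIO import StringIO
from ietf.sync.rfceditor import parse_queue

SAMPLE_QUEUE_XML = """<rfc-editor-queue>
<section name="IETF STREAM: WORKING GROUP STANDARDS TRACK">
<entry><draft>draft-example-test-02.txt</draft>
<date-received>2012-07-01</date-received>
<state>EDIT</state>
</entry>
</section>
</rfc-editor-queue>"""  # assumed shape of queue2.xml

drafts, warnings = parse_queue(StringIO(SAMPLE_QUEUE_XML))
assert drafts[0][0] == "draft-example-test"
assert warnings == []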
ietf/bin/rfc-editor-index-updates  (new executable file, 30 lines)
@@ -0,0 +1,30 @@
#!/usr/bin/env python

import os, sys, re, json, datetime
import syslog

syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path

from ietf import settings
from django.core import management
management.setup_environ(settings)

from ietf.sync.rfceditor import *

syslog.syslog("Updating document metadata from RFC index from %s" % INDEX_URL)

response = fetch_index_xml(INDEX_URL)
data = parse_index(response)

if len(data) < MIN_INDEX_RESULTS:
    syslog.syslog("Not enough results, only %s" % len(data))
    sys.exit(1)

changed = update_docs_from_rfc_index(data)
for c in changed:
    syslog.syslog(c)
ietf/bin/rfc-editor-queue-updates  (new executable file, 35 lines)
@@ -0,0 +1,35 @@
#!/usr/bin/env python

import os, sys, re, json, datetime
import syslog

syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path

from ietf import settings
from django.core import management
management.setup_environ(settings)

from ietf.sync.rfceditor import *

syslog.syslog("Updating RFC Editor queue states from %s" % QUEUE_URL)

response = fetch_queue_xml(QUEUE_URL)
drafts, warnings = parse_queue(response)
for w in warnings:
    syslog.syslog(u"WARNING: %s" % w)

if len(drafts) < MIN_QUEUE_RESULTS:
    syslog.syslog("Not enough results, only %s" % len(drafts))
    sys.exit(1)

changed, warnings = update_drafts_from_queue(drafts)
for w in warnings:
    syslog.syslog(u"WARNING: %s" % w)

for c in changed:
    syslog.syslog(u"Updated %s" % c)
@@ -125,9 +125,9 @@ class IdWrapper:
         if settings.USE_DB_REDESIGN_PROXY_CLASSES:
             s = self._draft.get_state("draft-rfceditor")
             if s:
-                # extract possible extra states
-                tags = self._draft.tags.filter(slug__in=("iana-crd", "ref", "missref"))
-                return " ".join([s.name] + [t.slug.replace("-crd", "").upper() for t in tags])
+                # extract possible extra annotations
+                tags = self._draft.tags.filter(slug__in=("iana", "ref"))
+                return "*".join([s.name] + [t.slug.upper() for t in tags])
             else:
                 return None
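The new rendering folds the extra annotations into the state string with "*" separators, matching how the queue encodes them; a hypothetical value for illustration:

# Hypothetical example of the new format: state EDIT with the "iana"
# tag now renders as "EDIT*IANA" (formerly "EDIT IANA" under the old
# iana-crd/ref/missref tag slugs).
print "*".join(["EDIT"] + [t.upper() for t in ["iana"]])  # EDIT*IANA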
mirror_rfc_editor_queue.py  (deleted file, 293 lines)
@@ -1,293 +0,0 @@
# Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved. Contact: Pasi Eronen <pasi.eronen@nokia.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
#   * Redistributions in binary form must reproduce the above
#     copyright notice, this list of conditions and the following
#     disclaimer in the documentation and/or other materials provided
#     with the distribution.
#
#   * Neither the name of the Nokia Corporation and/or its
#     subsidiary(-ies) nor the names of its contributors may be used
#     to endorse or promote products derived from this software
#     without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from ietf import settings
from django.core import management
management.setup_environ(settings)
from django import db

from xml.dom import pulldom, Node
import re
import urllib2
from datetime import datetime
import socket
import sys

QUEUE_URL = "http://www.rfc-editor.org/queue2.xml"
TABLE = "rfc_editor_queue_mirror"
REF_TABLE = "rfc_editor_queue_mirror_refs"

log_data = ""
def log(line):
    global log_data
    if __name__ == '__main__' and len(sys.argv) > 1:
        print line
    else:
        log_data += line + "\n"

def parse(response):
    def getChildText(parentNode, tagName):
        for node in parentNode.childNodes:
            if node.nodeType == Node.ELEMENT_NODE and node.localName == tagName:
                return node.firstChild.data
        return None

    events = pulldom.parse(response)
    drafts = []
    refs = []
    for (event, node) in events:
        if event == pulldom.START_ELEMENT and node.tagName == "entry":
            events.expandNode(node)
            node.normalize()
            draft_name = getChildText(node, "draft").strip()
            draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
            date_received = getChildText(node, "date-received")

            states = []
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
                    states.append(child.firstChild.data)

            has_refs = False
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
                    ref_name = getChildText(child, "ref-name")
                    ref_state = getChildText(child, "ref-state")
                    in_queue = ref_state.startswith("IN-QUEUE")
                    refs.append([draft_name, ref_name, in_queue, True])
                    has_refs = True
            if has_refs and not "MISSREF" in states:
                states.append("REF")

            if len(states) == 0:
                state = "?"
            else:
                state = " ".join(states)
            drafts.append([draft_name, date_received, state, stream])

        elif event == pulldom.START_ELEMENT and node.tagName == "section":
            name = node.getAttribute('name')
            if name.startswith("IETF"):
                stream = 1
            elif name.startswith("IAB"):
                stream = 2
            elif name.startswith("IRTF"):
                stream = 3
            elif name.startswith("INDEPENDENT"):
                stream = 4
            else:
                stream = 0
                log("WARNING: unrecognized section "+name)
    return (drafts, refs)

# Find set of all normative references (whether direct or via some
# other normative reference)
def find_indirect_refs(drafts, refs):
    result = []
    draft_names = set()
    for draft in drafts:
        draft_names.add(draft[0])

    def recurse(draft_name, ref_set, level):
        for (source, destination, in_queue, direct) in refs:
            if source == draft_name:
                if destination not in ref_set:
                    ref_set.add(destination)
                    recurse(destination, ref_set, level+1)
        if level == 0:
            # Remove self-reference
            ref_set.remove(draft_name)
            # Remove direct references
            for (source, destination, in_queue, direct) in refs:
                if source == draft_name:
                    if destination in ref_set:
                        ref_set.remove(destination)
            # The rest are indirect references
            for ref in ref_set:
                if draft_name != ref:
                    result.append([draft_name, ref, ref in draft_names, False])

    for draft_name in draft_names:
        recurse(draft_name, set([draft_name]), 0)
    return result

# Convert filenames to id_document_tags
def find_document_ids(cursor, drafts, refs):
    draft_ids = {}
    drafts2 = []
    for draft in drafts:
        cursor.execute("SELECT id_document_tag FROM internet_drafts WHERE filename=%s", [draft[0]])
        row = cursor.fetchone()
        if not row:
            log("WARNING: cannot find id for "+draft[0])
        else:
            draft_ids[draft[0]] = row[0]
            drafts2.append([row[0]]+draft[1:])
    refs2 = []
    for ref in refs:
        if ref[0] in draft_ids:
            refs2.append([draft_ids[ref[0]]]+ref[1:])
    return (drafts2, refs2)

def parse_all(response):
    log("parsing...")
    (drafts, refs) = parse(response)
    log("got "+ str(len(drafts)) + " drafts and "+str(len(refs))+" direct refs")

    indirect_refs = find_indirect_refs(drafts, refs)
    log("found " + str(len(indirect_refs)) + " indirect refs")
    refs.extend(indirect_refs)
    del(indirect_refs)

    if settings.USE_DB_REDESIGN_PROXY_CLASSES: # note: return before id lookup
        return (drafts, refs)

    # convert filenames to id_document_tags
    log("connecting to database...")
    cursor = db.connection.cursor()
    log("finding id_document_tags...")
    (drafts, refs) = find_document_ids(cursor, drafts, refs)
    cursor.close()
    return (drafts, refs)

def insert_into_database(drafts, refs):
    log("connecting to database...")
    cursor = db.connection.cursor()
    log("removing old data...")
    cursor.execute("DELETE FROM "+TABLE)
    cursor.execute("DELETE FROM "+REF_TABLE)
    log("inserting new data...")
    cursor.executemany("INSERT INTO "+TABLE+" (id_document_tag, date_received, state, stream) VALUES (%s, %s, %s, %s)", drafts)
    cursor.execute("DELETE FROM "+REF_TABLE)
    cursor.executemany("INSERT INTO "+REF_TABLE+" (source, destination, in_queue, direct) VALUES (%s, %s, %s, %s)", refs)
    cursor.close()
    db.connection._commit()
    db.connection.close()

import django.db.transaction

def get_rfc_tag_mapping():
    """Return dict with RFC Editor state name -> DocTagName"""
    from ietf.name.models import DocTagName
    from ietf.name.utils import name

    return {
        'IANA': name(DocTagName, 'iana-crd', 'IANA coordination', "RFC-Editor/IANA Registration Coordination"),
        'REF': name(DocTagName, 'ref', 'Holding for references', "Holding for normative reference"),
        'MISSREF': name(DocTagName, 'missref', 'Missing references', "Awaiting missing normative reference"),
    }

def get_rfc_state_mapping():
    """Return dict with RFC Editor state name -> State"""
    from ietf.doc.models import State, StateType
    t = StateType.objects.get(slug="draft-rfceditor")
    return {
        'AUTH': State.objects.get_or_create(type=t, slug='auth', name='AUTH', desc="Awaiting author action")[0],
        'AUTH48': State.objects.get_or_create(type=t, slug='auth48', name="AUTH48", desc="Awaiting final author approval")[0],
        'AUTH48-DONE': State.objects.get_or_create(type=t, slug='auth48done', name="AUTH48-DONE", desc="Final approvals are complete")[0],
        'EDIT': State.objects.get_or_create(type=t, slug='edit', name='EDIT', desc="Approved by the stream manager (e.g., IESG, IAB, IRSG, ISE), awaiting processing and publishing")[0],
        'IANA': State.objects.get_or_create(type=t, slug='iana-crd', name='IANA', desc="RFC-Editor/IANA Registration Coordination")[0],
        'IESG': State.objects.get_or_create(type=t, slug='iesg', name='IESG', desc="Holding for IESG action")[0],
        'ISR': State.objects.get_or_create(type=t, slug='isr', name='ISR', desc="Independent Submission Review by the ISE")[0],
        'ISR-AUTH': State.objects.get_or_create(type=t, slug='isr-auth', name='ISR-AUTH', desc="Independent Submission awaiting author update, or in discussion between author and ISE")[0],
        'REF': State.objects.get_or_create(type=t, slug='ref', name='REF', desc="Holding for normative reference")[0],
        'RFC-EDITOR': State.objects.get_or_create(type=t, slug='rfc-edit', name='RFC-EDITOR', desc="Awaiting final RFC Editor review before AUTH48")[0],
        'TO': State.objects.get_or_create(type=t, slug='timeout', name='TO', desc="Time-out period during which the IESG reviews document for conflict/concurrence with other IETF working group work")[0],
        'MISSREF': State.objects.get_or_create(type=t, slug='missref', name='MISSREF', desc="Awaiting missing normative reference")[0],
    }


@django.db.transaction.commit_on_success
def insert_into_databaseREDESIGN(drafts, refs):
    from ietf.doc.models import Document
    from ietf.name.models import DocTagName

    tags = get_rfc_tag_mapping()
    state_map = get_rfc_state_mapping()

    rfc_editor_tags = tags.values()

    log("removing old data...")
    for d in Document.objects.filter(states__type="draft-rfceditor").distinct():
        d.tags.remove(*rfc_editor_tags)
        d.unset_state("draft-rfceditor")

    log("inserting new data...")

    for name, date_received, state_info, stream_id in drafts:
        try:
            d = Document.objects.get(name=name)
        except Document.DoesNotExist:
            log("unknown document %s" % name)
            continue

        state_list = state_info.split(" ")
        if state_list:
            state = state_list[0]
            # For now, ignore the '*R...' that's appeared for some states.
            # FIXME: see if we need to add some refinement for this.
            if '*' in state:
                state = state.split("*")[0]
            # first is state
            d.set_state(state_map[state])

            # remaining are tags
            for x in state_list[1:]:
                d.tags.add(tags[x])

if settings.USE_DB_REDESIGN_PROXY_CLASSES:
    insert_into_database = insert_into_databaseREDESIGN


if __name__ == '__main__':
    try:
        log("output from mirror_rfc_editor_queue.py:\n")
        log("time: "+str(datetime.now()))
        log("host: "+socket.gethostname())
        log("url: "+QUEUE_URL)

        log("downloading...")
        socket.setdefaulttimeout(30)
        response = urllib2.urlopen(QUEUE_URL)

        (drafts, refs) = parse_all(response)
        if len(drafts) < 10 or len(refs) < 10:
            raise Exception('not enough data')

        insert_into_database(drafts, refs)

        log("all done!")
        if log_data.find("WARNING") < 0:
            log_data = ""
    finally:
        if len(log_data) > 0:
            print log_data
mirror_rfc_index.py  (deleted file, 365 lines)
@@ -1,365 +0,0 @@
# Copyright (C) 2009-2010 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved. Contact: Pasi Eronen <pasi.eronen@nokia.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
#   * Redistributions in binary form must reproduce the above
#     copyright notice, this list of conditions and the following
#     disclaimer in the documentation and/or other materials provided
#     with the distribution.
#
#   * Neither the name of the Nokia Corporation and/or its
#     subsidiary(-ies) nor the names of its contributors may be used
#     to endorse or promote products derived from this software
#     without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from ietf import settings
from django.core import management
management.setup_environ(settings)
from django import db

from xml.dom import pulldom, Node
import re
import urllib2
from datetime import datetime, date, timedelta
import socket
import sys

INDEX_URL = "http://www.rfc-editor.org/rfc/rfc-index.xml"
TABLE = "rfc_index_mirror"

log_data = ""
def log(line):
    global log_data
    if __name__ == '__main__' and len(sys.argv) > 1:
        print line
    else:
        log_data += line + "\n"

# Python before 2.7 doesn't have the total_seconds method on datetime.timedelta.
def total_seconds(td):
    return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

def parse(response):
    def getChildText(parentNode, tagName):
        for node in parentNode.childNodes:
            if node.nodeType == Node.ELEMENT_NODE and node.localName == tagName:
                return node.firstChild.data
        return None

    def getDocList(parentNode, tagName):
        l = []
        for u in parentNode.getElementsByTagName(tagName):
            for d in u.getElementsByTagName("doc-id"):
                l.append(d.firstChild.data)
        if len(l) == 0:
            return None
        else:
            return ",".join(l)

    also_list = {}
    data = []
    events = pulldom.parse(response)
    for (event, node) in events:
        if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
            events.expandNode(node)
            node.normalize()
            bcpid = getChildText(node, "doc-id")
            doclist = getDocList(node, "is-also")
            if doclist:
                for docid in doclist.split(","):
                    if docid in also_list:
                        also_list[docid].append(bcpid)
                    else:
                        also_list[docid] = [bcpid]

        elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
            events.expandNode(node)
            node.normalize()
            rfc_number = int(getChildText(node, "doc-id")[3:])
            title = getChildText(node, "title")

            l = []
            for author in node.getElementsByTagName("author"):
                l.append(getChildText(author, "name"))
            authors = "; ".join(l)

            d = node.getElementsByTagName("date")[0]
            year = int(getChildText(d, "year"))
            month = getChildText(d, "month")
            month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
            rfc_published_date = ("%d-%02d-01" % (year, month))

            current_status = getChildText(node, "current-status").title()

            updates = getDocList(node, "updates")
            updated_by = getDocList(node, "updated-by")
            obsoletes = getDocList(node, "obsoletes")
            obsoleted_by = getDocList(node, "obsoleted-by")
            stream = getChildText(node, "stream")
            wg = getChildText(node, "wg_acronym")
            if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
                wg = None

            l = []
            for format in node.getElementsByTagName("format"):
                l.append(getChildText(format, "file-format"))
            file_formats = (",".join(l)).lower()

            draft = getChildText(node, "draft")
            if draft and re.search("-\d\d$", draft):
                draft = draft[0:-3]

            if len(node.getElementsByTagName("errata-url")) > 0:
                has_errata = 1
            else:
                has_errata = 0

            data.append([rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,None,draft,has_errata,stream,wg,file_formats])

    for d in data:
        k = "RFC%04d" % d[0]
        if k in also_list:
            d[9] = ",".join(also_list[k])
    return data

def insert_to_database(data):
    log("connecting to database...")
    cursor = db.connection.cursor()
    log("removing old data...")
    cursor.execute("DELETE FROM "+TABLE)
    log("inserting new data...")
    cursor.executemany("INSERT INTO "+TABLE+" (rfc_number, title, authors, rfc_published_date, current_status,updates,updated_by,obsoletes,obsoleted_by,also,draft,has_errata,stream,wg,file_formats) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", data)
    cursor.close()
    db.connection._commit()
    db.connection.close()

def get_std_level_mapping():
    from ietf.name.models import StdLevelName
    from ietf.name.utils import name
    return {
        "Standard": name(StdLevelName, "std", "Standard"),
        "Draft Standard": name(StdLevelName, "ds", "Draft Standard"),
        "Proposed Standard": name(StdLevelName, "ps", "Proposed Standard"),
        "Informational": name(StdLevelName, "inf", "Informational"),
        "Experimental": name(StdLevelName, "exp", "Experimental"),
        "Best Current Practice": name(StdLevelName, "bcp", "Best Current Practice"),
        "Historic": name(StdLevelName, "hist", "Historic"),
        "Unknown": name(StdLevelName, "unkn", "Unknown"),
    }

def get_stream_mapping():
    from ietf.name.models import StreamName
    from ietf.name.utils import name

    return {
        "IETF": name(StreamName, "ietf", "IETF", desc="IETF stream", order=1),
        "INDEPENDENT": name(StreamName, "ise", "ISE", desc="Independent Submission Editor stream", order=2),
        "IRTF": name(StreamName, "irtf", "IRTF", desc="IRTF stream", order=3),
        "IAB": name(StreamName, "iab", "IAB", desc="IAB stream", order=4),
        "Legacy": name(StreamName, "legacy", "Legacy", desc="Legacy stream", order=5),
    }


import django.db.transaction

@django.db.transaction.commit_on_success
def insert_to_databaseREDESIGN(data):
    from ietf.person.models import Person
    from ietf.doc.models import Document, DocAlias, DocEvent, RelatedDocument, State, save_document_in_history
    from ietf.group.models import Group
    from ietf.name.models import DocTagName, DocRelationshipName
    from ietf.name.utils import name

    system = Person.objects.get(name="(System)")
    std_level_mapping = get_std_level_mapping()
    stream_mapping = get_stream_mapping()
    tag_has_errata = name(DocTagName, 'errata', "Has errata")
    relationship_obsoletes = name(DocRelationshipName, "obs", "Obsoletes")
    relationship_updates = name(DocRelationshipName, "updates", "Updates")

    skip_older_than_date = (date.today() - timedelta(days=365)).strftime("%Y-%m-%d")

    log("updating data...")
    for d in data:
        rfc_number, title, authors, rfc_published_date, current_status, updates, updated_by, obsoletes, obsoleted_by, also, draft, has_errata, stream, wg, file_formats = d

        if rfc_published_date < skip_older_than_date:
            # speed up the process by skipping old entries
            continue

        # we assume two things can happen: we get a new RFC, or an
        # attribute has been updated at the RFC Editor (RFC Editor
        # attributes currently take precedence over our local
        # attributes)

        # make sure we got the document and alias
        created = False
        doc = None
        name = "rfc%s" % rfc_number
        a = DocAlias.objects.filter(name=name)
        if a:
            doc = a[0].document
        else:
            if draft:
                try:
                    doc = Document.objects.get(name=draft)
                except Document.DoesNotExist:
                    pass

            if not doc:
                created = True
                log("created document %s" % name)
                doc = Document.objects.create(name=name)

            # add alias
            DocAlias.objects.create(name=name, document=doc)
            if not created:
                created = True
                log("created alias %s to %s" % (name, doc.name))


        # check attributes
        changed_attributes = {}
        changed_states = []
        created_relations = []
        other_changes = False
        if title != doc.title:
            changed_attributes["title"] = title

        if std_level_mapping[current_status] != doc.std_level:
            changed_attributes["std_level"] = std_level_mapping[current_status]

        if doc.get_state_slug() != "rfc":
            changed_states.append(State.objects.get(type="draft", slug="rfc"))

        if doc.stream != stream_mapping[stream]:
            changed_attributes["stream"] = stream_mapping[stream]

        if not doc.group and wg:
            changed_attributes["group"] = Group.objects.get(acronym=wg)

        if not doc.latest_event(type="published_rfc"):
            e = DocEvent(doc=doc, type="published_rfc")
            pubdate = datetime.strptime(rfc_published_date, "%Y-%m-%d")
            # unfortunately, pubdate doesn't include the correct day
            # at the moment because the data only has month/year, so
            # try to deduce it
            synthesized = datetime.now()
            if abs(pubdate - synthesized) > timedelta(days=60):
                synthesized = pubdate
            else:
                direction = -1 if total_seconds(pubdate - synthesized) < 0 else +1
                while synthesized.month != pubdate.month or synthesized.year != pubdate.year:
                    synthesized += timedelta(days=direction)
            e.time = synthesized
            e.by = system
            e.desc = "RFC published"
            e.save()
            other_changes = True

        if doc.get_state_slug("draft-iesg") == "rfcqueue":
            changed_states.append(State.objects.get(type="draft-iesg", slug="pub"))

        def parse_relation_list(s):
            if not s:
                return []
            res = []
            for x in s.split(","):
                if x[:3] in ("NIC", "IEN", "STD", "RTR"):
                    # try translating this to RFCs that we can handle
                    # sensibly; otherwise we'll have to ignore them
                    l = DocAlias.objects.filter(name__startswith="rfc", document__docalias__name=x.lower())
                else:
                    l = DocAlias.objects.filter(name=x.lower())

                for a in l:
                    if a not in res:
                        res.append(a)
            return res

        for x in parse_relation_list(obsoletes):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_obsoletes):
                created_relations.append(RelatedDocument(source=doc, target=x, relationship=relationship_obsoletes))

        for x in parse_relation_list(updates):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_updates):
                created_relations.append(RelatedDocument(source=doc, target=x, relationship=relationship_updates))

        if also:
            for a in also.lower().split(","):
                if not DocAlias.objects.filter(name=a):
                    DocAlias.objects.create(name=a, document=doc)
                    other_changes = True

        if has_errata:
            if not doc.tags.filter(pk=tag_has_errata.pk):
                changed_attributes["tags"] = list(doc.tags.all()) + [tag_has_errata]
        else:
            if doc.tags.filter(pk=tag_has_errata.pk):
                changed_attributes["tags"] = set(doc.tags.all()) - set([tag_has_errata])

        if changed_attributes or changed_states or created_relations or other_changes:
            # apply changes
            save_document_in_history(doc)
            for k, v in changed_attributes.iteritems():
                setattr(doc, k, v)

            for s in changed_states:
                doc.set_state(s)

            for o in created_relations:
                o.save()

            doc.time = datetime.now()
            doc.save()

            if not created:
                log("%s changed" % name)


if settings.USE_DB_REDESIGN_PROXY_CLASSES:
    insert_to_database = insert_to_databaseREDESIGN

if __name__ == '__main__':
    try:
        log("output from mirror_rfc_index.py:\n")
        log("time: "+str(datetime.now()))
        log("host: "+socket.gethostname())
        log("url: "+INDEX_URL)

        log("downloading...")
        socket.setdefaulttimeout(30)
        response = urllib2.urlopen(INDEX_URL)
        log("parsing...")
        data = parse(response)

        log("got " + str(len(data)) + " entries")
        if len(data) < 5000:
            raise Exception('not enough data')

        insert_to_database(data)

        log("all done!")
        log_data = ""

    finally:
        if len(log_data) > 0:
            print log_data
@@ -159,6 +159,7 @@ INSTALLED_APPS = (
     'ietf.ietfworkflows',
     'ietf.wgchairs',
     'ietf.wgcharter',
+    'ietf.sync',
     'ietf.community',
 )
ietf/sync/__init__.py  (new file, 0 lines)
ietf/sync/models.py  (new file, 0 lines)
ietf/sync/rfceditor.py  (new file, 446 lines)
@@ -0,0 +1,446 @@
import re, urllib2, json, email, socket, datetime
from xml.dom import pulldom, Node

from django.utils.http import urlquote

from ietf.utils.mail import send_mail_text

from ietf.doc.models import *
from ietf.person.models import *
from ietf.name.models import *
from ietf.doc.utils import add_state_change_event

QUEUE_URL = "http://www.rfc-editor.org/queue2.xml"
INDEX_URL = "http://www.rfc-editor.org/rfc/rfc-index.xml"

MIN_QUEUE_RESULTS = 10
MIN_INDEX_RESULTS = 5000

# Python < 2.7 doesn't have the total_seconds method on datetime.timedelta.
def total_seconds(td):
    return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

def get_child_text(parent_node, tag_name):
    for node in parent_node.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and node.localName == tag_name:
            return node.firstChild.data
    return None


def fetch_queue_xml(url):
    socket.setdefaulttimeout(30)
    return urllib2.urlopen(url)

def parse_queue(response):
    events = pulldom.parse(response)
    drafts = []
    warnings = []

    for event, node in events:
        if event == pulldom.START_ELEMENT and node.tagName == "entry":
            events.expandNode(node)
            node.normalize()
            draft_name = get_child_text(node, "draft").strip()
            draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
            date_received = get_child_text(node, "date-received")

            state = ""
            tags = []
            missref_generation = ""
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
                    state = child.firstChild.data
                    # state has some extra annotations encoded, parse
                    # them out
                    if '*R' in state:
                        tags.append("ref")
                        state = state.replace("*R", "")
                    if '*A' in state:
                        tags.append("iana")
                        state = state.replace("*A", "")
                    m = re.search(r"\(([0-9]+)G\)", state)
                    if m:
                        missref_generation = m.group(1)
                        state = state.replace("(%sG)" % missref_generation, "")

            # AUTH48 link
            auth48 = ""
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
                    auth48 = child.firstChild.data

            # cluster link (if it ever gets implemented)
            cluster = ""
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
                    cluster = child.firstChild.data

            refs = []
            for child in node.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
                    ref_name = get_child_text(child, "ref-name")
                    ref_state = get_child_text(child, "ref-state")
                    in_queue = ref_state.startswith("IN-QUEUE")
                    refs.append((ref_name, ref_state, in_queue))

            # 'stream' is set by the enclosing <section> element, which
            # pulldom yields before the entries it contains
            drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))

        elif event == pulldom.START_ELEMENT and node.tagName == "section":
            name = node.getAttribute('name')
            if name.startswith("IETF"):
                stream = "ietf"
            elif name.startswith("IAB"):
                stream = "iab"
            elif name.startswith("IRTF"):
                stream = "irtf"
            elif name.startswith("INDEPENDENT"):
                stream = "ise"
            else:
                stream = None
                warnings.append("unrecognized section " + name)

    return drafts, warnings

def update_drafts_from_queue(drafts):
    tag_mapping = {
        'IANA': DocTagName.objects.get(slug='iana'),
        'REF': DocTagName.objects.get(slug='ref')
    }

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(type=StateType.objects.get(slug="draft-rfceditor")))
    state_mapping = {
        'AUTH': slookup['auth'],
        'AUTH48': slookup['auth48'],
        'AUTH48-DONE': slookup['auth48-done'],
        'EDIT': slookup['edit'],
        'IANA': slookup['iana'],
        'IESG': slookup['iesg'],
        'ISR': slookup['isr'],
        'ISR-AUTH': slookup['isr-auth'],
        'REF': slookup['ref'],
        'RFC-EDITOR': slookup['rfc-edit'],
        'TO': slookup['timeout'],
        'MISSREF': slookup['missref'],
    }

    system = Person.objects.get(name="(System)")

    warnings = []

    names = [t[0] for t in drafts]

    drafts_in_db = dict((d.name, d)
                        for d in Document.objects.filter(type="draft", docalias__name__in=names))

    changed = set()

    for name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs in drafts:
        if name not in drafts_in_db:
            warnings.append("unknown document %s" % name)
            continue

        if not state or state not in state_mapping:
            warnings.append("unknown state '%s'" % state)
            continue

        d = drafts_in_db[name]

        prev_state = d.get_state("draft-rfceditor")
        next_state = state_mapping[state]

        # check if we've noted it's been received
        if d.get_state_slug("draft-iesg") == "ann" and not prev_state and not d.latest_event(DocEvent, type="rfc_editor_received_announcement"):
            e = DocEvent(doc=d, by=system, type="rfc_editor_received_announcement")
            e.desc = "Announcement was received by RFC Editor"
            e.save()
            send_mail_text(None, "iesg-secretary@ietf.org", None,
                           '%s in RFC Editor queue' % d.name,
                           'The announcement for %s has been received by the RFC Editor.' % d.name)

        if prev_state != next_state:
            save_document_in_history(d)

            d.set_state(next_state)

            e = add_state_change_event(d, system, prev_state, next_state)

            if auth48:
                e.desc = re.sub(r"(<b>.*</b>)", r"<a href=\"%s\">\1</a>" % auth48, e.desc)
                e.save()

            changed.add(name)

        t = DocTagName.objects.filter(slug__in=tags)
        if set(t) != set(d.tags.all()):
            d.tags = t
            changed.add(name)


    # remove tags and states for those not in the queue anymore
    for d in Document.objects.exclude(docalias__name__in=names).filter(states__type="draft-rfceditor").distinct():
        d.tags.remove(*tag_mapping.values())
        d.unset_state("draft-rfceditor")
        # we do not add a history entry here - most likely we already
        # have something that explains what happened
        changed.add(d.name)

    return changed, warnings


def fetch_index_xml(url):
    socket.setdefaulttimeout(30)
    return urllib2.urlopen(url)

def parse_index(response):
    def getDocList(parentNode, tagName):
        l = []
        for u in parentNode.getElementsByTagName(tagName):
            for d in u.getElementsByTagName("doc-id"):
                l.append(d.firstChild.data)
        return l

    also_list = {}
    data = []
    events = pulldom.parse(response)
    for event, node in events:
        if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
            events.expandNode(node)
            node.normalize()
            bcpid = get_child_text(node, "doc-id")
            doclist = getDocList(node, "is-also")
            for docid in doclist:
                if docid in also_list:
                    also_list[docid].append(bcpid)
                else:
                    also_list[docid] = [bcpid]

        elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
            events.expandNode(node)
            node.normalize()
            rfc_number = int(get_child_text(node, "doc-id")[3:])
            title = get_child_text(node, "title")

            authors = []
            for author in node.getElementsByTagName("author"):
                authors.append(get_child_text(author, "name"))

            d = node.getElementsByTagName("date")[0]
            year = int(get_child_text(d, "year"))
            month = get_child_text(d, "month")
            month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
            rfc_published_date = datetime.date(year, month, 1)

            current_status = get_child_text(node, "current-status").title()

            updates = getDocList(node, "updates")
            updated_by = getDocList(node, "updated-by")
            obsoletes = getDocList(node, "obsoletes")
            obsoleted_by = getDocList(node, "obsoleted-by")
            stream = get_child_text(node, "stream")
            wg = get_child_text(node, "wg_acronym")
            if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
                wg = None

            l = []
            pages = ""
            for fmt in node.getElementsByTagName("format"):
                l.append(get_child_text(fmt, "file-format"))
                if get_child_text(fmt, "file-format") == "ASCII":
                    pages = get_child_text(fmt, "page-count")
            file_formats = (",".join(l)).lower()

            abstract = ""
            for abstract in node.getElementsByTagName("abstract"):
                abstract = get_child_text(abstract, "p")

            draft = get_child_text(node, "draft")
            if draft and re.search("-\d\d$", draft):
                draft = draft[0:-3]

            if len(node.getElementsByTagName("errata-url")) > 0:
                has_errata = 1
            else:
                has_errata = 0

            data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))

    for d in data:
        k = "RFC%04d" % d[0]
        if k in also_list:
            d[9].extend(also_list[k])
    return data


#skip_older_than_date = date.today() - timedelta(days=365)
def update_docs_from_rfc_index(data, skip_older_than_date=None):
    std_level_mapping = {
        "Standard": StdLevelName.objects.get(slug="std"),
        "Draft Standard": StdLevelName.objects.get(slug="ds"),
        "Proposed Standard": StdLevelName.objects.get(slug="ps"),
        "Informational": StdLevelName.objects.get(slug="inf"),
        "Experimental": StdLevelName.objects.get(slug="exp"),
        "Best Current Practice": StdLevelName.objects.get(slug="bcp"),
        "Historic": StdLevelName.objects.get(slug="hist"),
        "Unknown": StdLevelName.objects.get(slug="unkn"),
    }

    stream_mapping = {
        "IETF": StreamName.objects.get(slug="ietf"),
        "INDEPENDENT": StreamName.objects.get(slug="ise"),
        "IRTF": StreamName.objects.get(slug="irtf"),
        "IAB": StreamName.objects.get(slug="iab"),
        "Legacy": StreamName.objects.get(slug="legacy"),
    }

    tag_has_errata = DocTagName.objects.get(slug='errata')
    relationship_obsoletes = DocRelationshipName.objects.get(slug="obs")
    relationship_updates = DocRelationshipName.objects.get(slug="updates")

    system = Person.objects.get(name="(System)")

    results = []

    for rfc_number, title, authors, rfc_published_date, current_status, updates, updated_by, obsoletes, obsoleted_by, also, draft, has_errata, stream, wg, file_formats, pages, abstract in data:

        if skip_older_than_date and rfc_published_date < skip_older_than_date:
            # speed up the process by skipping old entries
            continue

        # we assume two things can happen: we get a new RFC, or an
        # attribute has been updated at the RFC Editor (RFC Editor
        # attributes take precedence over our local attributes)

        # make sure we got the document and alias
        created = False
        doc = None
        name = "rfc%s" % rfc_number
        a = DocAlias.objects.filter(name=name).select_related("document")
        if a:
            doc = a[0].document
        else:
            if draft:
                try:
                    doc = Document.objects.get(name=draft)
                except Document.DoesNotExist:
                    pass

            if not doc:
                results.append("created document %s" % name)
                doc = Document.objects.get_or_create(name=name)[0]

            # add alias
            DocAlias.objects.get_or_create(name=name, document=doc)
            results.append("created alias %s to %s" % (name, doc.name))
            created = True


        # check attributes
        changed_attributes = {}
        changed_states = []
        created_relations = []
        other_changes = False
        if title != doc.title:
            changed_attributes["title"] = title

        if abstract and abstract != doc.abstract:
            changed_attributes["abstract"] = abstract

        if pages and int(pages) != doc.pages:
            changed_attributes["pages"] = int(pages)

        if std_level_mapping[current_status] != doc.std_level:
            changed_attributes["std_level"] = std_level_mapping[current_status]

        if doc.get_state_slug() != "rfc":
            changed_states.append(State.objects.get(type="draft", slug="rfc"))

        if doc.stream != stream_mapping[stream]:
            changed_attributes["stream"] = stream_mapping[stream]

        if not doc.group and wg:
            changed_attributes["group"] = Group.objects.get(acronym=wg)

        if not doc.latest_event(type="published_rfc"):
            e = DocEvent(doc=doc, type="published_rfc")
            # unfortunately, rfc_published_date doesn't include the correct day
            # at the moment because the data only has month/year, so
            # try to deduce it
            d = datetime.datetime.combine(rfc_published_date, datetime.time())
            synthesized = datetime.datetime.now()
            if abs(d - synthesized) > datetime.timedelta(days=60):
                synthesized = d
            else:
                direction = -1 if total_seconds(d - synthesized) < 0 else +1
                while synthesized.month != d.month or synthesized.year != d.year:
                    synthesized += datetime.timedelta(days=direction)
            e.time = synthesized
            e.by = system
            e.desc = "RFC published"
            e.save()
            other_changes = True

            results.append("Added RFC published event: %s" % e.time.strftime("%Y-%m-%d"))

        for t in ("draft-iesg", "draft-stream-iab", "draft-stream-irtf", "draft-stream-ise"):
            if doc.get_state_slug(t) != "pub":
                changed_states.append(State.objects.get(type=t, slug="pub"))

        def parse_relation_list(l):
            res = []
            for x in l:
                if x[:3] in ("NIC", "IEN", "STD", "RTR"):
                    # try translating this to RFCs that we can handle
                    # sensibly; otherwise we'll have to ignore them
                    l = DocAlias.objects.filter(name__startswith="rfc", document__docalias__name=x.lower())
                else:
                    l = DocAlias.objects.filter(name=x.lower())

                for a in l:
                    if a not in res:
                        res.append(a)
            return res

        for x in parse_relation_list(obsoletes):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_obsoletes):
                created_relations.append(RelatedDocument(source=doc, target=x, relationship=relationship_obsoletes))

        for x in parse_relation_list(updates):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_updates):
                created_relations.append(RelatedDocument(source=doc, target=x, relationship=relationship_updates))

        if also:
            for a in also:
                a = a.lower()
                if not DocAlias.objects.filter(name=a):
                    DocAlias.objects.create(name=a, document=doc)
                    other_changes = True
                    results.append("Created alias %s to %s" % (a, doc.name))

        if has_errata:
            if not doc.tags.filter(pk=tag_has_errata.pk):
                changed_attributes["tags"] = list(doc.tags.all()) + [tag_has_errata]
        else:
            if doc.tags.filter(pk=tag_has_errata.pk):
                changed_attributes["tags"] = set(doc.tags.all()) - set([tag_has_errata])

        if changed_attributes or changed_states or created_relations or other_changes:
            # apply changes
            save_document_in_history(doc)
            for k, v in changed_attributes.iteritems():
                setattr(doc, k, v)
                results.append("Changed %s to %s on %s" % (k, v, doc.name))

            for s in changed_states:
                doc.set_state(s)
                results.append("Set state %s on %s" % (s, doc.name))

            for o in created_relations:
                o.save()
                results.append("Created %s" % o)

            doc.time = datetime.datetime.now()
            doc.save()

    return results
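Taken together, the bin scripts above drive this module as fetch -> parse -> update. A condensed sketch of the index path (the one-year cutoff is an assumed value echoing the commented-out line in the source; the bin scripts themselves leave skip_older_than_date unset):

import datetime
from ietf.sync.rfceditor import (INDEX_URL, MIN_INDEX_RESULTS,
    fetch_index_xml, parse_index, update_docs_from_rfc_index)

data = parse_index(fetch_index_xml(INDEX_URL))
if len(data) >= MIN_INDEX_RESULTS:
    # assumed cutoff: skip index entries published more than a year ago
    cutoff = datetime.date.today() - datetime.timedelta(days=365)
    for line in update_docs_from_rfc_index(data, skip_older_than_date=cutoff):
        print line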