# Copyright (C) 2009-2010 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved. Contact: Pasi Eronen
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#
#  * Neither the name of the Nokia Corporation and/or its
#    subsidiary(-ies) nor the names of its contributors may be used
#    to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Mirrors the RFC Editor's rfc-index.xml into the local database:
# downloads the index, parses it, and then either refreshes the
# rfc_index_mirror table or (when USE_DB_REDESIGN_PROXY_CLASSES is
# set) updates Document objects directly.

from ietf import settings
from django.core import management
management.setup_environ(settings)
from django import db

from xml.dom import pulldom, Node
import re
import urllib2
from datetime import datetime, date, timedelta
import socket
import sys

INDEX_URL = "http://www.rfc-editor.org/rfc/rfc-index.xml"
TABLE = "rfc_index_mirror"

log_data = ""

def log(line):
    # When run from the command line with an extra argument, print log
    # lines immediately; otherwise buffer them, so the __main__ block at
    # the bottom can dump the whole log only if something goes wrong.
    global log_data
    if __name__ == '__main__' and len(sys.argv) > 1:
        print line
    else:
        log_data += line + "\n"
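# For orientation, parse() below streams through rfc-index.xml and
# handles two kinds of elements: bcp-entry/fyi-entry/std-entry (whose
# <is-also> lists map subseries numbers back to RFCs) and rfc-entry.
# An rfc-entry looks roughly like this abridged sketch; the element
# names are the ones the code reads, but the values here are invented
# for illustration and the real index carries more detail:
#
#   <rfc-entry>
#     <doc-id>RFC9999</doc-id>
#     <title>An Example Protocol</title>
#     <author><name>J. Doe</name></author>
#     <date><month>January</month><year>2010</year></date>
#     <format><file-format>ASCII</file-format></format>
#     <current-status>PROPOSED STANDARD</current-status>
#     <updates><doc-id>RFC9998</doc-id></updates>
#     <is-also><doc-id>BCP9999</doc-id></is-also>
#     <draft>draft-doe-example-protocol-03</draft>
#     <errata-url>http://www.rfc-editor.org/errata_search.php?rfc=9999</errata-url>
#     <stream>IETF</stream>
#     <wg_acronym>example</wg_acronym>
#   </rfc-entry>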
def parse(response):
    def getChildText(parentNode, tagName):
        for node in parentNode.childNodes:
            if node.nodeType == Node.ELEMENT_NODE and node.localName == tagName:
                return node.firstChild.data
        return None

    def getDocList(parentNode, tagName):
        l = []
        for u in parentNode.getElementsByTagName(tagName):
            for d in u.getElementsByTagName("doc-id"):
                l.append(d.firstChild.data)
        if len(l) == 0:
            return None
        else:
            return ",".join(l)

    also_list = {}
    data = []

    events = pulldom.parse(response)
    for (event, node) in events:
        if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
            events.expandNode(node)
            node.normalize()
            # record which BCP/FYI/STD numbers each RFC maps to, so the
            # "also" column can be filled in after the main pass
            bcpid = getChildText(node, "doc-id")
            doclist = getDocList(node, "is-also")
            if doclist:
                for docid in doclist.split(","):
                    if docid in also_list:
                        also_list[docid].append(bcpid)
                    else:
                        also_list[docid] = [bcpid]

        elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
            events.expandNode(node)
            node.normalize()

            rfc_number = int(getChildText(node, "doc-id")[3:])
            title = getChildText(node, "title")

            l = []
            for author in node.getElementsByTagName("author"):
                l.append(getChildText(author, "name"))
            authors = "; ".join(l)

            d = node.getElementsByTagName("date")[0]
            year = int(getChildText(d, "year"))
            month = getChildText(d, "month")
            month = ["January", "February", "March", "April", "May", "June",
                     "July", "August", "September", "October", "November",
                     "December"].index(month) + 1
            # the index only records year and month, so pin the day to 01
            rfc_published_date = ("%d-%02d-01" % (year, month))

            current_status = getChildText(node, "current-status").title()

            updates = getDocList(node, "updates")
            updated_by = getDocList(node, "updated-by")
            obsoletes = getDocList(node, "obsoletes")
            obsoleted_by = getDocList(node, "obsoleted-by")
            stream = getChildText(node, "stream")

            wg = getChildText(node, "wg_acronym")
            if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
                wg = None

            l = []
            for format in node.getElementsByTagName("format"):
                l.append(getChildText(format, "file-format"))
            file_formats = (",".join(l)).lower()

            # strip the "-NN" revision suffix from the draft name
            draft = getChildText(node, "draft")
            if draft and re.search(r"-\d\d$", draft):
                draft = draft[0:-3]

            if len(node.getElementsByTagName("errata-url")) > 0:
                has_errata = 1
            else:
                has_errata = 0

            data.append([rfc_number, title, authors, rfc_published_date,
                         current_status, updates, updated_by, obsoletes,
                         obsoleted_by, None, draft, has_errata, stream,
                         wg, file_formats])

    # fill in the "also" column (index 9) from the subseries mappings
    # collected above
    for d in data:
        k = "RFC%04d" % d[0]
        if k in also_list:
            d[9] = ",".join(also_list[k])

    return data
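# insert_to_database() below refreshes TABLE wholesale. The schema is not
# defined in this script; a minimal sketch of what the INSERT statement
# implies, with column types and widths that are assumptions only, would
# be something like:
#
#   CREATE TABLE rfc_index_mirror (
#       rfc_number         INT PRIMARY KEY,
#       title              VARCHAR(255),
#       authors            VARCHAR(255),
#       rfc_published_date DATE,
#       current_status     VARCHAR(31),
#       updates            VARCHAR(255),
#       updated_by         VARCHAR(255),
#       obsoletes          VARCHAR(255),
#       obsoleted_by       VARCHAR(255),
#       also               VARCHAR(255),
#       draft              VARCHAR(128),
#       has_errata         TINYINT,
#       stream             VARCHAR(15),
#       wg                 VARCHAR(15),
#       file_formats       VARCHAR(63)
#   );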
def insert_to_database(data):
    log("connecting to database...")
    cursor = db.connection.cursor()
    log("removing old data...")
    cursor.execute("DELETE FROM " + TABLE)
    log("inserting new data...")
    cursor.executemany("INSERT INTO " + TABLE + " (rfc_number, title, authors, rfc_published_date, current_status, updates, updated_by, obsoletes, obsoleted_by, also, draft, has_errata, stream, wg, file_formats) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", data)
    cursor.close()
    db.connection._commit()
    db.connection.close()

def get_std_level_mapping():
    from ietf.name.models import StdLevelName
    from ietf.name.utils import name
    return {
        "Standard": name(StdLevelName, "std", "Standard"),
        "Draft Standard": name(StdLevelName, "ds", "Draft Standard"),
        "Proposed Standard": name(StdLevelName, "ps", "Proposed Standard"),
        "Informational": name(StdLevelName, "inf", "Informational"),
        "Experimental": name(StdLevelName, "exp", "Experimental"),
        "Best Current Practice": name(StdLevelName, "bcp", "Best Current Practice"),
        "Historic": name(StdLevelName, "hist", "Historic"),
        "Unknown": name(StdLevelName, "unkn", "Unknown"),
        }

def get_stream_mapping():
    from ietf.name.models import StreamName
    from ietf.name.utils import name
    return {
        "Legacy": name(StreamName, "legacy", "Legacy"),
        "IETF": name(StreamName, "ietf", "IETF"),
        "INDEPENDENT": name(StreamName, "ise", "ISE", desc="Independent submission editor stream"),
        "IAB": name(StreamName, "iab", "IAB"),
        "IRTF": name(StreamName, "irtf", "IRTF"),
        }

import django.db.transaction

@django.db.transaction.commit_on_success
def insert_to_databaseREDESIGN(data):
    from ietf.person.models import Person
    from ietf.doc.models import Document, DocAlias, DocEvent, RelatedDocument, State
    from ietf.group.models import Group
    from ietf.name.models import DocTagName, DocRelationshipName
    from ietf.name.utils import name

    system = Person.objects.get(name="(System)")
    std_level_mapping = get_std_level_mapping()
    stream_mapping = get_stream_mapping()
    tag_has_errata = name(DocTagName, 'errata', "Has errata")
    relationship_obsoletes = name(DocRelationshipName, "obs", "Obsoletes")
    relationship_updates = name(DocRelationshipName, "updates", "Updates")

    skip_older_than_date = (date.today() - timedelta(days=365)).strftime("%Y-%m-%d")

    log("updating data...")
    for d in data:
        rfc_number, title, authors, rfc_published_date, current_status, updates, updated_by, obsoletes, obsoleted_by, also, draft, has_errata, stream, wg, file_formats = d

        if rfc_published_date < skip_older_than_date:
            # speed up the process by skipping old entries
            continue

        # we assume two things can happen: we get a new RFC, or an
        # attribute has been updated at the RFC Editor (RFC Editor
        # attributes currently take precedence over our local
        # attributes)

        # make sure we got the document and alias
        created = False
        doc = None
        name = "rfc%s" % rfc_number
        a = DocAlias.objects.filter(name=name)
        if a:
            doc = a[0].document
        else:
            # no rfcNNNN alias yet: attach to the draft the RFC came
            # from if we know it, otherwise create a new document
            if draft:
                try:
                    doc = Document.objects.get(name=draft)
                except Document.DoesNotExist:
                    pass

            if not doc:
                created = True
                log("created document %s" % name)
                doc = Document.objects.create(name=name)

            # add alias
            DocAlias.objects.create(name=name, document=doc)
            if not created:
                created = True
                log("created alias %s to %s" % (name, doc.name))

        # check attributes
        changed = False
        if title != doc.title:
            doc.title = title
            changed = True

        if std_level_mapping[current_status] != doc.std_level:
            doc.std_level = std_level_mapping[current_status]
            changed = True

        if doc.get_state_slug() != "rfc":
            doc.set_state(State.objects.get(type="draft", slug="rfc"))
            changed = True

        if doc.stream != stream_mapping[stream]:
            doc.stream = stream_mapping[stream]
            changed = True

        if not doc.group and wg:
            doc.group = Group.objects.get(acronym=wg)
            changed = True

        pubdate = datetime.strptime(rfc_published_date, "%Y-%m-%d")
        if not doc.latest_event(type="published_rfc", time=pubdate):
            e = DocEvent(doc=doc, type="published_rfc")
            e.time = pubdate
            e.by = system
            e.desc = "RFC published"
            e.save()
            changed = True

        def parse_relation_list(s):
            if not s:
                return []
            res = []
            for x in s.split(","):
                if x[:3] in ("NIC", "IEN", "STD", "RTR"):
                    # try translating this to RFCs that we can handle
                    # sensibly; otherwise we'll have to ignore them
                    l = DocAlias.objects.filter(name__startswith="rfc", document__docalias__name=x.lower())
                else:
                    l = DocAlias.objects.filter(name=x.lower())

                for a in l:
                    if a not in res:
                        res.append(a)
            return res

        for x in parse_relation_list(obsoletes):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_obsoletes):
                RelatedDocument.objects.create(source=doc, target=x, relationship=relationship_obsoletes)
                changed = True

        for x in parse_relation_list(updates):
            if not RelatedDocument.objects.filter(source=doc, target=x, relationship=relationship_updates):
                RelatedDocument.objects.create(source=doc, target=x, relationship=relationship_updates)
                changed = True

        if also:
            for a in also.lower().split(","):
                if not DocAlias.objects.filter(name=a):
                    DocAlias.objects.create(name=a, document=doc)
                    changed = True

        if has_errata:
            if not doc.tags.filter(pk=tag_has_errata.pk):
                doc.tags.add(tag_has_errata)
                changed = True
        else:
            if doc.tags.filter(pk=tag_has_errata.pk):
                doc.tags.remove(tag_has_errata)
                changed = True

        if changed:
            if not created:
                log("%s changed" % name)
            doc.time = datetime.now()
            doc.save()

if settings.USE_DB_REDESIGN_PROXY_CLASSES:
    insert_to_database = insert_to_databaseREDESIGN

if __name__ == '__main__':
    try:
        log("output from mirror_rfc_index.py:\n")
        log("time: " + str(datetime.now()))
        log("host: " + socket.gethostname())
        log("url: " + INDEX_URL)

        log("downloading...")
        socket.setdefaulttimeout(30)
        response = urllib2.urlopen(INDEX_URL)

        log("parsing...")
        data = parse(response)
        log("got " + str(len(data)) + " entries")
        if len(data) < 5000:
            # sanity check: a complete index should contain well over
            # 5000 entries, so fewer suggests a truncated download
            raise Exception('not enough data')

        insert_to_database(data)
        log("all done!")
        log_data = ""

    finally:
        if len(log_data) > 0:
            print log_data
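# Usage sketch (assuming Django settings and the database are in place;
# this script is typically run unattended, e.g. from cron). Any extra
# command-line argument makes log() print as it goes instead of
# buffering; the argument's value is not inspected:
#
#   python mirror_rfc_index.py            # quiet; buffered log is printed only on failure
#   python mirror_rfc_index.py verbose    # echo progress immediately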