From 3a7d0d6255dddd40447d1cfd262f6a58371f9230 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 18 Sep 2017 20:07:48 +0000 Subject: [PATCH] script to process the id-archive and add Document objects for drafts that are currently missing from the datatracker. Fixes #1316. Commit ready for merge. - Legacy-Id: 14138 --- bin/add-old-drafts-from-archive.py | 149 +++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100755 bin/add-old-drafts-from-archive.py diff --git a/bin/add-old-drafts-from-archive.py b/bin/add-old-drafts-from-archive.py new file mode 100755 index 000000000..195501a26 --- /dev/null +++ b/bin/add-old-drafts-from-archive.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python + +import datetime +import os +import sys +from pathlib2 import Path +from contextlib import closing + +os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings" + +import django +django.setup() + +from django.conf import settings +from django.core.validators import validate_email, ValidationError +from ietf.utils.draft import Draft +from ietf.submit.utils import update_authors + +import debug # pyflakes:ignore + +from ietf.doc.models import Document, NewRevisionDocEvent, DocEvent, State +from ietf.person.models import Person + +system = Person.objects.get(name="(System)") +expired = State.objects.get(type='draft',slug='expired') + +names = set() +print 'collecting draft names ...' +versions = 0 +for p in Path(settings.INTERNET_DRAFT_PATH).glob('draft*.txt'): + n = str(p).split('/')[-1].split('-') + if n[-1][:2].isdigit(): + name = '-'.join(n[:-1]) + if '--' in name or '.txt' in name or '[' in name or '=' in name or '&' in name: + continue + if name.startswith('draft-draft-'): + continue + if name == 'draft-ietf-trade-iotp-v1_0-dsig': + continue + if len(n[-1]) != 6: + continue + if name.startswith('draft-mlee-'): + continue + names.add('-'.join(n[:-1])) + +count=0 +print 'iterating through names ...' +for name in sorted(names): + if not Document.objects.filter(name=name).exists(): + paths = list(Path(settings.INTERNET_DRAFT_PATH).glob('%s-??.txt'%name)) + paths.sort() + doc = None + for p in paths: + n = str(p).split('/')[-1].split('-') + rev = n[-1][:2] + with open(str(p)) as txt_file: + raw = txt_file.read() + try: + text = raw.decode('utf8') + except UnicodeDecodeError: + text = raw.decode('latin1') + try: + draft = Draft(text, txt_file.name, name_from_source=True) + except Exception as e: + print name, rev, "Can't parse", p,":",e + continue + if draft.errors and draft.errors.keys()!=['draftname',]: + print "Errors - could not process", name, rev, datetime.datetime.fromtimestamp(p.stat().st_mtime), draft.errors, draft.get_title().encode('utf8') + else: + time = datetime.datetime.fromtimestamp(p.stat().st_mtime) + if not doc: + doc = Document.objects.create(name=name, + time=time, + type_id='draft', + title=draft.get_title(), + abstract=draft.get_abstract(), + rev = rev, + pages=draft.get_pagecount(), + words=draft.get_wordcount(), + expires=time+datetime.timedelta(settings.INTERNET_DRAFT_DAYS_TO_EXPIRE), + ) + doc.docalias_set.create(name=doc.name) + doc.states.add(expired) + # update authors + authors = [] + for author in draft.get_author_list(): + full_name, first_name, middle_initial, last_name, name_suffix, email, country, company = author + + author_name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip() + + if email: + try: + validate_email(email) + except ValidationError: + email = "" + + def turn_into_unicode(s): + if s is None: + return u"" + + if isinstance(s, unicode): + return s + else: + try: + return s.decode("utf-8") + except UnicodeDecodeError: + try: + return s.decode("latin-1") + except UnicodeDecodeError: + return "" + + author_name = turn_into_unicode(author_name) + email = turn_into_unicode(email) + company = turn_into_unicode(company) + + authors.append({ + "name": author_name, + "email": email, + "affiliation": company, + "country": country + }) + dummysubmission=type('', (), {})() #https://stackoverflow.com/questions/19476816/creating-an-empty-object-in-python + dummysubmission.authors = authors + update_authors(doc,dummysubmission) + + # add a docevent with words explaining where this came from + events = [] + e = NewRevisionDocEvent.objects.create( + type="new_revision", + doc=doc, + rev=rev, + by=system, + desc="New version available: %s-%s.txt" % (doc.name, doc.rev), + time=time, + ) + events.append(e) + e = DocEvent.objects.create( + type="comment", + doc = doc, + rev = rev, + by = system, + desc = "Revision added from id-archive on %s by %s"%(datetime.date.today(),sys.argv[0]), + time=time, + ) + events.append(e) + doc.time = time + doc.rev = rev + doc.save_with_history(events) + print "Added",name, rev