Add a date-limiting parameter to the importers so they only grab documents whose

last_modified_date is on or after the given date; restrict authors to those
documents, and do likewise for meeting sessions
 - Legacy-Id: 3881
This commit is contained in:
Ole Laursen 2012-01-31 15:05:05 +00:00
parent 987f91ccd3
commit 74caa2ed8b
3 changed files with 30 additions and 7 deletions

View file

@ -27,9 +27,12 @@ from ietf.wgchairs.models import ProtoWriteUp
from workflows.models import State as StateOld
document_name_to_import = None
import_docs_from = document_name_to_import = None
if len(sys.argv) > 1:
document_name_to_import = sys.argv[1]
try:
    # a command-line argument shaped like YYYY-MM-DD is taken as the date limit
    import_docs_from = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d")
except ValueError:
    # not parseable as a date, so the argument is used as a document name instead
    document_name_to_import = sys.argv[1]
dont_save_queries()
@ -225,6 +228,8 @@ def iesg_login_is_secretary(l):
# Amy has two users, for some reason, we sometimes get the wrong one
return l.user_level == IESGLogin.SECRETARIAT_LEVEL or (l.first_name == "Amy" and l.last_name == "Vezza")
old_internetdraft_content_type_id = ContentType.objects.using("legacy").get(app_label="idtracker", model="internetdraft").pk
# regexps for parsing document comments
date_re_str = "(?P<year>[0-9][0-9][0-9][0-9])-(?P<month>[0-9][0-9]?)-(?P<day>[0-9][0-9]?)"
@ -765,15 +770,15 @@ def import_from_idinternal(d, idinternal):
all_drafts = InternetDraft.objects.all().order_by('pk').select_related()
if import_docs_from:
all_drafts = all_drafts.filter(last_modified_date__gte=import_docs_from)
if document_name_to_import:
if document_name_to_import.startswith("rfc"):
all_drafts = all_drafts.filter(rfc_number=document_name_to_import[3:])
else:
all_drafts = all_drafts.filter(filename=document_name_to_import)
#all_drafts = all_drafts[all_drafts.count() - 1000:]
#all_drafts = all_drafts.none()
old_internetdraft_content_type_id = ContentType.objects.using("legacy").get(app_label="idtracker", model="internetdraft").pk
for index, o in enumerate(all_drafts.iterator()):
print "importing", o.id_document_tag, o.filename, index, "ballot %s" % o.idinternal.ballot_id if o.idinternal and o.idinternal.ballot_id else ""

View file

@ -24,6 +24,11 @@ from redesign.importing.utils import old_person_to_person, dont_save_queries
from ietf.name.models import *
from ietf.name.utils import name
import_meetings_from = None
if len(sys.argv) > 1:
import_meetings_from = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d")
dont_save_queries()
# imports Meeting, MeetingVenue, MeetingRoom, NonSession,
@ -294,7 +299,11 @@ def import_materials(wg_meeting_session, session):
obviously_bogus_date = datetime.date(1970, 1, 1)
for o in WgMeetingSession.objects.all().order_by("pk").iterator():
all_sessions = WgMeetingSession.objects.all().order_by("pk")
if import_meetings_from:
all_sessions = all_sessions.filter(last_modified_date__gte=import_meetings_from)
for o in all_sessions.iterator():
# num_session is unfortunately not quite reliable, seems to be
# right for 1 or 2 but not 3 and it's sometimes null
sessions = o.num_session or 1

View file

@ -28,6 +28,11 @@ from redesign.importing.utils import *
# should probably import
# PersonOrOrgInfo/PostalAddress/EmailAddress/PhoneNumber fully
import_docs_from = None
if len(sys.argv) > 1:
import_docs_from = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d")
# make sure special system user/email is created
print "creating (System) person and email"
try:
@ -174,7 +179,11 @@ for o in PersonOrOrgInfo.objects.filter(wgproceedingsactivities__id__gte=1).orde
email = get_or_create_email(o, create_fake=True)
# IDAuthor persons
for o in IDAuthor.objects.all().order_by('id').select_related('person').iterator():
all_authors = IDAuthor.objects.all().order_by('id').select_related('person')
if import_docs_from:
all_authors = all_authors.filter(document__last_modified_date__gte=import_docs_from)
for o in all_authors.iterator():
print "importing IDAuthor", o.id, o.person_id, o.person.first_name.encode('utf-8'), o.person.last_name.encode('utf-8')
email = get_or_create_email(o, create_fake=True)