Refactor ietf/bin/merge-person-records to facilitate testing. Add tests. Fixes #2162. Commit ready for merge.

- Legacy-Id: 13567
This commit is contained in:
Ryan Cross 2017-06-08 23:28:43 +00:00
parent 77f4bf21ff
commit e9b7c5753e
5 changed files with 267 additions and 225 deletions

View file

@ -27,83 +27,12 @@ django.setup()
# ------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------
import argparse import argparse
import datetime
import pprint
import syslog
from django.contrib import admin from django.contrib import admin
from django.contrib.auth.models import User
from ietf.person.models import Person from ietf.person.models import Person
from ietf.person.utils import (merge_persons, send_merge_notification, handle_users,
determine_merge_order)
from ietf.utils.log import log from ietf.utils.log import log
from ietf.utils.mail import send_mail
def dedupe_aliaises(person):
    '''
    Check person for duplicate aliases and purge them.

    Walks person.alias_set.all() in the order it is returned. The first alias
    carrying a given name is kept; every later alias with the same name is
    removed with alias.delete(). Returns None.
    '''
    # A set gives constant-time membership tests; the previous list made the
    # scan quadratic in the number of aliases.
    seen = set()
    for alias in person.alias_set.all():
        if alias.name in seen:
            alias.delete()
        else:
            seen.add(alias.name)
def determine_merge_order(source,target):
    '''
    Decide which Person is merged into which; returns a (source, target) tuple.

    A Person with a related User is preferred as the target. When both have
    a User, the one whose User logged in most recently becomes the target;
    a User without a last_login is treated as the oldest possible login.
    When neither has a User the given order is kept.
    '''
    def login_time(person):
        # users that never logged in sort before every real login time
        return person.user.last_login or datetime.datetime.min

    if source.user:
        if not target.user:
            # only the source has a login: swap the merge order
            return target, source
        older, newer = sorted((source, target), key=login_time)
        return older, newer
    return source, target
def get_extra_primary(source,target):
    '''
    Return the source email addresses that should no longer be primary.

    When both source and target have at least one email with primary=True,
    the result of source.email_set.filter(primary=True) is returned;
    otherwise an empty list is returned.
    '''
    both_have_primary = bool(
        source.email_set.filter(primary=True)
        and target.email_set.filter(primary=True)
    )
    if not both_have_primary:
        return []
    return source.email_set.filter(primary=True)
def handle_users(source,target,check_only=False):
    '''
    Reconcile the datatracker logins (Users) of two Persons being merged.

    The target's login is retained when it has one; otherwise the source's
    login is moved to the target. When both have a login the source's login
    is detached from the source (the User row itself is not deleted here).

    If check_only == True, just return a string describing the action,
    otherwise perform the changes and return the same string.
    '''
    if not (source.user or target.user):
        return "DATATRACKER LOGIN ACTION: none (no login defined)"
    if not source.user:
        return "DATATRACKER LOGIN ACTION: retaining login {}".format(target.user)
    if not target.user:
        message = "DATATRACKER LOGIN ACTION: retaining login {}".format(source.user)
        if not check_only:
            target.user = source.user
            # Detach the login from source and save source first, so the two
            # records never reference the same User at the same time
            # (presumably Person.user is one-to-one -- confirm on the model).
            source.user = None
            source.save()
            target.save()
        return message
    # both records have a login: keep the target's, detach the source's
    message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target.user,source.user)
    if not check_only:
        syslog.syslog('merge-person-records: deleting user {}'.format(source.user.username))
        source.user = None
        source.save()
    return message
def send_notification(person,changes):
    '''
    Email the merge target (Person) a summary of the changes that were made.

    changes is a sequence of strings; they are joined with newlines and
    handed to the utils/merge_person_records.txt template together with
    the person.
    '''
    recipient = person.email_address()
    template_context = dict(person=person, changes='\n'.join(changes))
    send_mail(
        request=None,
        to=recipient,
        frm="IETF Secretariat <ietf-secretariat@ietf.org>",
        subject="IETF Datatracker records merged",
        template="utils/merge_person_records.txt",
        context=template_context,
        extra={},
    )
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -112,8 +41,6 @@ def main():
parser.add_argument('-f','--force', help='force merge order',action='store_true') parser.add_argument('-f','--force', help='force merge order',action='store_true')
parser.add_argument('-v','--verbose', help='verbose output',action='store_true') parser.add_argument('-v','--verbose', help='verbose output',action='store_true')
args = parser.parse_args() args = parser.parse_args()
changes = []
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
source = Person.objects.get(pk=args.source_id) source = Person.objects.get(pk=args.source_id)
target = Person.objects.get(pk=args.target_id) target = Person.objects.get(pk=args.target_id)
@ -129,62 +56,11 @@ def main():
if response.lower() != 'y': if response.lower() != 'y':
sys.exit() sys.exit()
# write log # perform merge
syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk)) success, changes = merge_persons(source, target, verbose=args.verbose)
# handle primary emails
for email in get_extra_primary(source,target):
email.primary = False
email.save()
changes.append('EMAIL ACTION: {} no longer marked as primary'.format(email.address))
# handle users
changes.append(handle_users(source,target))
# find all related objects and migrate
related_objects = [ f for f in source._meta.get_fields()
if (f.one_to_many or f.one_to_one)
and f.auto_created and not f.concrete ]
for related_object in related_objects:
accessor = related_object.get_accessor_name()
field_name = related_object.field.name
queryset = getattr(source, accessor).all()
if args.verbose:
print "Merging {}:{}".format(accessor,queryset.count())
kwargs = { field_name:target }
queryset.update(**kwargs)
# check aliases
dedupe_aliaises(target)
# copy other attributes
for field in ('ascii','ascii_short','address','affiliation'):
if getattr(source,field) and not getattr(target,field):
setattr(target,field,getattr(source,field))
target.save()
# check for any remaining relationships and exit if more found
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
deletable_objects = admin.utils.get_deleted_objects(
objs, opts, user, admin_site, using)
deletable_objects_summary = deletable_objects[1]
if len(deletable_objects_summary) > 1: # should only inlcude one object (Person)
print "Not Deleting Person: {}({})".format(source.ascii,source.pk)
print "Related objects remain:"
pprint.pprint(deletable_objects[1])
sys.exit(1)
if args.verbose:
print "Deleting Person: {}({})".format(source.ascii,source.pk)
source.delete()
# send email notification # send email notification
send_notification(target,changes) send_merge_notification(target,changes)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -4,7 +4,6 @@ import datetime
import shutil import shutil
import urlparse import urlparse
from pyquery import PyQuery from pyquery import PyQuery
import StringIO
from django.db import IntegrityError from django.db import IntegrityError
from django.db.models import Max from django.db.models import Max
@ -22,8 +21,6 @@ from ietf.person.models import Email, Person
from ietf.group.models import Group from ietf.group.models import Group
from ietf.message.models import Message from ietf.message.models import Message
from ietf.person.utils import merge_persons
from ietf.nomcom.test_data import nomcom_test_data, generate_cert, check_comments, \ from ietf.nomcom.test_data import nomcom_test_data, generate_cert, check_comments, \
COMMUNITY_USER, CHAIR_USER, \ COMMUNITY_USER, CHAIR_USER, \
MEMBER_USER, SECRETARIAT_USER, EMAIL_DOMAIN, NOMCOM_YEAR MEMBER_USER, SECRETARIAT_USER, EMAIL_DOMAIN, NOMCOM_YEAR
@ -37,7 +34,7 @@ from ietf.nomcom.management.commands.send_reminders import Command, is_time_to_s
from ietf.nomcom.factories import NomComFactory, FeedbackFactory, TopicFactory, \ from ietf.nomcom.factories import NomComFactory, FeedbackFactory, TopicFactory, \
nomcom_kwargs_for_year, provide_private_key_to_test_client, \ nomcom_kwargs_for_year, provide_private_key_to_test_client, \
key key
from ietf.person.factories import PersonFactory, EmailFactory, UserFactory from ietf.person.factories import PersonFactory, EmailFactory
from ietf.dbtemplate.factories import DBTemplateFactory from ietf.dbtemplate.factories import DBTemplateFactory
from ietf.dbtemplate.models import DBTemplate from ietf.dbtemplate.models import DBTemplate
@ -1777,34 +1774,6 @@ class NoPublicKeyTests(TestCase):
# No questionnaire responses # No questionnaire responses
self.do_common_work(reverse('ietf.nomcom.views.private_questionnaire',kwargs={'year':self.nc.year()}),False) self.do_common_work(reverse('ietf.nomcom.views.private_questionnaire',kwargs={'year':self.nc.year()}),False)
class MergePersonTests(TestCase):
    '''Merging two nominee Persons must fold their nominee records and feedback together.'''

    def setUp(self):
        build_test_public_keys_dir(self)
        self.nc = NomComFactory(**nomcom_kwargs_for_year())
        self.author = PersonFactory.create().email_set.first().address
        self.nominee1, self.nominee2 = self.nc.nominee_set.all()[:2]
        self.person1 = self.nominee1.person
        self.person2 = self.nominee2.person
        self.position = self.nc.position_set.first()
        # give each of the two nominees exactly one nomination feedback
        for nominee in (self.nominee1, self.nominee2):
            feedback = FeedbackFactory.create(author=self.author,nomcom=self.nc,type_id='nomina')
            feedback.positions.add(self.position)
            feedback.nominees.add(nominee)
        # merge_persons looks up a superuser when checking for leftover relations
        UserFactory(is_superuser=True)

    def tearDown(self):
        clean_test_public_keys_dir(self)

    def test_merge_person(self):
        first_two = self.nc.nominee_set.all()[:2]
        source, target = [nominee.person for nominee in first_two]
        output = StringIO.StringIO()

        # state before the merge
        self.assertEqual(self.nc.nominee_set.count(),4)
        self.assertEqual(self.nominee1.feedback_set.count(),1)
        self.assertEqual(self.nominee2.feedback_set.count(),1)

        merge_persons(source,target,output)

        # nominee1 is gone and its feedback now belongs to nominee2
        self.assertEqual(self.nc.nominee_set.count(),3)
        surviving = self.nc.nominee_set.get(pk=self.nominee2.pk)
        self.assertEqual(surviving.feedback_set.count(),2)
        self.assertFalse(self.nc.nominee_set.filter(pk=self.nominee1.pk).exists())
class AcceptingTests(TestCase): class AcceptingTests(TestCase):
def setUp(self): def setUp(self):

View file

@ -42,9 +42,7 @@ class PersonFactory(factory.DjangoModelFactory):
ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip())) ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip()))
class Params: class Params:
with_bio = factory.Trait( with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs()))
biography = u"\n\n".join(fake.paragraphs()),
)
@factory.post_generation @factory.post_generation
def default_aliases(obj, create, extracted, **kwargs): # pylint: disable=no-self-argument def default_aliases(obj, create, extracted, **kwargs): # pylint: disable=no-self-argument

View file

@ -1,15 +1,20 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import json import json
from pyquery import PyQuery from pyquery import PyQuery
from StringIO import StringIO
from django.urls import reverse as urlreverse from django.urls import reverse as urlreverse
import debug # pyflakes:ignore import debug # pyflakes:ignore
from ietf.person.factories import EmailFactory,PersonFactory #from ietf.nomcom.models import Nominee, NomCom
from ietf.person.models import Person #from ietf.nomcom.test_data import nomcom_test_data
from ietf.person.factories import EmailFactory, PersonFactory
from ietf.person.models import Person, Alias
from ietf.person.utils import (merge_persons, determine_merge_order, send_merge_notification,
handle_users, get_extra_primary, dedupe_aliases, move_related_objects)
from ietf.utils.test_data import make_test_data from ietf.utils.test_data import make_test_data
from ietf.utils.test_utils import TestCase from ietf.utils.test_utils import TestCase
from ietf.utils.mail import outbox, empty_outbox from ietf.utils.mail import outbox, empty_outbox
@ -78,3 +83,125 @@ class PersonTests(TestCase):
Person.objects.create(name="Duplicate Test") Person.objects.create(name="Duplicate Test")
Person.objects.create(name="Duplicate Test") Person.objects.create(name="Duplicate Test")
self.assertTrue("possible duplicate" in outbox[0]["Subject"].lower()) self.assertTrue("possible duplicate" in outbox[0]["Subject"].lower())
class PersonUtilsTests(TestCase):
    '''Unit tests for the person-merge helpers imported from ietf.person.utils.'''

    def get_person_no_user(self):
        # Helper: a Person with its User detached. The tests below read
        # .user on plain PersonFactory() results, so the factory is
        # expected to attach one.
        person = PersonFactory()
        person.user = None
        person.save()
        return person

    def test_determine_merge_order(self):
        p1 = self.get_person_no_user()
        p2 = PersonFactory()
        p3 = self.get_person_no_user()
        p4 = PersonFactory()

        # target has User
        results = determine_merge_order(p1, p2)
        self.assertEqual(results,(p1,p2))

        # source has User
        results = determine_merge_order(p2, p1)
        self.assertEqual(results,(p1,p2))

        # neither have User
        results = determine_merge_order(p1, p3)
        self.assertEqual(results,(p1,p3))

        # both have User: the most recent login becomes the target
        today = datetime.datetime.today()
        p2.user.last_login = today
        p2.user.save()
        p4.user.last_login = today - datetime.timedelta(days=30)
        p4.user.save()
        results = determine_merge_order(p2, p4)
        self.assertEqual(results,(p4,p2))

    def test_send_merge_notification(self):
        person = PersonFactory()
        len_before = len(outbox)
        send_merge_notification(person,['Record Merged'])
        self.assertEqual(len(outbox),len_before+1)
        self.assertIn('IETF Datatracker records merged', outbox[-1]['Subject'])

    def test_handle_users(self):
        source1 = self.get_person_no_user()
        target1 = self.get_person_no_user()
        source2 = self.get_person_no_user()
        target2 = PersonFactory()
        source3 = PersonFactory()
        target3 = self.get_person_no_user()
        source4 = PersonFactory()
        target4 = PersonFactory()

        # no Users
        result = handle_users(source1, target1)
        self.assertIn('DATATRACKER LOGIN ACTION: none', result)

        # target user
        result = handle_users(source2, target2)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login {}".format(target2.user), result)

        # source user
        user = source3.user
        result = handle_users(source3, target3)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login {}".format(user), result)
        self.assertEqual(target3.user, user)

        # both have user
        source_user = source4.user
        target_user = target4.user
        result = handle_users(source4, target4)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target_user,source_user), result)
        self.assertEqual(target4.user, target_user)
        self.assertIsNone(source4.user)

    def test_get_extra_primary(self):
        source = PersonFactory()
        target = PersonFactory()
        extra = get_extra_primary(source, target)
        # get_extra_primary returns either a queryset or a plain list;
        # normalise both sides to lists so the comparison is by content.
        self.assertEqual(list(extra), list(source.email_set.filter(primary=True)))

    def test_dedupe_aliases(self):
        person = PersonFactory()
        Alias.objects.create(person=person, name='Joe')
        Alias.objects.create(person=person, name='Joe')
        self.assertEqual(person.alias_set.filter(name='Joe').count(),2)
        dedupe_aliases(person)
        self.assertEqual(person.alias_set.filter(name='Joe').count(),1)

    # Disabled test, kept for reference (it was previously parked in a bare
    # string literal in the class body).
    #
    # def test_merge_nominees(self):
    #     nomcom_test_data()
    #     nomcom = NomCom.objects.first()
    #     source = PersonFactory()
    #     source.nominee_set.create(nomcom=nomcom,email=source.email())
    #     #source = Nominee.objects.first().email.person
    #     target = PersonFactory()
    #     print source
    #     print source.nominee_set.all()
    #     merge_nominees(source, target)
    #     self.assertTrue(target.nominee_set.all())

    def test_move_related_objects(self):
        source = PersonFactory()
        target = PersonFactory()
        source_email = source.email_set.first()
        source_alias = source.alias_set.first()
        move_related_objects(source, target, file=StringIO())
        self.assertIn(source_email, target.email_set.all())
        self.assertIn(source_alias, target.alias_set.all())

    def test_merge_persons(self):
        source = PersonFactory()
        target = PersonFactory()
        source_id = source.pk
        source_email = source.email_set.first()
        source_alias = source.alias_set.first()
        merge_persons(source, target, file=StringIO())
        self.assertIn(source_email, target.email_set.all())
        self.assertIn(source_alias, target.alias_set.all())
        # the source record must be gone after a successful merge
        self.assertFalse(Person.objects.filter(id=source_id).exists())

View file

@ -1,54 +1,121 @@
from __future__ import unicode_literals from __future__ import unicode_literals, print_function
import datetime
import os
import pprint import pprint
import sys
import syslog
from django.contrib import admin from django.contrib import admin
from django.contrib.auth.models import User from django.contrib.auth.models import User
from ietf.person.models import Person from ietf.person.models import Person
from ietf.utils.mail import send_mail
def merge_persons(source,target,stream): def merge_persons(source, target, file=sys.stdout, verbose=False):
changes = []
# write log
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk))
# merge emails # handle primary emails
for email in source.email_set.all(): for email in get_extra_primary(source,target):
print >>stream, "Merging email: {}".format(email.address) email.primary = False
email.person = target
email.save() email.save()
changes.append('EMAIL ACTION: {} no longer marked as primary'.format(email.address))
changes.append(handle_users(source,target))
#merge_nominees(source, target)
move_related_objects(source, target, file=file, verbose=verbose)
dedupe_aliases(target)
# copy other attributes
for field in ('ascii','ascii_short','address','affiliation'):
if getattr(source,field) and not getattr(target,field):
setattr(target,field,getattr(source,field))
target.save()
# check for any remaining relationships and exit if more found
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
deletable_objects = admin.utils.get_deleted_objects(
objs, opts, user, admin_site, using)
deletable_objects_summary = deletable_objects[1]
if len(deletable_objects_summary) > 1: # should only inlcude one object (Person)
print("Not Deleting Person: {}({})".format(source.ascii,source.pk), file=file)
print("Related objects remain:", file=file)
pprint.pprint(deletable_objects[1], stream=file)
success = False
else:
success = True
print("Deleting Person: {}({})".format(source.ascii,source.pk), file=file)
source.delete()
# merge aliases return success, changes
target_aliases = [ a.name for a in target.alias_set.all() ]
for alias in source.alias_set.all(): def get_extra_primary(source,target):
if alias.name in target_aliases: '''
Inspect email addresses and return list of those that should no longer be primary
'''
if source.email_set.filter(primary=True) and target.email_set.filter(primary=True):
return source.email_set.filter(primary=True)
else:
return []
def handle_users(source,target,check_only=False):
    '''
    Work out which datatracker login survives a merge and return a string
    describing the action.

    The target's login is retained when it has one. If only the source has
    a login it is moved to the target. If both have one, the source's login
    is detached from the source; the User row itself is not deleted here.

    With check_only == True nothing is modified and only the description is
    returned; otherwise the changes are applied and saved as well.
    '''
    source_login = source.user
    target_login = target.user

    if not source_login:
        if not target_login:
            return "DATATRACKER LOGIN ACTION: none (no login defined)"
        return "DATATRACKER LOGIN ACTION: retaining login {}".format(target_login)

    if not target_login:
        # only the source can log in: hand its login over to the target
        message = "DATATRACKER LOGIN ACTION: retaining login {}".format(source_login)
        if check_only:
            return message
        target.user = source_login
        source.user = None
        source.save()
        target.save()
        return message

    # both can log in: the target's login wins
    message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target_login,source_login)
    if not check_only:
        syslog.syslog('merge-person-records: deleting user {}'.format(source_login.username))
        source.user = None
        source.save()
    return message
def move_related_objects(source, target, file, verbose=False):
    '''
    Re-point every object that references source so it references target.

    Selects the auto-created, non-concrete one-to-many / one-to-one fields
    reported by source._meta.get_fields(), and for each one bulk-updates the
    objects behind the accessor so the referencing field holds target.
    With verbose set, one "Merging <accessor>:<count>" line per relation is
    printed to file.
    '''
    def is_reverse_relation(field):
        return ((field.one_to_many or field.one_to_one)
                and field.auto_created and not field.concrete)

    reverse_relations = [f for f in source._meta.get_fields() if is_reverse_relation(f)]
    for relation in reverse_relations:
        accessor = relation.get_accessor_name()
        referencing_field = relation.field.name
        # NOTE(review): for a reverse one-to-one the accessor normally yields
        # a single object rather than a manager -- confirm .all() is valid
        # for every relation this loop can encounter.
        related = getattr(source, accessor).all()
        if verbose:
            print("Merging {}:{}".format(accessor,related.count()),file=file)
        related.update(**{referencing_field: target})
def dedupe_aliases(person):
'''Check person for duplicate aliases and purge'''
seen = []
for alias in person.alias_set.all():
if alias.name in seen:
alias.delete() alias.delete()
else: else:
print >>stream, "Merging alias: {}".format(alias.name) seen.append(alias.name)
alias.person = target
alias.save() def merge_nominees(source, target):
'''Move nominees and feedback to target'''
# merge DocEvents
for docevent in source.docevent_set.all():
docevent.by = target
docevent.save()
# merge SubmissionEvents
for subevent in source.submissionevent_set.all():
subevent.by = target
subevent.save()
# merge Messages
for message in source.message_set.all():
message.by = target
message.save()
# merge Constraints
for constraint in source.constraint_set.all():
constraint.person = target
constraint.save()
# merge Roles
for role in source.role_set.all():
role.person = target
role.save()
# merge Nominees
for nominee in source.nominee_set.all(): for nominee in source.nominee_set.all():
target_nominee = target.nominee_set.get(nomcom=nominee.nomcom) target_nominee = target.nominee_set.get(nomcom=nominee.nomcom)
if not target_nominee: if not target_nominee:
@ -68,22 +135,27 @@ def merge_persons(source,target,stream):
np.nominee=target_nominee np.nominee=target_nominee
np.save() np.save()
nominee.delete() nominee.delete()
# check for any remaining relationships and delete if none
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
deletable_objects, model_count, perms_needed, protected = ( def send_merge_notification(person,changes):
admin.utils.get_deleted_objects(objs, opts, user, admin_site, using) ) '''
Send an email to the merge target (Person) notifying them of the changes
if len(deletable_objects) > 1: '''
print >>stream, "Not Deleting Person: {}({})".format(source.ascii,source.pk) send_mail(request = None,
print >>stream, "Related objects remain:" to = person.email_address(),
pprint.pprint(deletable_objects[1],stream=stream) frm = "IETF Secretariat <ietf-secretariat@ietf.org>",
subject = "IETF Datatracker records merged",
else: template = "utils/merge_person_records.txt",
print >>stream, "Deleting Person: {}({})".format(source.ascii,source.pk) context = dict(person=person,changes='\n'.join(changes)),
source.delete() extra = {}
)
def determine_merge_order(source,target):
    '''
    Determine merge order and return it as a (source, target) tuple.

    The Person that has a related User is selected as target. If both have
    Users, the one with the most recent login is the target; a User with no
    last_login counts as the oldest. If neither has a User the order given
    by the caller is returned unchanged.
    '''
    def last_seen(person):
        # a missing last_login sorts before any real timestamp
        return person.user.last_login or datetime.datetime.min

    if not source.user:
        return source, target
    if not target.user:
        # the login lives on the source only, so swap the roles
        return target, source
    earlier, later = sorted((source, target), key=last_seen)
    return earlier, later