Refactor ietf/bin/merge-person-records to facilitate testing. Add tests. Fixes #2162. Commit ready for merge.

- Legacy-Id: 13567
This commit is contained in:
Ryan Cross 2017-06-08 23:28:43 +00:00
parent 77f4bf21ff
commit e9b7c5753e
5 changed files with 267 additions and 225 deletions

View file

@ -27,83 +27,12 @@ django.setup()
# -------------------------------------------------------------------------------------
import argparse
import datetime
import pprint
import syslog
from django.contrib import admin
from django.contrib.auth.models import User
from ietf.person.models import Person
from ietf.person.utils import (merge_persons, send_merge_notification, handle_users,
determine_merge_order)
from ietf.utils.log import log
from ietf.utils.mail import send_mail
def dedupe_aliaises(person):
    '''
    Check person for duplicate aliases and purge them.

    Walks person.alias_set.all() once. The first alias carrying a given name is
    kept; every later alias with the same name is removed via alias.delete().
    '''
    # A set gives constant-time membership tests; the names are only ever
    # checked for presence, never read back in order.
    seen = set()
    for alias in person.alias_set.all():
        if alias.name in seen:
            alias.delete()
        else:
            seen.add(alias.name)
def determine_merge_order(source,target):
    '''
    Decide which Person is merged into which and return them as (source, target).

    A Person with a related User is preferred as the target. When both have a
    User, the one whose User logged in most recently becomes the target; a User
    that has never logged in sorts as the oldest possible login.
    '''
    if source.user and not target.user:
        # only the proposed source has a login: swap the merge order
        return target, source

    if source.user and target.user:
        def login_time(person):
            return person.user.last_login or datetime.datetime.min

        older, newer = sorted((source, target), key=login_time)
        return older, newer

    return source, target
def get_extra_primary(source,target):
    '''
    Inspect email addresses and return those that should no longer be primary.

    Returns the source's emails flagged primary=True when both source and target
    have at least one such email; otherwise returns an empty list.
    '''
    source_primary = source.email_set.filter(primary=True)
    # Reuse the already-evaluated result as the return value rather than
    # running the identical filter a second time.
    if source_primary and target.email_set.filter(primary=True):
        return source_primary
    return []
def handle_users(source,target,check_only=False):
    '''
    Reconcile the datatracker logins (Users) of two Persons being merged.

    The target's login is the one retained. If only the source has a login it is
    moved to the target. If both have one, the source is detached from its login;
    the User object itself is not deleted here.

    If check_only == True, nothing is modified and only the string describing the
    action is returned; otherwise the changes are saved and the same string is
    returned.
    '''
    if not (source.user or target.user):
        return "DATATRACKER LOGIN ACTION: none (no login defined)"

    if not source.user:
        # only the target has a login: nothing to change
        return "DATATRACKER LOGIN ACTION: retaining login {}".format(target.user)

    if not target.user:
        message = "DATATRACKER LOGIN ACTION: retaining login {}".format(source.user)
        if not check_only:
            target.user = source.user
            # Detach and save the source first so the two Person records never
            # reference the same User at the moment the target is saved.
            source.user = None
            source.save()
            target.save()
        return message

    message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target.user,source.user)
    if not check_only:
        # NOTE(review): the log text says "deleting" but the User is only
        # detached from the source Person; confirm whether deletion is intended.
        syslog.syslog('merge-person-records: deleting user {}'.format(source.user.username))
        source.user = None
        source.save()
    return message
def send_notification(person,changes):
    '''
    Email the merge target (Person) a notification listing the changes made.

    changes is an iterable of strings; they are joined with newlines and passed
    to the "utils/merge_person_records.txt" template together with the person.
    '''
    recipient = person.email_address()
    template_context = dict(person=person, changes='\n'.join(changes))
    send_mail(
        request=None,
        to=recipient,
        frm="IETF Secretariat <ietf-secretariat@ietf.org>",
        subject="IETF Datatracker records merged",
        template="utils/merge_person_records.txt",
        context=template_context,
        extra={},
    )
def main():
parser = argparse.ArgumentParser()
@ -112,8 +41,6 @@ def main():
parser.add_argument('-f','--force', help='force merge order',action='store_true')
parser.add_argument('-v','--verbose', help='verbose output',action='store_true')
args = parser.parse_args()
changes = []
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
source = Person.objects.get(pk=args.source_id)
target = Person.objects.get(pk=args.target_id)
@ -129,62 +56,11 @@ def main():
if response.lower() != 'y':
sys.exit()
# write log
syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk))
# handle primary emails
for email in get_extra_primary(source,target):
email.primary = False
email.save()
changes.append('EMAIL ACTION: {} no longer marked as primary'.format(email.address))
# handle users
changes.append(handle_users(source,target))
# find all related objects and migrate
related_objects = [ f for f in source._meta.get_fields()
if (f.one_to_many or f.one_to_one)
and f.auto_created and not f.concrete ]
for related_object in related_objects:
accessor = related_object.get_accessor_name()
field_name = related_object.field.name
queryset = getattr(source, accessor).all()
if args.verbose:
print "Merging {}:{}".format(accessor,queryset.count())
kwargs = { field_name:target }
queryset.update(**kwargs)
# check aliases
dedupe_aliaises(target)
# copy other attributes
for field in ('ascii','ascii_short','address','affiliation'):
if getattr(source,field) and not getattr(target,field):
setattr(target,field,getattr(source,field))
target.save()
# check for any remaining relationships and exit if more found
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
deletable_objects = admin.utils.get_deleted_objects(
objs, opts, user, admin_site, using)
deletable_objects_summary = deletable_objects[1]
if len(deletable_objects_summary) > 1: # should only inlcude one object (Person)
print "Not Deleting Person: {}({})".format(source.ascii,source.pk)
print "Related objects remain:"
pprint.pprint(deletable_objects[1])
sys.exit(1)
if args.verbose:
print "Deleting Person: {}({})".format(source.ascii,source.pk)
source.delete()
# perform merge
success, changes = merge_persons(source, target, verbose=args.verbose)
# send email notification
send_notification(target,changes)
send_merge_notification(target,changes)
if __name__ == "__main__":
main()

View file

@ -4,7 +4,6 @@ import datetime
import shutil
import urlparse
from pyquery import PyQuery
import StringIO
from django.db import IntegrityError
from django.db.models import Max
@ -22,8 +21,6 @@ from ietf.person.models import Email, Person
from ietf.group.models import Group
from ietf.message.models import Message
from ietf.person.utils import merge_persons
from ietf.nomcom.test_data import nomcom_test_data, generate_cert, check_comments, \
COMMUNITY_USER, CHAIR_USER, \
MEMBER_USER, SECRETARIAT_USER, EMAIL_DOMAIN, NOMCOM_YEAR
@ -37,7 +34,7 @@ from ietf.nomcom.management.commands.send_reminders import Command, is_time_to_s
from ietf.nomcom.factories import NomComFactory, FeedbackFactory, TopicFactory, \
nomcom_kwargs_for_year, provide_private_key_to_test_client, \
key
from ietf.person.factories import PersonFactory, EmailFactory, UserFactory
from ietf.person.factories import PersonFactory, EmailFactory
from ietf.dbtemplate.factories import DBTemplateFactory
from ietf.dbtemplate.models import DBTemplate
@ -1777,34 +1774,6 @@ class NoPublicKeyTests(TestCase):
# No questionnaire responses
self.do_common_work(reverse('ietf.nomcom.views.private_questionnaire',kwargs={'year':self.nc.year()}),False)
class MergePersonTests(TestCase):
    # Verifies that merging two Persons who are both nominees of the same NomCom
    # leaves a single nominee that carries the feedback of both.

    def setUp(self):
        build_test_public_keys_dir(self)
        self.nc = NomComFactory(**nomcom_kwargs_for_year())
        self.author = PersonFactory.create().email_set.first().address
        self.nominee1, self.nominee2 = self.nc.nominee_set.all()[:2]
        self.person1, self.person2 = self.nominee1.person, self.nominee2.person
        self.position = self.nc.position_set.first()
        # give each of the two nominees exactly one nomination feedback
        for nominee in [self.nominee1, self.nominee2]:
            f = FeedbackFactory.create(author=self.author,nomcom=self.nc,type_id='nomina')
            f.positions.add(self.position)
            f.nominees.add(nominee)
        # merge_persons looks up a superuser when checking for remaining relations
        UserFactory(is_superuser=True)

    def tearDown(self):
        clean_test_public_keys_dir(self)

    def test_merge_person(self):
        stream = StringIO.StringIO()

        self.assertEqual(self.nc.nominee_set.count(),4)
        self.assertEqual(self.nominee1.feedback_set.count(),1)
        self.assertEqual(self.nominee2.feedback_set.count(),1)

        # Merge the persons captured in setUp. Re-slicing the unordered
        # nominee queryset here is not guaranteed to yield the same two rows,
        # and the assertions below are written against nominee1/nominee2.
        merge_persons(self.person1,self.person2,stream)

        self.assertEqual(self.nc.nominee_set.count(),3)
        self.assertEqual(self.nc.nominee_set.get(pk=self.nominee2.pk).feedback_set.count(),2)
        self.assertFalse(self.nc.nominee_set.filter(pk=self.nominee1.pk).exists())
class AcceptingTests(TestCase):
def setUp(self):

View file

@ -42,9 +42,7 @@ class PersonFactory(factory.DjangoModelFactory):
ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip()))
class Params:
with_bio = factory.Trait(
biography = u"\n\n".join(fake.paragraphs()),
)
with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs()))
@factory.post_generation
def default_aliases(obj, create, extracted, **kwargs): # pylint: disable=no-self-argument

View file

@ -1,15 +1,20 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import datetime
import json
from pyquery import PyQuery
from StringIO import StringIO
from django.urls import reverse as urlreverse
import debug # pyflakes:ignore
from ietf.person.factories import EmailFactory,PersonFactory
from ietf.person.models import Person
#from ietf.nomcom.models import Nominee, NomCom
#from ietf.nomcom.test_data import nomcom_test_data
from ietf.person.factories import EmailFactory, PersonFactory
from ietf.person.models import Person, Alias
from ietf.person.utils import (merge_persons, determine_merge_order, send_merge_notification,
handle_users, get_extra_primary, dedupe_aliases, move_related_objects)
from ietf.utils.test_data import make_test_data
from ietf.utils.test_utils import TestCase
from ietf.utils.mail import outbox, empty_outbox
@ -78,3 +83,125 @@ class PersonTests(TestCase):
Person.objects.create(name="Duplicate Test")
Person.objects.create(name="Duplicate Test")
self.assertTrue("possible duplicate" in outbox[0]["Subject"].lower())
class PersonUtilsTests(TestCase):
    # Unit tests for the record-merging helpers imported from ietf.person.utils.

    def get_person_no_user(self):
        # Factory-built Person with its user cleared and saved.
        person = PersonFactory()
        person.user = None
        person.save()
        return person

    def test_determine_merge_order(self):
        p1 = self.get_person_no_user()
        p2 = PersonFactory()
        p3 = self.get_person_no_user()
        p4 = PersonFactory()

        # target has User
        results = determine_merge_order(p1, p2)
        self.assertEqual(results,(p1,p2))

        # source has User
        results = determine_merge_order(p2, p1)
        self.assertEqual(results,(p1,p2))

        # neither have User
        results = determine_merge_order(p1, p3)
        self.assertEqual(results,(p1,p3))

        # both have User: the most recent login becomes the target
        today = datetime.datetime.today()
        p2.user.last_login = today
        p2.user.save()
        p4.user.last_login = today - datetime.timedelta(days=30)
        p4.user.save()
        results = determine_merge_order(p2, p4)
        self.assertEqual(results,(p4,p2))

    def test_send_merge_notification(self):
        person = PersonFactory()
        len_before = len(outbox)
        send_merge_notification(person,['Record Merged'])
        self.assertEqual(len(outbox),len_before+1)
        self.assertIn('IETF Datatracker records merged', outbox[-1]['Subject'])

    def test_handle_users(self):
        source1 = self.get_person_no_user()
        target1 = self.get_person_no_user()
        source2 = self.get_person_no_user()
        target2 = PersonFactory()
        source3 = PersonFactory()
        target3 = self.get_person_no_user()
        source4 = PersonFactory()
        target4 = PersonFactory()

        # no Users
        result = handle_users(source1, target1)
        self.assertIn('DATATRACKER LOGIN ACTION: none', result)

        # target user
        result = handle_users(source2, target2)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login {}".format(target2.user), result)

        # source user
        user = source3.user
        result = handle_users(source3, target3)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login {}".format(user), result)
        self.assertEqual(target3.user, user)
        self.assertIsNone(source3.user)

        # both have user
        source_user = source4.user
        target_user = target4.user
        result = handle_users(source4, target4)
        self.assertIn("DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target_user,source_user), result)
        self.assertEqual(target4.user, target_user)
        self.assertIsNone(source4.user)

    def test_get_extra_primary(self):
        source = PersonFactory()
        target = PersonFactory()
        extra = get_extra_primary(source, target)
        # get_extra_primary returns either a queryset or a plain list; a
        # queryset never compares equal to a list, so normalise both sides.
        self.assertEqual(list(extra), list(source.email_set.filter(primary=True)))

    def test_dedupe_aliases(self):
        person = PersonFactory()
        Alias.objects.create(person=person, name='Joe')
        Alias.objects.create(person=person, name='Joe')
        self.assertEqual(person.alias_set.filter(name='Joe').count(),2)
        dedupe_aliases(person)
        self.assertEqual(person.alias_set.filter(name='Joe').count(),1)

    # Disabled test, kept for reference (merge_nominees is not imported here):
    #
    # def test_merge_nominees(self):
    #     nomcom_test_data()
    #     nomcom = NomCom.objects.first()
    #     source = PersonFactory()
    #     source.nominee_set.create(nomcom=nomcom,email=source.email())
    #     #source = Nominee.objects.first().email.person
    #     target = PersonFactory()
    #     print source
    #     print source.nominee_set.all()
    #     merge_nominees(source, target)
    #     self.assertTrue(target.nominee_set.all())

    def test_move_related_objects(self):
        source = PersonFactory()
        target = PersonFactory()
        source_email = source.email_set.first()
        source_alias = source.alias_set.first()
        move_related_objects(source, target, file=StringIO())
        self.assertIn(source_email, target.email_set.all())
        self.assertIn(source_alias, target.alias_set.all())

    def test_merge_persons(self):
        source = PersonFactory()
        target = PersonFactory()
        source_id = source.pk
        source_email = source.email_set.first()
        source_alias = source.alias_set.first()
        merge_persons(source, target, file=StringIO())
        self.assertIn(source_email, target.email_set.all())
        self.assertIn(source_alias, target.alias_set.all())
        # the source Person record must be gone after a successful merge
        self.assertFalse(Person.objects.filter(id=source_id).exists())

View file

@ -1,54 +1,121 @@
from __future__ import unicode_literals
from __future__ import unicode_literals, print_function
import datetime
import os
import pprint
import sys
import syslog
from django.contrib import admin
from django.contrib.auth.models import User
from ietf.person.models import Person
from ietf.utils.mail import send_mail
def merge_persons(source,target,stream):
def merge_persons(source, target, file=sys.stdout, verbose=False):
changes = []
# merge emails
for email in source.email_set.all():
print >>stream, "Merging email: {}".format(email.address)
email.person = target
# write log
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk))
# handle primary emails
for email in get_extra_primary(source,target):
email.primary = False
email.save()
changes.append('EMAIL ACTION: {} no longer marked as primary'.format(email.address))
# merge aliases
target_aliases = [ a.name for a in target.alias_set.all() ]
for alias in source.alias_set.all():
if alias.name in target_aliases:
changes.append(handle_users(source,target))
#merge_nominees(source, target)
move_related_objects(source, target, file=file, verbose=verbose)
dedupe_aliases(target)
# copy other attributes
for field in ('ascii','ascii_short','address','affiliation'):
if getattr(source,field) and not getattr(target,field):
setattr(target,field,getattr(source,field))
target.save()
# check for any remaining relationships and exit if more found
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
deletable_objects = admin.utils.get_deleted_objects(
objs, opts, user, admin_site, using)
deletable_objects_summary = deletable_objects[1]
if len(deletable_objects_summary) > 1: # should only inlcude one object (Person)
print("Not Deleting Person: {}({})".format(source.ascii,source.pk), file=file)
print("Related objects remain:", file=file)
pprint.pprint(deletable_objects[1], stream=file)
success = False
else:
success = True
print("Deleting Person: {}({})".format(source.ascii,source.pk), file=file)
source.delete()
return success, changes
def get_extra_primary(source,target):
    '''
    Inspect email addresses and return those that should no longer be primary.

    Returns the source's emails flagged primary=True when both source and target
    have at least one such email; otherwise returns an empty list.
    '''
    source_primary = source.email_set.filter(primary=True)
    # Reuse the already-evaluated result as the return value rather than
    # running the identical filter a second time.
    if source_primary and target.email_set.filter(primary=True):
        return source_primary
    return []
def handle_users(source,target,check_only=False):
    '''
    Reconcile the datatracker logins (Users) of two Persons being merged.

    The target's login is the one retained. If only the source has a login it is
    moved to the target. If both have one, the source is detached from its login;
    the User object itself is not deleted here.

    If check_only == True, nothing is modified and only the string describing the
    action is returned; otherwise the changes are saved and the same string is
    returned.
    '''
    if not (source.user or target.user):
        return "DATATRACKER LOGIN ACTION: none (no login defined)"

    if not source.user:
        # only the target has a login: nothing to change
        return "DATATRACKER LOGIN ACTION: retaining login {}".format(target.user)

    if not target.user:
        message = "DATATRACKER LOGIN ACTION: retaining login {}".format(source.user)
        if check_only:
            return message
        # hand the login over, detaching and saving the source before the target
        target.user = source.user
        source.user = None
        source.save()
        target.save()
        return message

    message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target.user,source.user)
    if check_only:
        return message
    syslog.syslog('merge-person-records: deleting user {}'.format(source.user.username))
    # The source Person is only detached from its User; the User is left in place.
    source.user = None
    source.save()
    return message
def move_related_objects(source, target, file, verbose=False):
    '''
    Find all objects related to source and re-point them at target.

    Considers the auto-created, non-concrete one-to-many / one-to-one entries of
    source._meta.get_fields() and bulk-updates each related queryset so its
    linking field references target. With verbose set, one line per relation
    (accessor name and object count) is printed to file.
    '''
    # NOTE(review): .all() is called on every accessor, including one_to_one
    # ones; confirm those accessors expose a manager for the models involved.
    for relation in source._meta.get_fields():
        if not (relation.one_to_many or relation.one_to_one):
            continue
        if not relation.auto_created or relation.concrete:
            continue
        accessor = relation.get_accessor_name()
        link_field = relation.field.name
        related = getattr(source, accessor).all()
        if verbose:
            print("Merging {}:{}".format(accessor,related.count()),file=file)
        related.update(**{link_field: target})
def dedupe_aliases(person):
'''Check person for duplicate aliases and purge'''
seen = []
for alias in person.alias_set.all():
if alias.name in seen:
alias.delete()
else:
print >>stream, "Merging alias: {}".format(alias.name)
alias.person = target
alias.save()
seen.append(alias.name)
# merge DocEvents
for docevent in source.docevent_set.all():
docevent.by = target
docevent.save()
# merge SubmissionEvents
for subevent in source.submissionevent_set.all():
subevent.by = target
subevent.save()
# merge Messages
for message in source.message_set.all():
message.by = target
message.save()
# merge Constraints
for constraint in source.constraint_set.all():
constraint.person = target
constraint.save()
# merge Roles
for role in source.role_set.all():
role.person = target
role.save()
# merge Nominees
def merge_nominees(source, target):
'''Move nominees and feedback to target'''
for nominee in source.nominee_set.all():
target_nominee = target.nominee_set.get(nomcom=nominee.nomcom)
if not target_nominee:
@ -69,21 +136,26 @@ def merge_persons(source,target,stream):
np.save()
nominee.delete()
# check for any remaining relationships and delete if none
objs = [source]
opts = Person._meta
user = User.objects.filter(is_superuser=True).first()
admin_site = admin.site
using = 'default'
def send_merge_notification(person,changes):
    '''
    Email the merge target (Person) a notification listing the changes made.

    changes is an iterable of strings; they are joined with newlines and passed
    to the "utils/merge_person_records.txt" template together with the person.
    '''
    recipient = person.email_address()
    template_context = dict(person=person, changes='\n'.join(changes))
    send_mail(
        request=None,
        to=recipient,
        frm="IETF Secretariat <ietf-secretariat@ietf.org>",
        subject="IETF Datatracker records merged",
        template="utils/merge_person_records.txt",
        context=template_context,
        extra={},
    )
deletable_objects, model_count, perms_needed, protected = (
admin.utils.get_deleted_objects(objs, opts, user, admin_site, using) )
if len(deletable_objects) > 1:
print >>stream, "Not Deleting Person: {}({})".format(source.ascii,source.pk)
print >>stream, "Related objects remain:"
pprint.pprint(deletable_objects[1],stream=stream)
else:
print >>stream, "Deleting Person: {}({})".format(source.ascii,source.pk)
source.delete()
def determine_merge_order(source,target):
    '''
    Decide which Person is merged into which and return them as (source, target).

    A Person with a related User is preferred as the target. When both have a
    User, the one whose User logged in most recently becomes the target; a User
    that has never logged in sorts as the oldest possible login.
    '''
    if source.user and not target.user:
        # only the proposed source has a login: swap the merge order
        return target, source

    if source.user and target.user:
        def login_time(person):
            return person.user.last_login or datetime.datetime.min

        older, newer = sorted((source, target), key=login_time)
        return older, newer

    return source, target