datatracker/ietf/person/utils.py
Ole Laursen ea93001c63 Remove debug output
- Legacy-Id: 12786
2017-02-03 18:54:23 +00:00

174 lines
5.8 KiB
Python
Executable file

import pprint
import re
from collections import defaultdict
from django.contrib import admin
from django.contrib.auth.models import User
from ietf.person.models import Person, AffiliationAlias, AffiliationIgnoredEnding
def merge_persons(source, target, stream):
    """Move the records attached to ``source`` over to ``target``.

    Emails, aliases, document events, submission events, messages,
    constraints, roles and nomcom nominee records of ``source`` are
    re-pointed to ``target``.  Afterwards ``source`` is deleted, unless the
    Django admin deletion collector still reports objects related to it; in
    that case the remaining objects are listed on ``stream`` and ``source``
    is kept.

    Arguments:
    source -- person record to merge from (deleted on success)
    target -- person record to merge into
    stream -- file-like object receiving the progress messages

    NOTE: the ``print >>stream`` statements are Python 2 syntax, in line with
    the rest of this module.
    """
    # merge emails
    for email in source.email_set.all():
        print >>stream, "Merging email: {}".format(email.address)
        email.person = target
        email.save()

    # merge aliases; an alias name the target already has is dropped instead
    # of being moved
    target_aliases = set(a.name for a in target.alias_set.all())
    for alias in source.alias_set.all():
        if alias.name in target_aliases:
            alias.delete()
        else:
            print >>stream, "Merging alias: {}".format(alias.name)
            alias.person = target
            alias.save()

    # merge DocEvents
    for docevent in source.docevent_set.all():
        docevent.by = target
        docevent.save()

    # merge SubmissionEvents
    for subevent in source.submissionevent_set.all():
        subevent.by = target
        subevent.save()

    # merge Messages
    for message in source.message_set.all():
        message.by = target
        message.save()

    # merge Constraints
    for constraint in source.constraint_set.all():
        constraint.person = target
        constraint.save()

    # merge Roles
    for role in source.role_set.all():
        role.person = target
        role.save()

    # merge Nominees
    for nominee in source.nominee_set.all():
        # filter().first() returns None when the target has no nominee for
        # this nomcom; get() would raise DoesNotExist instead, so the
        # create-branch below could never run.
        target_nominee = target.nominee_set.filter(nomcom=nominee.nomcom).first()
        if target_nominee is None:
            target_nominee = target.nominee_set.create(
                nomcom=nominee.nomcom, email=target.email())

        nominee.nomination_set.all().update(nominee=target_nominee)

        for fb in nominee.feedback_set.all():
            fb.nominees.remove(nominee)
            fb.nominees.add(target_nominee)

        for np in nominee.nomineeposition_set.all():
            existing_target_np = target_nominee.nomineeposition_set.filter(
                position=np.position).first()
            if existing_target_np:
                # the target already holds this position: keep its record,
                # but let a still-pending state take over the source's state
                if existing_target_np.state.slug == 'pending':
                    existing_target_np.state = np.state
                    existing_target_np.save()
                np.delete()
            else:
                np.nominee = target_nominee
                np.save()

        nominee.delete()

    # check for any remaining relationships and delete if none
    user = User.objects.filter(is_superuser=True).first()
    deletable_objects = admin.utils.get_deleted_objects(
        [source], Person._meta, user, admin.site, 'default')[0]

    # the first entry is the person itself; anything beyond it is a related
    # object that would be deleted along with it
    if len(deletable_objects) > 1:
        print >>stream, "Not Deleting Person: {}({})".format(source.ascii, source.pk)
        print >>stream, "Related objects remain:"
        pprint.pprint(deletable_objects[1], stream=stream)
    else:
        print >>stream, "Deleting Person: {}({})".format(source.ascii, source.pk)
        source.delete()
def compile_affiliation_ending_stripping_regexp():
    """Return a compiled regexp matching any ignorable affiliation ending.

    The endings are regular expression fragments read from
    AffiliationIgnoredEnding.  They are joined into a single
    case-insensitive alternation anchored at the end of the string, with an
    optional leading comma and surrounding spaces included in the match.

    Endings that are not valid regular expressions on their own are left
    out, so that one bad database entry cannot make the combined pattern
    fail to compile.
    """
    parts = []
    for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True):
        try:
            re.compile(ending_re)
        except re.error:
            # invalid fragment: skip it rather than poison the whole pattern
            continue

        parts.append(ending_re)

    re_str = ",? *({}) *$".format("|".join(parts))

    return re.compile(re_str, re.IGNORECASE)
def get_aliased_affiliations(affiliations):
    """Given non-unique sequence of affiliations, returns dictionary with
    aliases needed.

    We employ the following strategies, interleaved:

    - Stripping company endings like Inc., GmbH etc. from database

    - Looking up aliases stored directly in the database, like
      "Examplar International" -> "Examplar"

    - Case-folding so Examplar and EXAMPLAR is merged with the
      winner being the one with most occurrences (so input should not
      be made unique) or most upper case letters in case of ties.
      Case folding can be overridden by the aliases in the database.

    The returned dictionary maps an input spelling to the spelling it should
    be replaced with; inputs that need no replacement may be absent.
    """
    res = {}

    ending_re = compile_affiliation_ending_stripping_regexp()

    known_aliases = {
        alias.lower(): name
        for alias, name in AffiliationAlias.objects.values_list("alias", "name")
    }

    # case-folded resolved name -> input spellings that ended up there
    originals_by_folded = defaultdict(set)
    # case-folded resolved name -> resolved spellings competing for it
    spellings_by_folded = defaultdict(set)
    # resolved spelling -> number of inputs resolved to exactly that spelling
    case_spelling_count = defaultdict(int)

    for original_affiliation in affiliations:
        affiliation = original_affiliation
        resolved = False

        # check aliases from DB
        alias = known_aliases.get(affiliation.lower())
        if alias is not None:
            affiliation = alias
            resolved = True

        # strip ending
        stripped = ending_re.sub("", affiliation)
        if stripped != affiliation:
            affiliation = stripped
            resolved = True

        # check aliases from DB again, the stripped name may be an alias too
        alias = known_aliases.get(affiliation.lower())
        if alias is not None:
            affiliation = alias
            resolved = True

        if resolved:
            res[original_affiliation] = affiliation

        folded = affiliation.lower()
        originals_by_folded[folded].add(original_affiliation)
        spellings_by_folded[folded].add(affiliation)
        case_spelling_count[affiliation] += 1

    def affiliation_sort_key(affiliation):
        count = case_spelling_count[affiliation]
        uppercase_letters = sum(1 for c in affiliation if c.isupper())
        # the spelling itself is the last resort so that full ties are
        # resolved the same way on every run
        return (count, uppercase_letters, affiliation)

    # now we just need to pick the most popular uppercase/lowercase
    # spelling for each affiliation with more than one.  The candidates are
    # the resolved spellings (the ones that were counted), not the raw
    # inputs: otherwise an input already mapped through an alias or a
    # stripped ending could win with a count of zero and overwrite that
    # mapping.
    for folded, spellings in spellings_by_folded.items():
        if len(spellings) > 1:
            most_popular = max(spellings, key=affiliation_sort_key)
            for original_affiliation in originals_by_folded[folded]:
                if original_affiliation != most_popular:
                    res[original_affiliation] = most_popular

    return res