@admin.register(AffiliationAlias)
class AffiliationAliasAdmin(admin.ModelAdmin):
    """Admin change list for AffiliationAlias rows.

    Shows and searches both the alias and the name it maps to, and offers a
    sidebar filter on the name.
    """
    list_display = ["alias", "name"]
    search_fields = ["alias", "name"]
    list_filter = ["name"]


@admin.register(AffiliationIgnoredEnding)
class AffiliationIgnoredEndingAdmin(admin.ModelAdmin):
    """Admin change list for AffiliationIgnoredEnding rows, searchable by ending."""
    list_display = ["ending"]
    search_fields = ["ending"]
def add_affiliation_info(apps, schema_editor):
    """Forward step of the data migration: seed affiliation aliases and endings.

    Uses the historical models obtained through ``apps.get_model`` and
    ``get_or_create``, so rows that already exist are not duplicated.

    ``schema_editor`` is part of the RunPython callable signature and is not
    used here.
    """
    AffiliationAlias = apps.get_model("person", "AffiliationAlias")

    # Every alias below is folded into the same canonical name.
    cisco_aliases = (
        "cisco",
        "cisco system",
        "cisco systems (india) private limited",
        "cisco systems india pvt",
    )
    for alias in cisco_aliases:
        AffiliationAlias.objects.get_or_create(alias=alias, name="Cisco Systems")

    AffiliationIgnoredEnding = apps.get_model("person", "AffiliationIgnoredEnding")

    # Regexp fragments: raw strings keep the backslash literally without
    # relying on "\." being passed through as an unrecognised escape.
    ignored_endings = (
        r"LLC\.?",
        r"Ltd\.?",
        r"Inc\.?",
        r"GmbH\.?",
    )
    for ending in ignored_endings:
        AffiliationIgnoredEnding.objects.get_or_create(ending=ending)
class AffiliationAlias(models.Model):
    """Maps an alternative affiliation spelling (alias) to the name that
    should be used in its place for statistical purposes."""

    alias = models.CharField(max_length=255, help_text="Note that aliases are matched without regarding case.")
    name = models.CharField(max_length=255)

    def save(self, *args, **kwargs):
        # Aliases are stored lower-cased.
        lowered = self.alias.lower()
        self.alias = lowered
        super(AffiliationAlias, self).save(*args, **kwargs)

    def __unicode__(self):
        return u"%s -> %s" % (self.alias, self.name)
def compile_affiliation_ending_stripping_regexp(endings=None):
    """Build the regexp that strips ignorable company endings.

    endings -- optional iterable of regexp fragments, one per ending.  When
               None, the fragments are read from the "ending" column of
               AffiliationIgnoredEnding.

    Fragments that are not valid regexps on their own are left out, so a
    malformed entry is never spliced into the combined pattern.

    Returns a compiled, case-insensitive pattern matching an optional comma,
    optional spaces, one of the endings and optional trailing spaces at the
    end of the string.  When there is no usable ending, the returned pattern
    never matches, so substituting with it leaves the input unchanged.
    """
    if endings is None:
        endings = AffiliationIgnoredEnding.objects.values_list("ending", flat=True)

    parts = []
    for ending_re in endings:
        try:
            re.compile(ending_re)
        except re.error:
            # Not a valid regexp: skip it instead of breaking the whole pattern.
            continue
        parts.append(ending_re)

    if not parts:
        # An empty alternation would still match trailing commas and spaces;
        # an empty negative lookahead can never match anything.
        return re.compile(r"(?!)")

    re_str = ",? *({}) *$".format("|".join(parts))

    return re.compile(re_str, re.IGNORECASE)


def get_aliased_affiliations(affiliations, aliases=None, ending_re=None):
    """Given non-unique sequence of affiliations, returns dictionary with
    aliases needed.

    We employ the following strategies, interleaved:

    - Stripping company endings like Inc., GmbH etc. from database

    - Looking up aliases stored directly in the database, like
      "Examplar International" -> "Examplar"

    - Case-folding so Examplar and EXAMPLAR is merged with the
      winner being the one with most occurrences (so input should not
      be made unique) or most upper case letters in case of ties.
      Case folding can be overridden by the aliases in the database.

    affiliations -- iterable of affiliation strings, duplicates included.
    aliases      -- optional iterable of (alias, name) pairs; when None the
                    pairs are read from AffiliationAlias.
    ending_re    -- optional compiled pattern used to strip endings; when
                    None it comes from
                    compile_affiliation_ending_stripping_regexp().

    Returns a dict mapping each input spelling that needs replacing to its
    replacement; spellings that need no replacement are not included.
    """
    res = {}

    if ending_re is None:
        ending_re = compile_affiliation_ending_stripping_regexp()

    if aliases is None:
        aliases = AffiliationAlias.objects.values_list("alias", "name")

    # Alias lookups are done on the lower-cased spelling.
    known_aliases = {alias.lower(): name for alias, name in aliases}

    affiliations_with_case_spellings = defaultdict(set)
    case_spelling_count = defaultdict(int)
    for affiliation in affiliations:
        original_affiliation = affiliation

        # check aliases from DB
        name = known_aliases.get(affiliation.lower())
        if name is not None:
            affiliation = name
            res[original_affiliation] = affiliation

        # strip ending
        stripped = ending_re.sub("", affiliation)
        if stripped != affiliation:
            affiliation = stripped
            res[original_affiliation] = affiliation

        # check aliases from DB again, now that the ending is gone
        name = known_aliases.get(affiliation.lower())
        if name is not None:
            affiliation = name
            res[original_affiliation] = affiliation

        affiliations_with_case_spellings[affiliation.lower()].add(original_affiliation)
        # NOTE(review): the count is keyed by the normalized spelling, while
        # the candidates ranked below are the original spellings, so an
        # original that was rewritten above is ranked with a count of 0 --
        # confirm this is intended.
        case_spelling_count[affiliation] += 1

    def affiliation_sort_key(affiliation):
        count = case_spelling_count[affiliation]
        uppercase_letters = sum(1 for c in affiliation if c.isupper())
        return (count, uppercase_letters)

    # now we just need to pick the most popular uppercase/lowercase
    # spelling for each affiliation with more than one
    for similar_affiliations in affiliations_with_case_spellings.values():
        if len(similar_affiliations) > 1:
            most_popular = max(similar_affiliations, key=affiliation_sort_key)
            for affiliation in similar_affiliations:
                if affiliation != most_popular:
                    res[affiliation] = most_popular

    return res
width: 7em; } -.popover .docname { +.document-stats .popover .element { padding-left: 1em; text-indent: -1em; } diff --git a/ietf/static/ietf/js/document-stats.js b/ietf/static/ietf/js/document-stats.js index fdfbfa36b..25dfcc785 100644 --- a/ietf/static/ietf/js/document-stats.js +++ b/ietf/static/ietf/js/document-stats.js @@ -30,10 +30,10 @@ $(document).ready(function () { if (stdNameRegExp.test(element)) displayName = element.slice(0, 3).toUpperCase() + " " + element.slice(3); - html.push('
'); + html.push(''); } else { - html.push('Affiliation | +Percentage of authors | +Authors | +
---|---|---|
{{ affiliation|default:"(unknown)" }} | +{{ percentage|floatformat:2 }}% | +{% include "stats/includes/number_with_details_cell.html" %} | +