From b2ff10b0f2fb28570e062723fa8b2caca420aac0 Mon Sep 17 00:00:00 2001 From: Ole Laursen <olau@iola.dk> Date: Wed, 15 Feb 2017 18:43:57 +0000 Subject: [PATCH] Add support for extracting the country line from the author addresses to the draft parser (incorporating patch from trunk), store the extracted country instead of trying to turn it into an ISO country code, add country and continent name models and add initial data for those, add helper function for cleaning the countries, add author country and continent charts, move the affiliation models to stats/models.py, fix a bunch of bugs. - Legacy-Id: 12846 --- ietf/doc/admin.py | 5 +- .../doc/migrations/0020_auto_20170112_0753.py | 5 +- ietf/doc/models.py | 4 +- ietf/name/admin.py | 17 +- .../0019_continentname_countryname.py | 44 +++ .../0020_add_country_continent_names.py | 275 ++++++++++++++++++ ietf/name/models.py | 6 + ietf/name/resources.py | 37 ++- ietf/person/admin.py | 13 +- ...filiationalias_affiliationignoredending.py | 29 -- .../migrations/0016_auto_20170203_1030.py | 29 -- ietf/person/models.py | 23 -- ietf/person/resources.py | 29 +- ietf/person/utils.py | 87 +----- ietf/secr/drafts/forms.py | 4 +- ietf/settings.py | 2 +- ietf/stats/admin.py | 22 ++ ietf/stats/backfill_data.py | 30 +- ietf/stats/migrations/0001_initial.py | 37 +++ .../migrations/0002_add_initial_aliases.py | 87 ++++++ ietf/stats/migrations/__init__.py | 0 ietf/stats/models.py | 41 +++ ietf/stats/resources.py | 52 ++++ ietf/stats/tests.py | 3 +- ietf/stats/utils.py | 198 +++++++++++++ ietf/stats/views.py | 94 +++++- ietf/submit/forms.py | 15 +- ietf/submit/views.py | 4 +- .../document_stats_author_affiliation.html | 41 +++ .../document_stats_author_continent.html | 65 +++++ .../stats/document_stats_author_country.html | 124 ++++++++ ietf/templates/submit/submission_status.html | 6 +- ietf/utils/draft.py | 73 +++-- ietf/utils/templatetags/country.py | 14 - 34 files changed, 1234 insertions(+), 281 deletions(-) create mode 100644 
ietf/name/migrations/0019_continentname_countryname.py create mode 100644 ietf/name/migrations/0020_add_country_continent_names.py delete mode 100644 ietf/person/migrations/0015_affiliationalias_affiliationignoredending.py delete mode 100644 ietf/person/migrations/0016_auto_20170203_1030.py create mode 100644 ietf/stats/admin.py create mode 100644 ietf/stats/migrations/0001_initial.py create mode 100644 ietf/stats/migrations/0002_add_initial_aliases.py create mode 100644 ietf/stats/migrations/__init__.py create mode 100644 ietf/stats/models.py create mode 100644 ietf/stats/resources.py create mode 100644 ietf/stats/utils.py create mode 100644 ietf/templates/stats/document_stats_author_continent.html create mode 100644 ietf/templates/stats/document_stats_author_country.html delete mode 100644 ietf/utils/templatetags/country.py diff --git a/ietf/doc/admin.py b/ietf/doc/admin.py index 288c4a635..63109f45f 100644 --- a/ietf/doc/admin.py +++ b/ietf/doc/admin.py @@ -174,7 +174,8 @@ class BallotPositionDocEventAdmin(DocEventAdmin): admin.site.register(BallotPositionDocEvent, BallotPositionDocEventAdmin) class DocumentAuthorAdmin(admin.ModelAdmin): - list_display = ['id', 'document', 'person', 'email', 'affiliation', 'order'] - search_fields = [ 'document__name', 'person__name', 'email__address', 'affiliation'] + list_display = ['id', 'document', 'person', 'email', 'affiliation', 'country', 'order'] + search_fields = ['document__docalias__name', 'person__name', 'email__address', 'affiliation', 'country'] + raw_id_fields = ["document", "person", "email"] admin.site.register(DocumentAuthor, DocumentAuthorAdmin) diff --git a/ietf/doc/migrations/0020_auto_20170112_0753.py b/ietf/doc/migrations/0020_auto_20170112_0753.py index 9404b8aab..7335533b3 100644 --- a/ietf/doc/migrations/0020_auto_20170112_0753.py +++ b/ietf/doc/migrations/0020_auto_20170112_0753.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from django.db import migrations, models -import 
django_countries.fields class Migration(migrations.Migration): @@ -49,7 +48,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='dochistoryauthor', name='country', - field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2), + field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255), ), migrations.RenameField( model_name='dochistoryauthor', @@ -74,7 +73,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='documentauthor', name='country', - field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2), + field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255), ), migrations.RenameField( model_name='documentauthor', diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 3592b4578..b7c3371a5 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -11,8 +11,6 @@ from django.contrib.contenttypes.models import ContentType from django.conf import settings from django.utils.html import mark_safe -from django_countries.fields import CountryField - import debug # pyflakes:ignore from ietf.group.models import Group @@ -406,7 +404,7 @@ class DocumentAuthorInfo(models.Model): # email should only be null for some historic documents email = models.ForeignKey(Email, help_text="Email address used by author for submission", blank=True, null=True) affiliation = models.CharField(max_length=100, blank=True, help_text="Organization/company used by author for submission") - country = CountryField(blank=True, help_text="Country used by author for submission") + country = models.CharField(max_length=255, blank=True, help_text="Country used by author for submission") order = models.IntegerField(default=1) def formatted_email(self): diff --git a/ietf/name/admin.py b/ietf/name/admin.py index 642d36aff..c4d2caa35 100644 --- 
a/ietf/name/admin.py +++ b/ietf/name/admin.py @@ -1,7 +1,8 @@ from django.contrib import admin from ietf.name.models import ( - BallotPositionName, ConstraintName, DBTemplateTypeName, DocRelationshipName, + BallotPositionName, ConstraintName, ContinentName, CountryName, + DBTemplateTypeName, DocRelationshipName, DocReminderTypeName, DocTagName, DocTypeName, DraftSubmissionStateName, FeedbackTypeName, FormalLanguageName, GroupMilestoneStateName, GroupStateName, GroupTypeName, IntendedStdLevelName, IprDisclosureStateName, IprEventTypeName, IprLicenseTypeName, @@ -10,8 +11,11 @@ from ietf.name.models import ( ReviewRequestStateName, ReviewResultName, ReviewTypeName, RoleName, RoomResourceName, SessionStatusName, StdLevelName, StreamName, TimeSlotTypeName, ) +from ietf.stats.models import CountryAlias + class NameAdmin(admin.ModelAdmin): list_display = ["slug", "name", "desc", "used"] + search_fields = ["slug", "name"] prepopulate_from = { "slug": ("name",) } class DocRelationshipNameAdmin(NameAdmin): @@ -26,8 +30,19 @@ class GroupTypeNameAdmin(NameAdmin): list_display = ["slug", "name", "verbose_name", "desc", "used"] admin.site.register(GroupTypeName, GroupTypeNameAdmin) +class CountryAliasInline(admin.TabularInline): + model = CountryAlias + extra = 1 + +class CountryNameAdmin(NameAdmin): + list_display = ["slug", "name", "continent", "in_eu"] + list_filter = ["continent", "in_eu"] + inlines = [CountryAliasInline] +admin.site.register(CountryName, CountryNameAdmin) + admin.site.register(BallotPositionName, NameAdmin) admin.site.register(ConstraintName, NameAdmin) +admin.site.register(ContinentName, NameAdmin) admin.site.register(DBTemplateTypeName, NameAdmin) admin.site.register(DocReminderTypeName, NameAdmin) admin.site.register(DocTagName, NameAdmin) diff --git a/ietf/name/migrations/0019_continentname_countryname.py b/ietf/name/migrations/0019_continentname_countryname.py new file mode 100644 index 000000000..3239276e6 --- /dev/null +++ 
b/ietf/name/migrations/0019_continentname_countryname.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('name', '0018_add_formlang_names'), + ] + + operations = [ + migrations.CreateModel( + name='ContinentName', + fields=[ + ('slug', models.CharField(max_length=32, serialize=False, primary_key=True)), + ('name', models.CharField(max_length=255)), + ('desc', models.TextField(blank=True)), + ('used', models.BooleanField(default=True)), + ('order', models.IntegerField(default=0)), + ], + options={ + 'ordering': ['order', 'name'], + 'abstract': False, + }, + ), + migrations.CreateModel( + name='CountryName', + fields=[ + ('slug', models.CharField(max_length=32, serialize=False, primary_key=True)), + ('name', models.CharField(max_length=255)), + ('desc', models.TextField(blank=True)), + ('used', models.BooleanField(default=True)), + ('order', models.IntegerField(default=0)), + ('in_eu', models.BooleanField(default=False, verbose_name='In EU')), + ('continent', models.ForeignKey(to='name.ContinentName')), + ], + options={ + 'ordering': ['order', 'name'], + 'abstract': False, + }, + ), + ] diff --git a/ietf/name/migrations/0020_add_country_continent_names.py b/ietf/name/migrations/0020_add_country_continent_names.py new file mode 100644 index 000000000..5adc748b7 --- /dev/null +++ b/ietf/name/migrations/0020_add_country_continent_names.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations + +def insert_initial_country_continent_names(apps, schema_editor): + ContinentName = apps.get_model("name", "ContinentName") + africa, _ = ContinentName.objects.get_or_create(slug="africa", name="Africa") + antarctica, _ = ContinentName.objects.get_or_create(slug="antarctica", name="Antarctica") + asia, _ = ContinentName.objects.get_or_create(slug="asia", name="Asia") + 
europe, _ = ContinentName.objects.get_or_create(slug="europe", name="Europe") + north_america, _ = ContinentName.objects.get_or_create(slug="north-america", name="North America") + oceania, _ = ContinentName.objects.get_or_create(slug="oceania", name="Oceania") + south_america, _ = ContinentName.objects.get_or_create(slug="south-america", name="South America") + + CountryName = apps.get_model("name", "CountryName") + CountryName.objects.get_or_create(slug="AD", name=u"Andorra", continent=europe) + CountryName.objects.get_or_create(slug="AE", name=u"United Arab Emirates", continent=asia) + CountryName.objects.get_or_create(slug="AF", name=u"Afghanistan", continent=asia) + CountryName.objects.get_or_create(slug="AG", name=u"Antigua and Barbuda", continent=north_america) + CountryName.objects.get_or_create(slug="AI", name=u"Anguilla", continent=north_america) + CountryName.objects.get_or_create(slug="AL", name=u"Albania", continent=europe) + CountryName.objects.get_or_create(slug="AM", name=u"Armenia", continent=asia) + CountryName.objects.get_or_create(slug="AO", name=u"Angola", continent=africa) + CountryName.objects.get_or_create(slug="AQ", name=u"Antarctica", continent=antarctica) + CountryName.objects.get_or_create(slug="AR", name=u"Argentina", continent=south_america) + CountryName.objects.get_or_create(slug="AS", name=u"American Samoa", continent=oceania) + CountryName.objects.get_or_create(slug="AT", name=u"Austria", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="AU", name=u"Australia", continent=oceania) + CountryName.objects.get_or_create(slug="AW", name=u"Aruba", continent=north_america) + CountryName.objects.get_or_create(slug="AX", name=u"Åland Islands", continent=europe) + CountryName.objects.get_or_create(slug="AZ", name=u"Azerbaijan", continent=asia) + CountryName.objects.get_or_create(slug="BA", name=u"Bosnia and Herzegovina", continent=europe) + CountryName.objects.get_or_create(slug="BB", name=u"Barbados", 
continent=north_america) + CountryName.objects.get_or_create(slug="BD", name=u"Bangladesh", continent=asia) + CountryName.objects.get_or_create(slug="BE", name=u"Belgium", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="BF", name=u"Burkina Faso", continent=africa) + CountryName.objects.get_or_create(slug="BG", name=u"Bulgaria", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="BH", name=u"Bahrain", continent=asia) + CountryName.objects.get_or_create(slug="BI", name=u"Burundi", continent=africa) + CountryName.objects.get_or_create(slug="BJ", name=u"Benin", continent=africa) + CountryName.objects.get_or_create(slug="BL", name=u"Saint Barthélemy", continent=north_america) + CountryName.objects.get_or_create(slug="BM", name=u"Bermuda", continent=north_america) + CountryName.objects.get_or_create(slug="BN", name=u"Brunei", continent=asia) + CountryName.objects.get_or_create(slug="BO", name=u"Bolivia", continent=south_america) + CountryName.objects.get_or_create(slug="BQ", name=u"Bonaire, Sint Eustatius and Saba", continent=north_america) + CountryName.objects.get_or_create(slug="BR", name=u"Brazil", continent=south_america) + CountryName.objects.get_or_create(slug="BS", name=u"Bahamas", continent=north_america) + CountryName.objects.get_or_create(slug="BT", name=u"Bhutan", continent=asia) + CountryName.objects.get_or_create(slug="BV", name=u"Bouvet Island", continent=antarctica) + CountryName.objects.get_or_create(slug="BW", name=u"Botswana", continent=africa) + CountryName.objects.get_or_create(slug="BY", name=u"Belarus", continent=europe) + CountryName.objects.get_or_create(slug="BZ", name=u"Belize", continent=north_america) + CountryName.objects.get_or_create(slug="CA", name=u"Canada", continent=north_america) + CountryName.objects.get_or_create(slug="CC", name=u"Cocos (Keeling) Islands", continent=asia) + CountryName.objects.get_or_create(slug="CD", name=u"Congo (the Democratic Republic of the)", continent=africa) + 
CountryName.objects.get_or_create(slug="CF", name=u"Central African Republic", continent=africa) + CountryName.objects.get_or_create(slug="CG", name=u"Congo", continent=africa) + CountryName.objects.get_or_create(slug="CH", name=u"Switzerland", continent=europe) + CountryName.objects.get_or_create(slug="CI", name=u"Côte d'Ivoire", continent=africa) + CountryName.objects.get_or_create(slug="CK", name=u"Cook Islands", continent=oceania) + CountryName.objects.get_or_create(slug="CL", name=u"Chile", continent=south_america) + CountryName.objects.get_or_create(slug="CM", name=u"Cameroon", continent=africa) + CountryName.objects.get_or_create(slug="CN", name=u"China", continent=asia) + CountryName.objects.get_or_create(slug="CO", name=u"Colombia", continent=south_america) + CountryName.objects.get_or_create(slug="CR", name=u"Costa Rica", continent=north_america) + CountryName.objects.get_or_create(slug="CU", name=u"Cuba", continent=north_america) + CountryName.objects.get_or_create(slug="CV", name=u"Cabo Verde", continent=africa) + CountryName.objects.get_or_create(slug="CW", name=u"Curaçao", continent=north_america) + CountryName.objects.get_or_create(slug="CX", name=u"Christmas Island", continent=asia) + CountryName.objects.get_or_create(slug="CY", name=u"Cyprus", continent=asia, in_eu=True) + CountryName.objects.get_or_create(slug="CZ", name=u"Czech Republic", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="DE", name=u"Germany", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="DJ", name=u"Djibouti", continent=africa) + CountryName.objects.get_or_create(slug="DK", name=u"Denmark", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="DM", name=u"Dominica", continent=north_america) + CountryName.objects.get_or_create(slug="DO", name=u"Dominican Republic", continent=north_america) + CountryName.objects.get_or_create(slug="DZ", name=u"Algeria", continent=africa) + CountryName.objects.get_or_create(slug="EC", 
name=u"Ecuador", continent=south_america) + CountryName.objects.get_or_create(slug="EE", name=u"Estonia", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="EG", name=u"Egypt", continent=africa) + CountryName.objects.get_or_create(slug="EH", name=u"Western Sahara", continent=africa) + CountryName.objects.get_or_create(slug="ER", name=u"Eritrea", continent=africa) + CountryName.objects.get_or_create(slug="ES", name=u"Spain", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="ET", name=u"Ethiopia", continent=africa) + CountryName.objects.get_or_create(slug="FI", name=u"Finland", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="FJ", name=u"Fiji", continent=oceania) + CountryName.objects.get_or_create(slug="FK", name=u"Falkland Islands [Malvinas]", continent=south_america) + CountryName.objects.get_or_create(slug="FM", name=u"Micronesia (Federated States of)", continent=oceania) + CountryName.objects.get_or_create(slug="FO", name=u"Faroe Islands", continent=europe) + CountryName.objects.get_or_create(slug="FR", name=u"France", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="GA", name=u"Gabon", continent=africa) + CountryName.objects.get_or_create(slug="GB", name=u"United Kingdom", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="GD", name=u"Grenada", continent=north_america) + CountryName.objects.get_or_create(slug="GE", name=u"Georgia", continent=asia) + CountryName.objects.get_or_create(slug="GF", name=u"French Guiana", continent=south_america) + CountryName.objects.get_or_create(slug="GG", name=u"Guernsey", continent=europe) + CountryName.objects.get_or_create(slug="GH", name=u"Ghana", continent=africa) + CountryName.objects.get_or_create(slug="GI", name=u"Gibraltar", continent=europe) + CountryName.objects.get_or_create(slug="GL", name=u"Greenland", continent=north_america) + CountryName.objects.get_or_create(slug="GM", name=u"Gambia", continent=africa) + 
CountryName.objects.get_or_create(slug="GN", name=u"Guinea", continent=africa) + CountryName.objects.get_or_create(slug="GP", name=u"Guadeloupe", continent=north_america) + CountryName.objects.get_or_create(slug="GQ", name=u"Equatorial Guinea", continent=africa) + CountryName.objects.get_or_create(slug="GR", name=u"Greece", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="GS", name=u"South Georgia and the South Sandwich Islands", continent=antarctica) + CountryName.objects.get_or_create(slug="GT", name=u"Guatemala", continent=north_america) + CountryName.objects.get_or_create(slug="GU", name=u"Guam", continent=oceania) + CountryName.objects.get_or_create(slug="GW", name=u"Guinea-Bissau", continent=africa) + CountryName.objects.get_or_create(slug="GY", name=u"Guyana", continent=south_america) + CountryName.objects.get_or_create(slug="HK", name=u"Hong Kong", continent=asia) + CountryName.objects.get_or_create(slug="HM", name=u"Heard Island and McDonald Islands", continent=antarctica) + CountryName.objects.get_or_create(slug="HN", name=u"Honduras", continent=north_america) + CountryName.objects.get_or_create(slug="HR", name=u"Croatia", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="HT", name=u"Haiti", continent=north_america) + CountryName.objects.get_or_create(slug="HU", name=u"Hungary", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="ID", name=u"Indonesia", continent=asia) + CountryName.objects.get_or_create(slug="IE", name=u"Ireland", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="IL", name=u"Israel", continent=asia) + CountryName.objects.get_or_create(slug="IM", name=u"Isle of Man", continent=europe) + CountryName.objects.get_or_create(slug="IN", name=u"India", continent=asia) + CountryName.objects.get_or_create(slug="IO", name=u"British Indian Ocean Territory", continent=asia) + CountryName.objects.get_or_create(slug="IQ", name=u"Iraq", continent=asia) + 
CountryName.objects.get_or_create(slug="IR", name=u"Iran", continent=asia) + CountryName.objects.get_or_create(slug="IS", name=u"Iceland", continent=europe) + CountryName.objects.get_or_create(slug="IT", name=u"Italy", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="JE", name=u"Jersey", continent=europe) + CountryName.objects.get_or_create(slug="JM", name=u"Jamaica", continent=north_america) + CountryName.objects.get_or_create(slug="JO", name=u"Jordan", continent=asia) + CountryName.objects.get_or_create(slug="JP", name=u"Japan", continent=asia) + CountryName.objects.get_or_create(slug="KE", name=u"Kenya", continent=africa) + CountryName.objects.get_or_create(slug="KG", name=u"Kyrgyzstan", continent=asia) + CountryName.objects.get_or_create(slug="KH", name=u"Cambodia", continent=asia) + CountryName.objects.get_or_create(slug="KI", name=u"Kiribati", continent=oceania) + CountryName.objects.get_or_create(slug="KM", name=u"Comoros", continent=africa) + CountryName.objects.get_or_create(slug="KN", name=u"Saint Kitts and Nevis", continent=north_america) + CountryName.objects.get_or_create(slug="KP", name=u"North Korea", continent=asia) + CountryName.objects.get_or_create(slug="KR", name=u"South Korea", continent=asia) + CountryName.objects.get_or_create(slug="KW", name=u"Kuwait", continent=asia) + CountryName.objects.get_or_create(slug="KY", name=u"Cayman Islands", continent=north_america) + CountryName.objects.get_or_create(slug="KZ", name=u"Kazakhstan", continent=asia) + CountryName.objects.get_or_create(slug="LA", name=u"Laos", continent=asia) + CountryName.objects.get_or_create(slug="LB", name=u"Lebanon", continent=asia) + CountryName.objects.get_or_create(slug="LC", name=u"Saint Lucia", continent=north_america) + CountryName.objects.get_or_create(slug="LI", name=u"Liechtenstein", continent=europe) + CountryName.objects.get_or_create(slug="LK", name=u"Sri Lanka", continent=asia) + CountryName.objects.get_or_create(slug="LR", name=u"Liberia", 
continent=africa) + CountryName.objects.get_or_create(slug="LS", name=u"Lesotho", continent=africa) + CountryName.objects.get_or_create(slug="LT", name=u"Lithuania", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="LU", name=u"Luxembourg", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="LV", name=u"Latvia", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="LY", name=u"Libya", continent=africa) + CountryName.objects.get_or_create(slug="MA", name=u"Morocco", continent=africa) + CountryName.objects.get_or_create(slug="MC", name=u"Monaco", continent=europe) + CountryName.objects.get_or_create(slug="MD", name=u"Moldova", continent=europe) + CountryName.objects.get_or_create(slug="ME", name=u"Montenegro", continent=europe) + CountryName.objects.get_or_create(slug="MF", name=u"Saint Martin (French part)", continent=north_america) + CountryName.objects.get_or_create(slug="MG", name=u"Madagascar", continent=africa) + CountryName.objects.get_or_create(slug="MH", name=u"Marshall Islands", continent=oceania) + CountryName.objects.get_or_create(slug="MK", name=u"Macedonia", continent=europe) + CountryName.objects.get_or_create(slug="ML", name=u"Mali", continent=africa) + CountryName.objects.get_or_create(slug="MM", name=u"Myanmar", continent=asia) + CountryName.objects.get_or_create(slug="MN", name=u"Mongolia", continent=asia) + CountryName.objects.get_or_create(slug="MO", name=u"Macao", continent=asia) + CountryName.objects.get_or_create(slug="MP", name=u"Northern Mariana Islands", continent=oceania) + CountryName.objects.get_or_create(slug="MQ", name=u"Martinique", continent=north_america) + CountryName.objects.get_or_create(slug="MR", name=u"Mauritania", continent=africa) + CountryName.objects.get_or_create(slug="MS", name=u"Montserrat", continent=north_america) + CountryName.objects.get_or_create(slug="MT", name=u"Malta", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="MU", 
name=u"Mauritius", continent=africa) + CountryName.objects.get_or_create(slug="MV", name=u"Maldives", continent=asia) + CountryName.objects.get_or_create(slug="MW", name=u"Malawi", continent=africa) + CountryName.objects.get_or_create(slug="MX", name=u"Mexico", continent=north_america) + CountryName.objects.get_or_create(slug="MY", name=u"Malaysia", continent=asia) + CountryName.objects.get_or_create(slug="MZ", name=u"Mozambique", continent=africa) + CountryName.objects.get_or_create(slug="NA", name=u"Namibia", continent=africa) + CountryName.objects.get_or_create(slug="NC", name=u"New Caledonia", continent=oceania) + CountryName.objects.get_or_create(slug="NE", name=u"Niger", continent=africa) + CountryName.objects.get_or_create(slug="NF", name=u"Norfolk Island", continent=oceania) + CountryName.objects.get_or_create(slug="NG", name=u"Nigeria", continent=africa) + CountryName.objects.get_or_create(slug="NI", name=u"Nicaragua", continent=north_america) + CountryName.objects.get_or_create(slug="NL", name=u"Netherlands", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="NO", name=u"Norway", continent=europe) + CountryName.objects.get_or_create(slug="NP", name=u"Nepal", continent=asia) + CountryName.objects.get_or_create(slug="NR", name=u"Nauru", continent=oceania) + CountryName.objects.get_or_create(slug="NU", name=u"Niue", continent=oceania) + CountryName.objects.get_or_create(slug="NZ", name=u"New Zealand", continent=oceania) + CountryName.objects.get_or_create(slug="OM", name=u"Oman", continent=asia) + CountryName.objects.get_or_create(slug="PA", name=u"Panama", continent=north_america) + CountryName.objects.get_or_create(slug="PE", name=u"Peru", continent=south_america) + CountryName.objects.get_or_create(slug="PF", name=u"French Polynesia", continent=oceania) + CountryName.objects.get_or_create(slug="PG", name=u"Papua New Guinea", continent=oceania) + CountryName.objects.get_or_create(slug="PH", name=u"Philippines", continent=asia) + 
CountryName.objects.get_or_create(slug="PK", name=u"Pakistan", continent=asia) + CountryName.objects.get_or_create(slug="PL", name=u"Poland", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="PM", name=u"Saint Pierre and Miquelon", continent=north_america) + CountryName.objects.get_or_create(slug="PN", name=u"Pitcairn", continent=oceania) + CountryName.objects.get_or_create(slug="PR", name=u"Puerto Rico", continent=north_america) + CountryName.objects.get_or_create(slug="PS", name=u"Palestine, State of", continent=asia) + CountryName.objects.get_or_create(slug="PT", name=u"Portugal", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="PW", name=u"Palau", continent=oceania) + CountryName.objects.get_or_create(slug="PY", name=u"Paraguay", continent=south_america) + CountryName.objects.get_or_create(slug="QA", name=u"Qatar", continent=asia) + CountryName.objects.get_or_create(slug="RE", name=u"Réunion", continent=africa) + CountryName.objects.get_or_create(slug="RO", name=u"Romania", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="RS", name=u"Serbia", continent=europe) + CountryName.objects.get_or_create(slug="RU", name=u"Russia", continent=europe) + CountryName.objects.get_or_create(slug="RW", name=u"Rwanda", continent=africa) + CountryName.objects.get_or_create(slug="SA", name=u"Saudi Arabia", continent=asia) + CountryName.objects.get_or_create(slug="SB", name=u"Solomon Islands", continent=oceania) + CountryName.objects.get_or_create(slug="SC", name=u"Seychelles", continent=africa) + CountryName.objects.get_or_create(slug="SD", name=u"Sudan", continent=africa) + CountryName.objects.get_or_create(slug="SE", name=u"Sweden", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="SG", name=u"Singapore", continent=asia) + CountryName.objects.get_or_create(slug="SH", name=u"Saint Helena, Ascension and Tristan da Cunha", continent=africa) + CountryName.objects.get_or_create(slug="SI", 
name=u"Slovenia", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="SJ", name=u"Svalbard and Jan Mayen", continent=europe) + CountryName.objects.get_or_create(slug="SK", name=u"Slovakia", continent=europe, in_eu=True) + CountryName.objects.get_or_create(slug="SL", name=u"Sierra Leone", continent=africa) + CountryName.objects.get_or_create(slug="SM", name=u"San Marino", continent=europe) + CountryName.objects.get_or_create(slug="SN", name=u"Senegal", continent=africa) + CountryName.objects.get_or_create(slug="SO", name=u"Somalia", continent=africa) + CountryName.objects.get_or_create(slug="SR", name=u"Suriname", continent=south_america) + CountryName.objects.get_or_create(slug="SS", name=u"South Sudan", continent=africa) + CountryName.objects.get_or_create(slug="ST", name=u"Sao Tome and Principe", continent=africa) + CountryName.objects.get_or_create(slug="SV", name=u"El Salvador", continent=north_america) + CountryName.objects.get_or_create(slug="SX", name=u"Sint Maarten (Dutch part)", continent=north_america) + CountryName.objects.get_or_create(slug="SY", name=u"Syria", continent=asia) + CountryName.objects.get_or_create(slug="SZ", name=u"Swaziland", continent=africa) + CountryName.objects.get_or_create(slug="TC", name=u"Turks and Caicos Islands", continent=north_america) + CountryName.objects.get_or_create(slug="TD", name=u"Chad", continent=africa) + CountryName.objects.get_or_create(slug="TF", name=u"French Southern Territories", continent=antarctica) + CountryName.objects.get_or_create(slug="TG", name=u"Togo", continent=africa) + CountryName.objects.get_or_create(slug="TH", name=u"Thailand", continent=asia) + CountryName.objects.get_or_create(slug="TJ", name=u"Tajikistan", continent=asia) + CountryName.objects.get_or_create(slug="TK", name=u"Tokelau", continent=oceania) + CountryName.objects.get_or_create(slug="TL", name=u"Timor-Leste", continent=asia) + CountryName.objects.get_or_create(slug="TM", name=u"Turkmenistan", continent=asia) + 
CountryName.objects.get_or_create(slug="TN", name=u"Tunisia", continent=africa) + CountryName.objects.get_or_create(slug="TO", name=u"Tonga", continent=oceania) + CountryName.objects.get_or_create(slug="TR", name=u"Turkey", continent=europe) + CountryName.objects.get_or_create(slug="TT", name=u"Trinidad and Tobago", continent=north_america) + CountryName.objects.get_or_create(slug="TV", name=u"Tuvalu", continent=oceania) + CountryName.objects.get_or_create(slug="TW", name=u"Taiwan", continent=asia) + CountryName.objects.get_or_create(slug="TZ", name=u"Tanzania", continent=africa) + CountryName.objects.get_or_create(slug="UA", name=u"Ukraine", continent=europe) + CountryName.objects.get_or_create(slug="UG", name=u"Uganda", continent=africa) + CountryName.objects.get_or_create(slug="UM", name=u"United States Minor Outlying Islands", continent=oceania) + CountryName.objects.get_or_create(slug="US", name=u"United States of America", continent=north_america) + CountryName.objects.get_or_create(slug="UY", name=u"Uruguay", continent=south_america) + CountryName.objects.get_or_create(slug="UZ", name=u"Uzbekistan", continent=asia) + CountryName.objects.get_or_create(slug="VA", name=u"Holy See", continent=europe) + CountryName.objects.get_or_create(slug="VC", name=u"Saint Vincent and the Grenadines", continent=north_america) + CountryName.objects.get_or_create(slug="VE", name=u"Venezuela", continent=south_america) + CountryName.objects.get_or_create(slug="VG", name=u"Virgin Islands (British)", continent=north_america) + CountryName.objects.get_or_create(slug="VI", name=u"Virgin Islands (U.S.)", continent=north_america) + CountryName.objects.get_or_create(slug="VN", name=u"Vietnam", continent=asia) + CountryName.objects.get_or_create(slug="VU", name=u"Vanuatu", continent=oceania) + CountryName.objects.get_or_create(slug="WF", name=u"Wallis and Futuna", continent=oceania) + CountryName.objects.get_or_create(slug="WS", name=u"Samoa", continent=oceania) + 
CountryName.objects.get_or_create(slug="YE", name=u"Yemen", continent=asia) + CountryName.objects.get_or_create(slug="YT", name=u"Mayotte", continent=africa) + CountryName.objects.get_or_create(slug="ZA", name=u"South Africa", continent=africa) + CountryName.objects.get_or_create(slug="ZM", name=u"Zambia", continent=africa) + CountryName.objects.get_or_create(slug="ZW", name=u"Zimbabwe", continent=africa) + +class Migration(migrations.Migration): + + dependencies = [ + ('name', '0019_continentname_countryname'), + ] + + operations = [ + migrations.RunPython(insert_initial_country_continent_names, migrations.RunPython.noop) + ] diff --git a/ietf/name/models.py b/ietf/name/models.py index d208a867b..15161f331 100644 --- a/ietf/name/models.py +++ b/ietf/name/models.py @@ -99,4 +99,10 @@ class ReviewResultName(NameModel): """Almost ready, Has issues, Has nits, Not Ready, On the right track, Ready, Ready with issues, Ready with nits, Serious Issues""" +class ContinentName(NameModel): + "Africa, Antarctica, Asia, ..." +class CountryName(NameModel): + "Afghanistan, Aaland Islands, Albania, ..." 
+ continent = models.ForeignKey(ContinentName) + in_eu = models.BooleanField(verbose_name="In EU", default=False) diff --git a/ietf/name/resources.py b/ietf/name/resources.py index f6a74387d..287963395 100644 --- a/ietf/name/resources.py +++ b/ietf/name/resources.py @@ -15,7 +15,7 @@ from ietf.name.models import (TimeSlotTypeName, GroupStateName, DocTagName, Inte LiaisonStatementTagName, FeedbackTypeName, LiaisonStatementState, StreamName, BallotPositionName, DBTemplateTypeName, NomineePositionStateName, ReviewRequestStateName, ReviewTypeName, ReviewResultName, - FormalLanguageName) + FormalLanguageName, ContinentName, CountryName) class TimeSlotTypeNameResource(ModelResource): @@ -474,3 +474,38 @@ class FormalLanguageNameResource(ModelResource): } api.name.register(FormalLanguageNameResource()) + + +class ContinentNameResource(ModelResource): + class Meta: + queryset = ContinentName.objects.all() + serializer = api.Serializer() + cache = SimpleCache() + #resource_name = 'continentname' + filtering = { + "slug": ALL, + "name": ALL, + "desc": ALL, + "used": ALL, + "order": ALL, + } +api.name.register(ContinentNameResource()) + +class CountryNameResource(ModelResource): + continent = ToOneField(ContinentNameResource, 'continent') + class Meta: + queryset = CountryName.objects.all() + serializer = api.Serializer() + cache = SimpleCache() + #resource_name = 'countryname' + filtering = { + "slug": ALL, + "name": ALL, + "desc": ALL, + "used": ALL, + "order": ALL, + "in_eu": ALL, + "continent": ALL_WITH_RELATIONS, + } +api.name.register(CountryNameResource()) + diff --git a/ietf/person/admin.py b/ietf/person/admin.py index 563d212e3..e51427afa 100644 --- a/ietf/person/admin.py +++ b/ietf/person/admin.py @@ -1,7 +1,7 @@ from django.contrib import admin -from ietf.person.models import Email, Alias, Person, AffiliationAlias, AffiliationIgnoredEnding +from ietf.person.models import Email, Alias, Person from ietf.person.name import name_parts class 
EmailAdmin(admin.ModelAdmin): @@ -32,14 +32,3 @@ class PersonAdmin(admin.ModelAdmin): inlines = [ EmailInline, AliasInline, ] # actions = None admin.site.register(Person, PersonAdmin) - -class AffiliationAliasAdmin(admin.ModelAdmin): - list_filter = ["name"] - list_display = ["alias", "name"] - search_fields = ["alias", "name"] -admin.site.register(AffiliationAlias, AffiliationAliasAdmin) - -class AffiliationIgnoredEndingAdmin(admin.ModelAdmin): - list_display = ["ending"] - search_fields = ["ending"] -admin.site.register(AffiliationIgnoredEnding, AffiliationIgnoredEndingAdmin) diff --git a/ietf/person/migrations/0015_affiliationalias_affiliationignoredending.py b/ietf/person/migrations/0015_affiliationalias_affiliationignoredending.py deleted file mode 100644 index 1747fd224..000000000 --- a/ietf/person/migrations/0015_affiliationalias_affiliationignoredending.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('person', '0014_auto_20160613_0751'), - ] - - operations = [ - migrations.CreateModel( - name='AffiliationAlias', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('alias', models.CharField(help_text=b'Note that aliases are matched without regarding case.', max_length=255)), - ('name', models.CharField(max_length=255)), - ], - ), - migrations.CreateModel( - name='AffiliationIgnoredEnding', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('ending', models.CharField(max_length=255)), - ], - ), - ] diff --git a/ietf/person/migrations/0016_auto_20170203_1030.py b/ietf/person/migrations/0016_auto_20170203_1030.py deleted file mode 100644 index d5f4fd950..000000000 --- a/ietf/person/migrations/0016_auto_20170203_1030.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- 
-from __future__ import unicode_literals - -from django.db import migrations - -def add_affiliation_info(apps, schema_editor): - AffiliationAlias = apps.get_model("person", "AffiliationAlias") - - AffiliationAlias.objects.get_or_create(alias="cisco", name="Cisco Systems") - AffiliationAlias.objects.get_or_create(alias="cisco system", name="Cisco Systems") - AffiliationAlias.objects.get_or_create(alias="cisco systems (india) private limited", name="Cisco Systems") - AffiliationAlias.objects.get_or_create(alias="cisco systems india pvt", name="Cisco Systems") - - AffiliationIgnoredEnding = apps.get_model("person", "AffiliationIgnoredEnding") - AffiliationIgnoredEnding.objects.get_or_create(ending="LLC\.?") - AffiliationIgnoredEnding.objects.get_or_create(ending="Ltd\.?") - AffiliationIgnoredEnding.objects.get_or_create(ending="Inc\.?") - AffiliationIgnoredEnding.objects.get_or_create(ending="GmbH\.?") - - -class Migration(migrations.Migration): - - dependencies = [ - ('person', '0015_affiliationalias_affiliationignoredending'), - ] - - operations = [ - migrations.RunPython(add_affiliation_info, migrations.RunPython.noop) - ] diff --git a/ietf/person/models.py b/ietf/person/models.py index 9b2392d4c..61fa6b2c0 100644 --- a/ietf/person/models.py +++ b/ietf/person/models.py @@ -241,26 +241,3 @@ class Email(models.Model): return return self.address - -class AffiliationAlias(models.Model): - """Records that alias should be treated as name for statistical - purposes.""" - - alias = models.CharField(max_length=255, help_text="Note that aliases are matched without regarding case.") - name = models.CharField(max_length=255) - - def __unicode__(self): - return u"{} -> {}".format(self.alias, self.name) - - def save(self, *args, **kwargs): - self.alias = self.alias.lower() - super(AffiliationAlias, self).save(*args, **kwargs) - -class AffiliationIgnoredEnding(models.Model): - """Records that ending should be stripped from the affiliation for statistical purposes.""" - - ending = 
models.CharField(max_length=255, help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!") - - def __unicode__(self): - return self.ending - diff --git a/ietf/person/resources.py b/ietf/person/resources.py index 4b6fc3cda..ff7351cc8 100644 --- a/ietf/person/resources.py +++ b/ietf/person/resources.py @@ -6,8 +6,7 @@ from tastypie.cache import SimpleCache from ietf import api -from ietf.person.models import (Person, Email, Alias, PersonHistory, - AffiliationAlias, AffiliationIgnoredEnding) +from ietf.person.models import (Person, Email, Alias, PersonHistory) from ietf.utils.resources import UserResource @@ -82,29 +81,3 @@ class PersonHistoryResource(ModelResource): "user": ALL_WITH_RELATIONS, } api.person.register(PersonHistoryResource()) - -class AffiliationIgnoredEndingResource(ModelResource): - class Meta: - queryset = AffiliationIgnoredEnding.objects.all() - serializer = api.Serializer() - cache = SimpleCache() - #resource_name = 'affiliationignoredending' - filtering = { - "id": ALL, - "ending": ALL, - } -api.person.register(AffiliationIgnoredEndingResource()) - -class AffiliationAliasResource(ModelResource): - class Meta: - queryset = AffiliationAlias.objects.all() - serializer = api.Serializer() - cache = SimpleCache() - #resource_name = 'affiliationalias' - filtering = { - "id": ALL, - "alias": ALL, - "name": ALL, - } -api.person.register(AffiliationAliasResource()) - diff --git a/ietf/person/utils.py b/ietf/person/utils.py index 024b91a53..9c033462d 100755 --- a/ietf/person/utils.py +++ b/ietf/person/utils.py @@ -1,10 +1,8 @@ import pprint -import re -from collections import defaultdict from django.contrib import admin from django.contrib.auth.models import User -from ietf.person.models import Person, AffiliationAlias, AffiliationIgnoredEnding +from ietf.person.models import Person def merge_persons(source,target,stream): @@ -88,86 +86,3 @@ def merge_persons(source,target,stream): else: print >>stream, "Deleting Person: 
{}({})".format(source.ascii,source.pk) source.delete() - - -def compile_affiliation_ending_stripping_regexp(): - parts = [] - for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True): - try: - re.compile(ending_re) - except re.error: - pass - - parts.append(ending_re) - - re_str = ",? *({}) *$".format("|".join(parts)) - - return re.compile(re_str, re.IGNORECASE) - - -def get_aliased_affiliations(affiliations): - """Given non-unique sequence of affiliations, returns dictionary with - aliases needed. - - We employ the following strategies, interleaved: - - - Stripping company endings like Inc., GmbH etc. from database - - - Looking up aliases stored directly in the database, like - "Examplar International" -> "Examplar" - - - Case-folding so Examplar and EXAMPLAR is merged with the - winner being the one with most occurrences (so input should not - be made unique) or most upper case letters in case of ties. - Case folding can be overridden by the aliases in the database.""" - - res = {} - - ending_re = compile_affiliation_ending_stripping_regexp() - - known_aliases = { alias.lower(): name for alias, name in AffiliationAlias.objects.values_list("alias", "name") } - - affiliations_with_case_spellings = defaultdict(set) - case_spelling_count = defaultdict(int) - for affiliation in affiliations: - original_affiliation = affiliation - - # check aliases from DB - alias = known_aliases.get(affiliation.lower()) - if alias is not None: - affiliation = alias - res[original_affiliation] = affiliation - - # strip ending - alias = ending_re.sub("", affiliation) - if alias != affiliation: - affiliation = alias - res[original_affiliation] = affiliation - - # check aliases from DB - alias = known_aliases.get(affiliation.lower()) - if alias is not None: - affiliation = alias - res[original_affiliation] = affiliation - - affiliations_with_case_spellings[affiliation.lower()].add(original_affiliation) - case_spelling_count[affiliation] += 1 - - def 
affiliation_sort_key(affiliation): - count = case_spelling_count[affiliation] - uppercase_letters = sum(1 for c in affiliation if c.isupper()) - return (count, uppercase_letters) - - # now we just need to pick the most popular uppercase/lowercase - # spelling for each affiliation with more than one - for similar_affiliations in affiliations_with_case_spellings.itervalues(): - if len(similar_affiliations) > 1: - most_popular = sorted(similar_affiliations, key=affiliation_sort_key, reverse=True)[0] - for affiliation in similar_affiliations: - if affiliation != most_popular: - res[affiliation] = most_popular - - return res - - - diff --git a/ietf/secr/drafts/forms.py b/ietf/secr/drafts/forms.py index 956c87109..59b12718b 100644 --- a/ietf/secr/drafts/forms.py +++ b/ietf/secr/drafts/forms.py @@ -4,8 +4,6 @@ import os from django import forms -from django_countries.fields import countries - from ietf.doc.models import Document, DocAlias, State from ietf.name.models import IntendedStdLevelName, DocRelationshipName from ietf.group.models import Group @@ -107,7 +105,7 @@ class AuthorForm(forms.Form): person = forms.CharField(max_length=50,widget=forms.TextInput(attrs={'class':'name-autocomplete'}),help_text="To see a list of people type the first name, or last name, or both.") email = forms.CharField(widget=forms.Select(),help_text="Select an email.") affiliation = forms.CharField(max_length=100, required=False, help_text="Affiliation") - country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False, help_text="Country") + country = forms.CharField(max_length=255, required=False, help_text="Country") # check for id within parenthesis to ensure name was selected from the list def clean_person(self): diff --git a/ietf/settings.py b/ietf/settings.py index ddc77c8df..6f815617f 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -293,7 +293,6 @@ INSTALLED_APPS = ( 'tastypie', 'widget_tweaks', 'django_markup', - 'django_countries', # IETF 
apps 'ietf.api', 'ietf.community', @@ -315,6 +314,7 @@ INSTALLED_APPS = ( 'ietf.redirects', 'ietf.release', 'ietf.review', + 'ietf.stats', 'ietf.submit', 'ietf.sync', 'ietf.utils', diff --git a/ietf/stats/admin.py b/ietf/stats/admin.py new file mode 100644 index 000000000..57f489746 --- /dev/null +++ b/ietf/stats/admin.py @@ -0,0 +1,22 @@ +from django.contrib import admin + +from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias + + +class AffiliationAliasAdmin(admin.ModelAdmin): + list_filter = ["name"] + list_display = ["alias", "name"] + search_fields = ["alias", "name"] +admin.site.register(AffiliationAlias, AffiliationAliasAdmin) + +class AffiliationIgnoredEndingAdmin(admin.ModelAdmin): + list_display = ["ending"] + search_fields = ["ending"] +admin.site.register(AffiliationIgnoredEnding, AffiliationIgnoredEndingAdmin) + +class CountryAliasAdmin(admin.ModelAdmin): + list_filter = ["country"] + list_display = ["alias", "country"] + search_fields = ["alias", "country__name"] +admin.site.register(CountryAlias, CountryAliasAdmin) + diff --git a/ietf/stats/backfill_data.py b/ietf/stats/backfill_data.py index c62edfd30..cf4d7ed28 100644 --- a/ietf/stats/backfill_data.py +++ b/ietf/stats/backfill_data.py @@ -26,7 +26,6 @@ args = parser.parse_args() formal_language_dict = { l.pk: l for l in FormalLanguageName.objects.all() } - docs_qs = Document.objects.filter(type="draft") if args.document: @@ -80,11 +79,20 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen for author in old_authors: for alias in author.person.alias_set.all(): old_authors_by_name[alias.name] = author + old_authors_by_name[author.person.plain_name()] = author if author.email_id: old_authors_by_email[author.email_id] = author - for full, _, _, _, _, email, company in d.get_author_list(): + # the draft parser sometimes has a problem if affiliation + # isn't in the second line, then it will report an extra + # author - skip those + seen = 
set() + for full, _, _, _, _, email, country, company in d.get_author_list(): + if email in seen: + continue + seen.add(email) + old_author = None if email: old_author = old_authors_by_email.get(email) @@ -92,15 +100,29 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen old_author = old_authors_by_name.get(full) if not old_author: - print "UNKNOWN AUTHOR", doc.name, full, email, company + print "UNKNOWN AUTHOR", doc.name, full, email, country, company continue if old_author.affiliation != company: - print "new affiliation", old_author.affiliation, company + print "new affiliation", canonical_name, "[", full, "]", old_author.affiliation, "->", company old_author.affiliation = company old_author.save(update_fields=["affiliation"]) updated = True + if country is None: + country = "" + + try: + country = country.decode("utf-8") + except UnicodeDecodeError: + country = country.decode("latin-1") + + if old_author.country != country: + print "new country", canonical_name ,"[", full, "]", old_author.country.encode("utf-8"), "->", country.encode("utf-8") + old_author.country = country + old_author.save(update_fields=["country"]) + updated = True + if updates: Document.objects.filter(pk=doc.pk).update(**updates) diff --git a/ietf/stats/migrations/0001_initial.py b/ietf/stats/migrations/0001_initial.py new file mode 100644 index 000000000..89297340c --- /dev/null +++ b/ietf/stats/migrations/0001_initial.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('name', '0020_add_country_continent_names'), + ] + + operations = [ + migrations.CreateModel( + name='AffiliationAlias', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('alias', models.CharField(help_text=b"Note that aliases will be matched case-insensitive and both before and after 
some clean-up.", max_length=255, unique=True)), + ('name', models.CharField(max_length=255)), + ], + ), + migrations.CreateModel( + name='AffiliationIgnoredEnding', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('ending', models.CharField(help_text=b"Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!", max_length=255)), + ], + ), + migrations.CreateModel( + name='CountryAlias', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('alias', models.CharField(help_text=b"Note that aliases are matched case-insensitive if the length is > 2.", max_length=255)), + ('country', models.ForeignKey(to='name.CountryName', max_length=255)), + ], + ), + ] diff --git a/ietf/stats/migrations/0002_add_initial_aliases.py b/ietf/stats/migrations/0002_add_initial_aliases.py new file mode 100644 index 000000000..b25cc152c --- /dev/null +++ b/ietf/stats/migrations/0002_add_initial_aliases.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations + +def add_affiliation_info(apps, schema_editor): + AffiliationAlias = apps.get_model("stats", "AffiliationAlias") + + AffiliationAlias.objects.get_or_create(alias="cisco", name="Cisco Systems") + AffiliationAlias.objects.get_or_create(alias="cisco system", name="Cisco Systems") + AffiliationAlias.objects.get_or_create(alias="cisco systems (india) private limited", name="Cisco Systems") + AffiliationAlias.objects.get_or_create(alias="cisco systems india pvt", name="Cisco Systems") + + AffiliationIgnoredEnding = apps.get_model("stats", "AffiliationIgnoredEnding") + AffiliationIgnoredEnding.objects.get_or_create(ending="LLC\.?") + AffiliationIgnoredEnding.objects.get_or_create(ending="Ltd\.?") + AffiliationIgnoredEnding.objects.get_or_create(ending="Inc\.?") + AffiliationIgnoredEnding.objects.get_or_create(ending="GmbH\.?") + + CountryAlias = 
apps.get_model("stats", "CountryAlias") + CountryAlias.objects.get_or_create(alias="russian federation", country_id="RU") + CountryAlias.objects.get_or_create(alias="p. r. china", country_id="CN") + CountryAlias.objects.get_or_create(alias="p.r. china", country_id="CN") + CountryAlias.objects.get_or_create(alias="p.r.china", country_id="CN") + CountryAlias.objects.get_or_create(alias="p.r china", country_id="CN") + CountryAlias.objects.get_or_create(alias="p.r. of china", country_id="CN") + CountryAlias.objects.get_or_create(alias="PRC", country_id="CN") + CountryAlias.objects.get_or_create(alias="P.R.C", country_id="CN") + CountryAlias.objects.get_or_create(alias="P.R.C.", country_id="CN") + CountryAlias.objects.get_or_create(alias="beijing", country_id="CN") + CountryAlias.objects.get_or_create(alias="shenzhen", country_id="CN") + CountryAlias.objects.get_or_create(alias="R.O.C.", country_id="TW") + CountryAlias.objects.get_or_create(alias="usa", country_id="US") + CountryAlias.objects.get_or_create(alias="UAS", country_id="US") + CountryAlias.objects.get_or_create(alias="USA.", country_id="US") + CountryAlias.objects.get_or_create(alias="u.s.a.", country_id="US") + CountryAlias.objects.get_or_create(alias="u. s. 
a.", country_id="US") + CountryAlias.objects.get_or_create(alias="u.s.a", country_id="US") + CountryAlias.objects.get_or_create(alias="u.s.", country_id="US") + CountryAlias.objects.get_or_create(alias="U.S", country_id="US") + CountryAlias.objects.get_or_create(alias="US of A", country_id="US") + CountryAlias.objects.get_or_create(alias="united sates", country_id="US") + CountryAlias.objects.get_or_create(alias="united state", country_id="US") + CountryAlias.objects.get_or_create(alias="united states", country_id="US") + CountryAlias.objects.get_or_create(alias="unites states", country_id="US") + CountryAlias.objects.get_or_create(alias="texas", country_id="US") + CountryAlias.objects.get_or_create(alias="UK", country_id="GB") + CountryAlias.objects.get_or_create(alias="united kingcom", country_id="GB") + CountryAlias.objects.get_or_create(alias="great britain", country_id="GB") + CountryAlias.objects.get_or_create(alias="england", country_id="GB") + CountryAlias.objects.get_or_create(alias="U.K.", country_id="GB") + CountryAlias.objects.get_or_create(alias="U.K", country_id="GB") + CountryAlias.objects.get_or_create(alias="Uk", country_id="GB") + CountryAlias.objects.get_or_create(alias="scotland", country_id="GB") + CountryAlias.objects.get_or_create(alias="republic of korea", country_id="KR") + CountryAlias.objects.get_or_create(alias="korea", country_id="KR") + CountryAlias.objects.get_or_create(alias="korea rep", country_id="KR") + CountryAlias.objects.get_or_create(alias="korea (the republic of)", country_id="KR") + CountryAlias.objects.get_or_create(alias="the netherlands", country_id="NL") + CountryAlias.objects.get_or_create(alias="netherland", country_id="NL") + CountryAlias.objects.get_or_create(alias="danmark", country_id="DK") + CountryAlias.objects.get_or_create(alias="sweeden", country_id="SE") + CountryAlias.objects.get_or_create(alias="swede", country_id="SE") + CountryAlias.objects.get_or_create(alias="belgique", country_id="BE") +
CountryAlias.objects.get_or_create(alias="madrid", country_id="ES") + CountryAlias.objects.get_or_create(alias="espana", country_id="ES") + CountryAlias.objects.get_or_create(alias="hellas", country_id="GR") + CountryAlias.objects.get_or_create(alias="gemany", country_id="DE") + CountryAlias.objects.get_or_create(alias="deutschland", country_id="DE") + CountryAlias.objects.get_or_create(alias="italia", country_id="IT") + CountryAlias.objects.get_or_create(alias="isreal", country_id="IL") + CountryAlias.objects.get_or_create(alias="tel aviv", country_id="IL") + CountryAlias.objects.get_or_create(alias="UAE", country_id="AE") + CountryAlias.objects.get_or_create(alias="grand-duchy of luxembourg", country_id="LU") + CountryAlias.objects.get_or_create(alias="brasil", country_id="BR") + + + +class Migration(migrations.Migration): + + dependencies = [ + ('stats', '0001_initial'), + ] + + operations = [ + migrations.RunPython(add_affiliation_info, migrations.RunPython.noop) + ] diff --git a/ietf/stats/migrations/__init__.py b/ietf/stats/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ietf/stats/models.py b/ietf/stats/models.py new file mode 100644 index 000000000..875a81afa --- /dev/null +++ b/ietf/stats/models.py @@ -0,0 +1,41 @@ +from django.db import models +from ietf.name.models import CountryName + +class AffiliationAlias(models.Model): + """Records that alias should be treated as name for statistical + purposes.""" + + alias = models.CharField(max_length=255, help_text="Note that aliases will be matched case-insensitive and both before and after some clean-up.", unique=True) + name = models.CharField(max_length=255) + + def __unicode__(self): + return u"{} -> {}".format(self.alias, self.name) + + def save(self, *args, **kwargs): + self.alias = self.alias.lower() + super(AffiliationAlias, self).save(*args, **kwargs) + + class Meta: + verbose_name_plural = "affiliation aliases" + +class AffiliationIgnoredEnding(models.Model): + 
"""Records that ending should be stripped from the affiliation for statistical purposes.""" + + ending = models.CharField(max_length=255, help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!") + + def __unicode__(self): + return self.ending + +class CountryAlias(models.Model): + """Records that alias should be treated as country for statistical + purposes.""" + + alias = models.CharField(max_length=255, help_text="Note that lower-case aliases are matched case-insensitive while aliases with at least one uppercase letter is matched case-sensitive.") + country = models.ForeignKey(CountryName, max_length=255) + + def __unicode__(self): + return u"{} -> {}".format(self.alias, self.country.name) + + class Meta: + verbose_name_plural = "country aliases" + diff --git a/ietf/stats/resources.py b/ietf/stats/resources.py new file mode 100644 index 000000000..7b7d354e5 --- /dev/null +++ b/ietf/stats/resources.py @@ -0,0 +1,52 @@ +# Autogenerated by the makeresources management command 2017-02-15 10:10 PST +from tastypie.resources import ModelResource +from tastypie.fields import ToManyField # pyflakes:ignore +from tastypie.constants import ALL, ALL_WITH_RELATIONS # pyflakes:ignore +from tastypie.cache import SimpleCache + +from ietf import api +from ietf.api import ToOneField # pyflakes:ignore + +from ietf.stats.models import CountryAlias, AffiliationIgnoredEnding, AffiliationAlias + + +from ietf.name.resources import CountryNameResource +class CountryAliasResource(ModelResource): + country = ToOneField(CountryNameResource, 'country') + class Meta: + queryset = CountryAlias.objects.all() + serializer = api.Serializer() + cache = SimpleCache() + #resource_name = 'countryalias' + filtering = { + "id": ALL, + "alias": ALL, + "country": ALL_WITH_RELATIONS, + } +api.stats.register(CountryAliasResource()) + +class AffiliationIgnoredEndingResource(ModelResource): + class Meta: + queryset = AffiliationIgnoredEnding.objects.all() + serializer = api.Serializer() + cache 
= SimpleCache() + #resource_name = 'affiliationignoredending' + filtering = { + "id": ALL, + "ending": ALL, + } +api.stats.register(AffiliationIgnoredEndingResource()) + +class AffiliationAliasResource(ModelResource): + class Meta: + queryset = AffiliationAlias.objects.all() + serializer = api.Serializer() + cache = SimpleCache() + #resource_name = 'affiliationalias' + filtering = { + "id": ALL, + "alias": ALL, + "name": ALL, + } +api.stats.register(AffiliationAliasResource()) + diff --git a/ietf/stats/tests.py b/ietf/stats/tests.py index 026a49b96..1d0e1ef47 100644 --- a/ietf/stats/tests.py +++ b/ietf/stats/tests.py @@ -25,7 +25,8 @@ class StatisticsTests(TestCase): self.assertTrue(authors_url in r["Location"]) # check various stats types - for stats_type in ["authors", "pages", "words", "format", "formlang", "author/documents", "author/affiliation"]: + for stats_type in ["authors", "pages", "words", "format", "formlang", + "author/documents", "author/affiliation", "author/country", "author/continent"]: for document_type in ["", "rfc", "draft"]: for time_choice in ["", "5y"]: url = urlreverse(ietf.stats.views.document_stats, kwargs={ "stats_type": stats_type }) diff --git a/ietf/stats/utils.py b/ietf/stats/utils.py new file mode 100644 index 000000000..d2e05fffe --- /dev/null +++ b/ietf/stats/utils.py @@ -0,0 +1,198 @@ +import re +from collections import defaultdict + +from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias +from ietf.name.models import CountryName + +def compile_affiliation_ending_stripping_regexp(): + parts = [] + for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True): + try: + re.compile(ending_re) + except re.error: + continue + + parts.append(ending_re) + + re_str = ",? *({}) *$".format("|".join(parts)) + + return re.compile(re_str, re.IGNORECASE) + + +def get_aliased_affiliations(affiliations): + """Given non-unique sequence of affiliations, returns dictionary with + aliases needed.
+ + We employ the following strategies, interleaved: + + - Stripping company endings like Inc., GmbH etc. from database + + - Looking up aliases stored directly in the database, like + "Examplar International" -> "Examplar" + + - Case-folding so Examplar and EXAMPLAR is merged with the + winner being the one with most occurrences (so input should not + be made unique) or most upper case letters in case of ties. + Case folding can be overridden by the aliases in the database.""" + + res = {} + + ending_re = compile_affiliation_ending_stripping_regexp() + + known_aliases = { alias.lower(): name for alias, name in AffiliationAlias.objects.values_list("alias", "name") } + + affiliations_with_case_spellings = defaultdict(set) + case_spelling_count = defaultdict(int) + for affiliation in affiliations: + original_affiliation = affiliation + + # check aliases from DB + name = known_aliases.get(affiliation.lower()) + if name is not None: + affiliation = name + res[original_affiliation] = affiliation + + # strip ending + name = ending_re.sub("", affiliation) + if name != affiliation: + affiliation = name + res[original_affiliation] = affiliation + + # check aliases from DB + name = known_aliases.get(affiliation.lower()) + if name is not None: + affiliation = name + res[original_affiliation] = affiliation + + affiliations_with_case_spellings[affiliation.lower()].add(original_affiliation) + case_spelling_count[affiliation] += 1 + + def affiliation_sort_key(affiliation): + count = case_spelling_count[affiliation] + uppercase_letters = sum(1 for c in affiliation if c.isupper()) + return (count, uppercase_letters) + + # now we just need to pick the most popular uppercase/lowercase + # spelling for each affiliation with more than one + for similar_affiliations in affiliations_with_case_spellings.itervalues(): + if len(similar_affiliations) > 1: + most_popular = sorted(similar_affiliations, key=affiliation_sort_key, reverse=True)[0] + for affiliation in similar_affiliations: + if 
affiliation != most_popular: + res[affiliation] = most_popular + + return res + + + + +def get_aliased_countries(countries): + known_aliases = dict(CountryAlias.objects.values_list("alias", "country__name")) + + iso_code_aliases = {} + + # add aliases for known countries + for slug, name in CountryName.objects.values_list("slug", "name"): + if len(name) > 2: + known_aliases[name.lower()] = name + + if len(slug) == 2 and slug[0].isupper() and slug[1].isupper(): + iso_code_aliases[slug] = name # add ISO code + + def lookup_alias(possible_alias): + name = known_aliases.get(possible_alias) + if name is not None: + return name + + name = known_aliases.get(possible_alias.lower()) + if name is not None: + return name + + return possible_alias + + known_re_aliases = { + re.compile(u"\\b{}\\b".format(re.escape(alias))): name + for alias, name in known_aliases.iteritems() + } + + # specific hack: check for zip codes from the US since in the + # early days, the addresses often didn't include the country + us_zipcode_re = re.compile(r"\b(AL|AK|AZ|AR|CA|CO|CT|DE|DC|FL|GA|HI|ID|IL|IN|IA|KS|KY|LA|ME|MD|MA|MI|MN|MS|MO|MT|NE|NV|NH|NJ|NM|NY|NC|ND|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VT|VA|WA|WV|WI|WY|AS|GU|MP|PR|VI|UM|FM|MH|PW|Ca|Cal.|California|CALIFORNIA|Colorado|Georgia|Illinois|Ill|Maryland|Ma|Ma.|Mass|Massachuss?etts|Michigan|Minnesota|New Jersey|New York|Ny|N.Y.|North Carolina|NORTH CAROLINA|Ohio|Oregon|Pennsylvania|Tx|Texas|Tennessee|Utah|Vermont|Virginia|Va.|Washington)[., -]*[0-9]{5}\b") + + us_country_name = CountryName.objects.get(slug="US").name + + def last_text_part_stripped(split): + for t in reversed(split): + t = t.strip() + if t: + return t + return u"" + + known_countries = set(CountryName.objects.values_list("name", flat=True)) + + res = {} + + for country in countries: + if country in res or country in known_countries: + continue + + original_country = country + + # aliased name + country = lookup_alias(country) + if country in known_countries: + res[original_country] 
= country + continue + + # contains US zipcode + if us_zipcode_re.search(country): + res[original_country] = us_country_name + continue + + # do a little bit of cleanup + if len(country) > 1 and country[-1] == "." and not country[-2].isupper(): + country = country.rstrip(".") + + country = country.strip("-,").strip() + + # aliased name + country = lookup_alias(country) + if country in known_countries: + res[original_country] = country + continue + + # country name at end, separated by comma + last_part = lookup_alias(last_text_part_stripped(country.split(","))) + if last_part in known_countries: + res[original_country] = last_part + continue + + # country name at end, separated by whitespace + last_part = lookup_alias(last_text_part_stripped(country.split())) + if last_part in known_countries: + res[original_country] = last_part + continue + + # country name anywhere + country_lower = country.lower() + found = False + for alias_re, name in known_re_aliases.iteritems(): + if alias_re.search(country) or alias_re.search(country_lower): + res[original_country] = name + found = True + break + + if found: + continue + + # if everything else has failed, try ISO code + country = iso_code_aliases.get(country, country) + if country in known_countries: + res[original_country] = country + continue + + # unknown country + res[original_country] = "" + + return res + diff --git a/ietf/stats/views.py b/ietf/stats/views.py index 0dadfdaf7..68e3f29d0 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -23,9 +23,9 @@ from ietf.review.utils import (extract_review_request_data, from ietf.submit.models import Submission from ietf.group.models import Role, Group from ietf.person.models import Person -from ietf.name.models import ReviewRequestStateName, ReviewResultName +from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName from ietf.doc.models import DocAlias, Document -from ietf.person.utils import get_aliased_affiliations +from ietf.stats.utils 
import get_aliased_affiliations, get_aliased_countries from ietf.ietfauth.utils import has_role def stats_index(request): @@ -139,6 +139,8 @@ def document_stats(request, stats_type=None): table_data = [] stats_title = "" bin_size = 1 + alias_data = [] + eu_countries = None if any(stats_type == t[0] for t in possible_document_stats_types): @@ -332,7 +334,7 @@ def document_stats(request, stats_type=None): if from_time: # this is actually faster than joining in the database, # despite the round-trip back and forth - docs_within_time_constraint = list(Document.objects.filter( + docs_within_time_constraint = set(Document.objects.filter( type="draft", docevent__time__gte=from_time, docevent__type__in=["published_rfc", "new_revision"], @@ -349,7 +351,7 @@ def document_stats(request, stats_type=None): else: doc_label = "document" - total_persons = person_qs.count() + total_persons = person_qs.distinct().count() if stats_type == "author/documents": stats_title = "Number of {}s per author".format(doc_label) @@ -402,6 +404,86 @@ def document_stats(request, stats_type=None): "animation": False, }) + for alias, name in sorted(aliases.iteritems(), key=lambda t: t[1]): + alias_data.append((name, alias)) + + elif stats_type == "author/country": + stats_title = "Number of {} authors per country".format(doc_label) + + bins = defaultdict(list) + + # Since people don't write the country names in the + # same way, and we don't want to go back and edit them + # either, we transform them here. 
+ + name_country_set = set((name, country) + for name, country in person_qs.values_list("name", "documentauthor__country")) + + aliases = get_aliased_countries(country for _, country in name_country_set) + + countries = { c.name: c for c in CountryName.objects.all() } + eu_name = "EU" + eu_countries = set(c for c in countries.itervalues() if c.in_eu) + + for name, country in name_country_set: + country_name = aliases.get(country, country) + bins[country_name].append(name) + + c = countries.get(country_name) + if c and c.in_eu: + bins[eu_name].append(name) + + series_data = [] + for country, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()): + percentage = len(names) * 100.0 / total_persons + if country: + series_data.append((country, len(names))) + table_data.append((country, percentage, names)) + + series_data.sort(key=lambda t: t[1], reverse=True) + series_data = series_data[:30] + + chart_data.append({ + "data": series_data, + "animation": False, + }) + + for alias, country_name in aliases.iteritems(): + alias_data.append((country_name, alias, countries.get(country_name))) + + alias_data.sort() + + elif stats_type == "author/continent": + stats_title = "Number of {} authors per continent".format(doc_label) + + bins = defaultdict(list) + + name_country_set = set((name, country) + for name, country in person_qs.values_list("name", "documentauthor__country")) + + aliases = get_aliased_countries(country for _, country in name_country_set) + + country_to_continent = dict(CountryName.objects.values_list("name", "continent__name")) + + for name, country in name_country_set: + country_name = aliases.get(country, country) + continent_name = country_to_continent.get(country_name, "") + bins[continent_name].append(name) + + series_data = [] + for continent, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()): + percentage = len(names) * 100.0 / total_persons + if continent: + series_data.append((continent, len(names))) + table_data.append((continent, 
percentage, names)) + + series_data.sort(key=lambda t: t[1], reverse=True) + + chart_data.append({ + "data": series_data, + "animation": False, + }) + return render(request, "stats/document_stats.html", { "chart_data": mark_safe(json.dumps(chart_data)), @@ -416,6 +498,10 @@ def document_stats(request, stats_type=None): "time_choice": time_choice, "doc_label": doc_label, "bin_size": bin_size, + "show_aliases_url": build_document_stats_url(get_overrides={ "showaliases": "1" }), + "hide_aliases_url": build_document_stats_url(get_overrides={ "showaliases": None }), + "alias_data": alias_data, + "eu_countries": sorted(eu_countries or [], key=lambda c: c.name), "content_template": "stats/document_stats_{}.html".format(stats_type.replace("/", "_")), }) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 6af82a6c3..dbef225af 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -12,8 +12,6 @@ from django.conf import settings from django.utils.html import mark_safe from django.core.urlresolvers import reverse as urlreverse -from django_countries.fields import countries - import debug # pyflakes:ignore from ietf.doc.models import Document @@ -32,15 +30,6 @@ from ietf.submit.parsers.ps_parser import PSParser from ietf.submit.parsers.xml_parser import XMLParser from ietf.utils.draft import Draft -def clean_country(country): - country = country.upper() - for code, name in countries: - if country == code: - return code - if country == name.upper(): - return code - return "" # unknown - class SubmissionUploadForm(forms.Form): txt = forms.FileField(label=u'.txt format', required=False) xml = forms.FileField(label=u'.xml format', required=False) @@ -194,7 +183,7 @@ class SubmissionUploadForm(forms.Form): "name": author.attrib.get('fullname'), "email": author.findtext('address/email'), "affiliation": author.findtext('organization'), - "country": clean_country(author.findtext('address/postal/country')), + "country": author.findtext('address/postal/country'), }) 
except forms.ValidationError: raise @@ -348,7 +337,7 @@ class NameEmailForm(forms.Form): class AuthorForm(NameEmailForm): affiliation = forms.CharField(max_length=100, required=False) - country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False) + country = forms.CharField(max_length=255, required=False) def __init__(self, *args, **kwargs): super(AuthorForm, self).__init__(*args, **kwargs) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index bd2305779..ed49fc836 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -81,7 +81,7 @@ def upload_submission(request): # If we don't have an xml file, try to extract the # relevant information from the text file for author in form.parsed_draft.get_author_list(): - full_name, first_name, middle_initial, last_name, name_suffix, email, company = author + full_name, first_name, middle_initial, last_name, name_suffix, email, country, company = author name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip() @@ -114,7 +114,7 @@ def upload_submission(request): "name": name, "email": email, "affiliation": company, - # FIXME: missing country + "country": country }) if form.abstract: diff --git a/ietf/templates/stats/document_stats_author_affiliation.html b/ietf/templates/stats/document_stats_author_affiliation.html index acca4ff4d..e5bcb23c7 100644 --- a/ietf/templates/stats/document_stats_author_affiliation.html +++ b/ietf/templates/stats/document_stats_author_affiliation.html @@ -57,3 +57,44 @@ {% endfor %} </tbody> </table> + +<p>Some authors are authors of multiple documents with different + affiliation information associated, so the sum of multiple rows in the + table can be more than 100%.</p> + + + +<h3>Affiliation Aliases</h3> + +<p>In generating the above statistics, some heuristics have been applied to determine the affiliation of each author.</p> + +{% if request.GET.showaliases %} + <p><a href="{{ hide_aliases_url }}" class="btn 
btn-default">Hide generated aliases</a></p> + + {% if request.user.is_staff %} + <p>Note: since you're an admin, you can <a href="{% url "admin:stats_affiliationalias_add" %}">add an extra known alias</a> or see the <a href="{% url "admin:stats_affiliationalias_changelist" %}">existing known aliases</a> and <a href="{% url "admin:stats_affiliationignoredending_changelist" %}">generally ignored endings</a>.</p> + {% endif %} + + {% if alias_data %} + <table class="table table-condensed"> + <thead> + <th>Affiliation</th> + <th>Alias</th> + </thead> + + {% for name, alias in alias_data %} + <tr> + <td> + {% ifchanged %} + {{ name|default:"(unknown)" }} + {% endifchanged %} + </td> + <td>{{ alias }}</td> + </tr> + {% endfor %} + </table> + {% endif %} + +{% else %} + <p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p> +{% endif %} diff --git a/ietf/templates/stats/document_stats_author_continent.html b/ietf/templates/stats/document_stats_author_continent.html new file mode 100644 index 000000000..d0327bf3a --- /dev/null +++ b/ietf/templates/stats/document_stats_author_continent.html @@ -0,0 +1,65 @@ +<h3>{{ stats_title }}</h3> + +<div id="chart"></div> + +<script> + var chartConf = { + chart: { + type: 'column' + }, + title: { + text: '{{ stats_title|escapejs }}' + }, + xAxis: { + type: "category", + title: { + text: 'Continent' + } + }, + yAxis: { + title: { + text: 'Number of authors' + } + }, + tooltip: { + formatter: function () { + var s = '<b>' + this.points[0].key + '</b>'; + + $.each(this.points, function () { + s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y; + }); + + return s; + }, + shared: true + }, + series: {{ chart_data }} + }; +</script> + +<h3>Data</h3> + +<table class="table table-condensed stats-data"> + <thead> + <tr> + <th>Continent</th> + <th>Percentage of authors</th> + <th>Authors</th> + </tr> + </thead> + <tbody> + {% for continent, percentage, names in table_data %} + <tr> + <td>{{ 
continent|default:"(unknown)" }}</td> + <td>{{ percentage|floatformat:2 }}%</td> + <td>{% include "stats/includes/number_with_details_cell.html" %}</td> + </tr> + {% endfor %} + </tbody> +</table> + +<p>The country information for an author can vary between documents, + so the sum of the rows in the table can be more than 100%. This + is especially true for the row with unknown continent information - + many authors may have one or more author entries with an + unrecognized country.</p> diff --git a/ietf/templates/stats/document_stats_author_country.html b/ietf/templates/stats/document_stats_author_country.html new file mode 100644 index 000000000..1b8911c4c --- /dev/null +++ b/ietf/templates/stats/document_stats_author_country.html @@ -0,0 +1,124 @@ +<h3>{{ stats_title }}</h3> + +<div id="chart"></div> + +<script> + var chartConf = { + chart: { + type: 'column' + }, + title: { + text: '{{ stats_title|escapejs }}' + }, + xAxis: { + type: "category", + title: { + text: 'Country' + } + }, + yAxis: { + title: { + text: 'Number of authors' + } + }, + tooltip: { + formatter: function () { + var s = '<b>' + this.points[0].key + '</b>'; + + $.each(this.points, function () { + s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y; + }); + + return s; + }, + shared: true + }, + series: {{ chart_data }} + }; +</script> + +<h3>Data</h3> + +<table class="table table-condensed stats-data"> + <thead> + <tr> + <th>Country</th> + <th>Percentage of authors</th> + <th>Authors</th> + </tr> + </thead> + <tbody> + {% for country, percentage, names in table_data %} + <tr> + <td>{{ country|default:"(unknown)" }}</td> + <td>{{ percentage|floatformat:2 }}%</td> + <td>{% include "stats/includes/number_with_details_cell.html" %}</td> + </tr> + {% endfor %} + </tbody> +</table> + +<p>The country information for an author can vary between documents, + so the sum of multiple rows in the table can be more than 100%. 
This + is especially true for the row with unknown country information - + many authors may have one or more author entries with an + unrecognized country.</p> + +<p>An author is counted in EU if the country is a member of the EU + now, even if that was not the case at publication. + EU members: + {% for c in eu_countries %}{{ c.name }}{% if not forloop.last %}, {% endif %}{% endfor %}.</p> + +<h3>Country Aliases</h3> + +<p>In generating the above statistics, some heuristics have been + applied to figure out which country each author is from.</p> + +{% if request.GET.showaliases %} + <p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p> + + {% if request.user.is_staff %} + <p>Note: since you're an admin, some extra links are visible. You + can either correct a document author entry directly in case the + information is obviously missing or add an alias if an unknown + <a href="{% url "admin:name_countryname_changelist" %}">country name</a> + is being used. 
+ </p> + + {% endif %} + + {% if alias_data %} + <table class="table table-condensed"> + <thead> + <th>Country</th> + <th>Alias</th> + <th></th> + </thead> + + {% for name, alias, country in alias_data %} + <tr> + <td> + {% ifchanged %} + {% if country and request.user.is_staff %} + <a href="{% url "admin:name_countryname_change" country.pk %}"> + {% endif %} + {{ name|default:"(unknown)" }} + {% if country and request.user.is_staff %} + </a> + {% endif %} + {% endifchanged %} + </td> + <td>{{ alias }}</td> + <td> + {% if request.user.is_staff and name != "EU" %} + <a href="{% url "admin:doc_documentauthor_changelist" %}?country={{ alias|urlencode }}">Matching authors</a> + {% endif %} + </td> + </tr> + {% endfor %} + </table> + {% endif %} + +{% else %} + <p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p> +{% endif %} diff --git a/ietf/templates/submit/submission_status.html b/ietf/templates/submit/submission_status.html index 3bf65c096..39612397d 100644 --- a/ietf/templates/submit/submission_status.html +++ b/ietf/templates/submit/submission_status.html @@ -2,7 +2,7 @@ {# Copyright The IETF Trust 2015, All Rights Reserved #} {% load origin %} {% load staticfiles %} -{% load ietf_filters submit_tags country %} +{% load ietf_filters submit_tags %} {% block title %}Submission status of {{ submission.name }}-{{ submission.rev }}{% endblock %} @@ -207,8 +207,8 @@ <th>Author {{ forloop.counter }}</th> <td> {{ author.name }} {% if author.email %}<{{ author.email }}>{% endif %} - {% if author.affiliation %}- {{ author.affiliation }}{% endif %} - {% if author.country %}- {{ author.country|country_name }}{% endif %} + - {% if author.affiliation %}{{ author.affiliation }}{% else %}<i>unknown affiliation</i>{% endif %} + - {% if author.country %}{{ author.country }}{% else %}<i>unknown country</i>{% endif %} </td> </tr> {% endfor %} diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index b7e9c4231..4bc0365d0 100755 --- 
a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -196,6 +196,7 @@ class Draft(): line = "" newpage = False sentence = False + shortline = False blankcount = 0 linecount = 0 # two functions with side effects @@ -262,7 +263,7 @@ class Draft(): sentence = True if re.search("[^ \t]", line): if newpage: - if sentence: + if sentence or shortline: stripped += [""] else: if blankcount: @@ -270,6 +271,7 @@ class Draft(): blankcount = 0 sentence = False newpage = False + shortline = len(line.strip()) < 18 if re.search("[.:]$", line): sentence = True if re.search("^[ \t]*$", line): @@ -847,7 +849,8 @@ class Draft(): nonblank_count = 0 blanklines = 0 email = None - for line in self.lines[start+1:]: + country = None + for line_offset, line in enumerate(self.lines[start+1:]): _debug( " " + line.strip()) # Break on the second blank line if not line: @@ -887,15 +890,18 @@ class Draft(): else: pass - try: - column = line[beg:end].strip() - except: - column = line - column = re.sub(" *\(at\) *", "@", column) - column = re.sub(" *\(dot\) *", ".", column) - column = re.sub(" +at +", "@", column) - column = re.sub(" +dot +", ".", column) - column = re.sub("&cisco.com", "@cisco.com", column) + def columnify(l): + try: + column = l.replace('\t', 8 * ' ')[max(0, beg - 1):end].strip() + except: + column = l + column = re.sub(" *(?:\(at\)| <at> | at ) *", "@", column) + column = re.sub(" *(?:\(dot\)| <dot> | dot ) *", ".", column) + column = re.sub("&cisco.com", "@cisco.com", column) + column = column.replace("\xa0", " ") + return column + + column = columnify(line) # if re.search("^\w+: \w+", column): # keyword = True @@ -906,13 +912,42 @@ class Draft(): # break #_debug( " Column text :: " + column) + if nonblank_count >= 2 and blanklines == 0: + # Usually, the contact info lines will look + # like this: "Email: someone@example.com" or + # "Tel: +1 (412)-2390 23123", but sometimes + # the : is left out. 
That's okay for things we + # can't misinterpret, but "tel" may match "Tel + # Aviv 69710, Israel" so match + # - misc contact info + # - tel/fax [number] + # - [phone number] + # - [email] + + other_contact_info_regex = re.compile(r'^(((contact )?e|\(e|e-|m|electronic )?mail|email_id|mailto|e-main|(tele)?phone|voice|mobile|work|uri|url|tel:)\b|^((ph|tel\.?|telefax|fax) *[:.]? *\(?( ?\+ ?)?[0-9]+)|^(\++[0-9]+|\(\+*[0-9]+\)|\(dsn\)|[0-9]+)([ -.]*\b|\b[ -.]*)(([0-9]{2,}|\([0-9]{2,}\)|(\([0-9]\)|[0-9])[ -][0-9]{2,}|\([0-9]\)[0-9]+)([ -.]+([0-9]+|\([0-9]+\)))+|([0-9]{7,}|\([0-9]{7,}\)))|^(<?[-a-z0-9._+]+|{([-a-z0-9._+]+, ?)+[-a-z0-9._+]+})@[-a-z0-9._]+>?|^https?://|^www\.') + next_line_index = start + 1 + line_offset + 1 + + if (not country + and not other_contact_info_regex.search(column.lower()) + and next_line_index < len(self.lines)): + + next_line_lower = columnify(self.lines[next_line_index]).lower().strip() + + if not next_line_lower or other_contact_info_regex.search(next_line_lower): + # country should be here, as the last + # part of the address, right before an + # empty line or other contact info + country = column.strip() or None + _debug(" Country: %s" % country) + _debug("3: authors[%s]: %s" % (i, authors[i])) emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column) if emailmatch and not "@" in author: email = emailmatch.group(0).lower() break - authors[i] = authors[i] + ( email, ) + + authors[i] = authors[i] + ( email, country) else: if not author in ignore: companies[i] = authors[i] @@ -938,8 +973,8 @@ class Draft(): _debug(" * Final company list: %s" % (companies,)) _debug(" * Final companies_seen: %s" % (companies_seen,)) self._author_info = authors - self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,company in authors ] # pyflakes:ignore - self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,company in authors ] + self._authors_with_firm = 
[ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,country,company in authors ] # pyflakes:ignore + self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,country,company in authors ] self._authors.sort() _debug(" * Final author list: " + ", ".join(self._authors)) _debug("-"*72) @@ -1159,10 +1194,10 @@ def getmeta(fn): def _output(docname, fields, outfile=sys.stdout): global company_domain if opt_getauthors: - # Output an (incomplete!) getauthors-compatible format. Country - # information is always UNKNOWN, and information about security and - # iana sections presence is missing. - for full,first,middle,last,suffix,email,company in fields["_authorlist"]: + # Output an (incomplete!) getauthors-compatible format. + # Information about security and iana sections presence is + # missing. + for full,first,middle,last,suffix,email,country,company in fields["_authorlist"]: if company in company_domain: company = company_domain[company] else: @@ -1173,7 +1208,7 @@ def _output(docname, fields, outfile=sys.stdout): fields["name"] = full fields["email"] = email fields["company"] = company - fields["country"] = "UNKNOWN" + fields["country"] = country or "UNKNOWN" try: year, month, day = fields["doccreationdate"].split("-") except ValueError: diff --git a/ietf/utils/templatetags/country.py b/ietf/utils/templatetags/country.py deleted file mode 100644 index 7d730d2f9..000000000 --- a/ietf/utils/templatetags/country.py +++ /dev/null @@ -1,14 +0,0 @@ -from django.template.base import Library -from django.template.defaultfilters import stringfilter - -from django_countries import countries - -register = Library() - -@register.filter(is_safe=True) -@stringfilter -def country_name(value): - """ - Converts country code to country name - """ - return dict(countries).get(value, "")