Add support for extracting the country line from the author addresses
to the draft parser (incorporating patch from trunk). Store the
extracted country instead of trying to turn it into an ISO country
code. Add country and continent name models along with initial data
for them, a helper function for cleaning the countries, and author
country and continent charts. Move the affiliation models to
stats/models.py and fix a bunch of bugs.
 - Legacy-Id: 12846
This commit is contained in:
Ole Laursen 2017-02-15 18:43:57 +00:00
parent 882579bab3
commit b2ff10b0f2
34 changed files with 1234 additions and 281 deletions

View file

@ -174,7 +174,8 @@ class BallotPositionDocEventAdmin(DocEventAdmin):
admin.site.register(BallotPositionDocEvent, BallotPositionDocEventAdmin)
class DocumentAuthorAdmin(admin.ModelAdmin):
list_display = ['id', 'document', 'person', 'email', 'affiliation', 'order']
search_fields = [ 'document__name', 'person__name', 'email__address', 'affiliation']
list_display = ['id', 'document', 'person', 'email', 'affiliation', 'country', 'order']
search_fields = ['document__docalias__name', 'person__name', 'email__address', 'affiliation', 'country']
raw_id_fields = ["document", "person", "email"]
admin.site.register(DocumentAuthor, DocumentAuthorAdmin)

View file

@ -2,7 +2,6 @@
from __future__ import unicode_literals
from django.db import migrations, models
import django_countries.fields
class Migration(migrations.Migration):
@ -49,7 +48,7 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name='dochistoryauthor',
name='country',
field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2),
field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255),
),
migrations.RenameField(
model_name='dochistoryauthor',
@ -74,7 +73,7 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name='documentauthor',
name='country',
field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2),
field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255),
),
migrations.RenameField(
model_name='documentauthor',

View file

@ -11,8 +11,6 @@ from django.contrib.contenttypes.models import ContentType
from django.conf import settings
from django.utils.html import mark_safe
from django_countries.fields import CountryField
import debug # pyflakes:ignore
from ietf.group.models import Group
@ -406,7 +404,7 @@ class DocumentAuthorInfo(models.Model):
# email should only be null for some historic documents
email = models.ForeignKey(Email, help_text="Email address used by author for submission", blank=True, null=True)
affiliation = models.CharField(max_length=100, blank=True, help_text="Organization/company used by author for submission")
country = CountryField(blank=True, help_text="Country used by author for submission")
country = models.CharField(max_length=255, blank=True, help_text="Country used by author for submission")
order = models.IntegerField(default=1)
def formatted_email(self):

View file

@ -1,7 +1,8 @@
from django.contrib import admin
from ietf.name.models import (
BallotPositionName, ConstraintName, DBTemplateTypeName, DocRelationshipName,
BallotPositionName, ConstraintName, ContinentName, CountryName,
DBTemplateTypeName, DocRelationshipName,
DocReminderTypeName, DocTagName, DocTypeName, DraftSubmissionStateName,
FeedbackTypeName, FormalLanguageName, GroupMilestoneStateName, GroupStateName, GroupTypeName,
IntendedStdLevelName, IprDisclosureStateName, IprEventTypeName, IprLicenseTypeName,
@ -10,8 +11,11 @@ from ietf.name.models import (
ReviewRequestStateName, ReviewResultName, ReviewTypeName, RoleName, RoomResourceName,
SessionStatusName, StdLevelName, StreamName, TimeSlotTypeName, )
from ietf.stats.models import CountryAlias
class NameAdmin(admin.ModelAdmin):
    """Shared admin configuration for the name models: shows slug, name,
    desc and used in the change list and searches on slug and name."""
    list_display = ["slug", "name", "desc", "used"]
    search_fields = ["slug", "name"]
    # NOTE(review): "prepopulate_from" looks like the pre-1.0 Django admin
    # option; current ModelAdmin uses "prepopulated_fields". Confirm whether
    # this attribute has any effect before relying on it.
    prepopulate_from = { "slug": ("name",) }
class DocRelationshipNameAdmin(NameAdmin):
@ -26,8 +30,19 @@ class GroupTypeNameAdmin(NameAdmin):
list_display = ["slug", "name", "verbose_name", "desc", "used"]
admin.site.register(GroupTypeName, GroupTypeNameAdmin)
class CountryAliasInline(admin.TabularInline):
    """Tabular inline for editing CountryAlias rows on a parent admin page."""
    model = CountryAlias
    # Offer one empty row for adding a new alias.
    extra = 1
class CountryNameAdmin(NameAdmin):
    """Admin for CountryName: lists and filters countries by continent and
    the in_eu flag, with the country's aliases editable inline."""
    list_display = ["slug", "name", "continent", "in_eu"]
    list_filter = ["continent", "in_eu"]
    # Aliases are edited directly on the country's change page.
    inlines = [CountryAliasInline]
admin.site.register(CountryName, CountryNameAdmin)
admin.site.register(BallotPositionName, NameAdmin)
admin.site.register(ConstraintName, NameAdmin)
admin.site.register(ContinentName, NameAdmin)
admin.site.register(DBTemplateTypeName, NameAdmin)
admin.site.register(DocReminderTypeName, NameAdmin)
admin.site.register(DocTagName, NameAdmin)

View file

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema migration that creates the ContinentName and CountryName models."""

    # Applied after migration 0018_add_formlang_names of the "name" app.
    dependencies = [
        ('name', '0018_add_formlang_names'),
    ]

    operations = [
        # ContinentName: slug-keyed name table, ordered by (order, name).
        migrations.CreateModel(
            name='ContinentName',
            fields=[
                ('slug', models.CharField(max_length=32, serialize=False, primary_key=True)),
                ('name', models.CharField(max_length=255)),
                ('desc', models.TextField(blank=True)),
                ('used', models.BooleanField(default=True)),
                ('order', models.IntegerField(default=0)),
            ],
            options={
                'ordering': ['order', 'name'],
                'abstract': False,
            },
        ),
        # CountryName: the same columns plus an "In EU" flag (default False)
        # and a required foreign key to the country's ContinentName.
        migrations.CreateModel(
            name='CountryName',
            fields=[
                ('slug', models.CharField(max_length=32, serialize=False, primary_key=True)),
                ('name', models.CharField(max_length=255)),
                ('desc', models.TextField(blank=True)),
                ('used', models.BooleanField(default=True)),
                ('order', models.IntegerField(default=0)),
                ('in_eu', models.BooleanField(default=False, verbose_name='In EU')),
                ('continent', models.ForeignKey(to='name.ContinentName')),
            ],
            options={
                'ordering': ['order', 'name'],
                'abstract': False,
            },
        ),
    ]

View file

@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations
# Continents to create, as (slug, name) pairs.
_INITIAL_CONTINENTS = [
    ("africa", "Africa"),
    ("antarctica", "Antarctica"),
    ("asia", "Asia"),
    ("europe", "Europe"),
    ("north-america", "North America"),
    ("oceania", "Oceania"),
    ("south-america", "South America"),
]

# Slugs of the countries that are created with in_eu=True.
_INITIAL_EU_SLUGS = frozenset([
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES", "FI",
    "FR", "GB", "GR", "HR", "HU", "IE", "IT", "LT", "LU", "LV",
    "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
])

# Countries to create, as (two-letter slug, name, continent slug) triples.
_INITIAL_COUNTRIES = [
    ("AD", u"Andorra", "europe"),
    ("AE", u"United Arab Emirates", "asia"),
    ("AF", u"Afghanistan", "asia"),
    ("AG", u"Antigua and Barbuda", "north-america"),
    ("AI", u"Anguilla", "north-america"),
    ("AL", u"Albania", "europe"),
    ("AM", u"Armenia", "asia"),
    ("AO", u"Angola", "africa"),
    ("AQ", u"Antarctica", "antarctica"),
    ("AR", u"Argentina", "south-america"),
    ("AS", u"American Samoa", "oceania"),
    ("AT", u"Austria", "europe"),
    ("AU", u"Australia", "oceania"),
    ("AW", u"Aruba", "north-america"),
    ("AX", u"Åland Islands", "europe"),
    ("AZ", u"Azerbaijan", "asia"),
    ("BA", u"Bosnia and Herzegovina", "europe"),
    ("BB", u"Barbados", "north-america"),
    ("BD", u"Bangladesh", "asia"),
    ("BE", u"Belgium", "europe"),
    ("BF", u"Burkina Faso", "africa"),
    ("BG", u"Bulgaria", "europe"),
    ("BH", u"Bahrain", "asia"),
    ("BI", u"Burundi", "africa"),
    ("BJ", u"Benin", "africa"),
    ("BL", u"Saint Barthélemy", "north-america"),
    ("BM", u"Bermuda", "north-america"),
    ("BN", u"Brunei", "asia"),
    ("BO", u"Bolivia", "south-america"),
    ("BQ", u"Bonaire, Sint Eustatius and Saba", "north-america"),
    ("BR", u"Brazil", "south-america"),
    ("BS", u"Bahamas", "north-america"),
    ("BT", u"Bhutan", "asia"),
    ("BV", u"Bouvet Island", "antarctica"),
    ("BW", u"Botswana", "africa"),
    ("BY", u"Belarus", "europe"),
    ("BZ", u"Belize", "north-america"),
    ("CA", u"Canada", "north-america"),
    ("CC", u"Cocos (Keeling) Islands", "asia"),
    ("CD", u"Congo (the Democratic Republic of the)", "africa"),
    ("CF", u"Central African Republic", "africa"),
    ("CG", u"Congo", "africa"),
    ("CH", u"Switzerland", "europe"),
    ("CI", u"Côte d'Ivoire", "africa"),
    ("CK", u"Cook Islands", "oceania"),
    ("CL", u"Chile", "south-america"),
    ("CM", u"Cameroon", "africa"),
    ("CN", u"China", "asia"),
    ("CO", u"Colombia", "south-america"),
    ("CR", u"Costa Rica", "north-america"),
    ("CU", u"Cuba", "north-america"),
    ("CV", u"Cabo Verde", "africa"),
    ("CW", u"Curaçao", "north-america"),
    ("CX", u"Christmas Island", "asia"),
    ("CY", u"Cyprus", "asia"),
    ("CZ", u"Czech Republic", "europe"),
    ("DE", u"Germany", "europe"),
    ("DJ", u"Djibouti", "africa"),
    ("DK", u"Denmark", "europe"),
    ("DM", u"Dominica", "north-america"),
    ("DO", u"Dominican Republic", "north-america"),
    ("DZ", u"Algeria", "africa"),
    ("EC", u"Ecuador", "south-america"),
    ("EE", u"Estonia", "europe"),
    ("EG", u"Egypt", "africa"),
    ("EH", u"Western Sahara", "africa"),
    ("ER", u"Eritrea", "africa"),
    ("ES", u"Spain", "europe"),
    ("ET", u"Ethiopia", "africa"),
    ("FI", u"Finland", "europe"),
    ("FJ", u"Fiji", "oceania"),
    ("FK", u"Falkland Islands [Malvinas]", "south-america"),
    ("FM", u"Micronesia (Federated States of)", "oceania"),
    ("FO", u"Faroe Islands", "europe"),
    ("FR", u"France", "europe"),
    ("GA", u"Gabon", "africa"),
    ("GB", u"United Kingdom", "europe"),
    ("GD", u"Grenada", "north-america"),
    ("GE", u"Georgia", "asia"),
    ("GF", u"French Guiana", "south-america"),
    ("GG", u"Guernsey", "europe"),
    ("GH", u"Ghana", "africa"),
    ("GI", u"Gibraltar", "europe"),
    ("GL", u"Greenland", "north-america"),
    ("GM", u"Gambia", "africa"),
    ("GN", u"Guinea", "africa"),
    ("GP", u"Guadeloupe", "north-america"),
    ("GQ", u"Equatorial Guinea", "africa"),
    ("GR", u"Greece", "europe"),
    ("GS", u"South Georgia and the South Sandwich Islands", "antarctica"),
    ("GT", u"Guatemala", "north-america"),
    ("GU", u"Guam", "oceania"),
    ("GW", u"Guinea-Bissau", "africa"),
    ("GY", u"Guyana", "south-america"),
    ("HK", u"Hong Kong", "asia"),
    ("HM", u"Heard Island and McDonald Islands", "antarctica"),
    ("HN", u"Honduras", "north-america"),
    ("HR", u"Croatia", "europe"),
    ("HT", u"Haiti", "north-america"),
    ("HU", u"Hungary", "europe"),
    ("ID", u"Indonesia", "asia"),
    ("IE", u"Ireland", "europe"),
    ("IL", u"Israel", "asia"),
    ("IM", u"Isle of Man", "europe"),
    ("IN", u"India", "asia"),
    ("IO", u"British Indian Ocean Territory", "asia"),
    ("IQ", u"Iraq", "asia"),
    ("IR", u"Iran", "asia"),
    ("IS", u"Iceland", "europe"),
    ("IT", u"Italy", "europe"),
    ("JE", u"Jersey", "europe"),
    ("JM", u"Jamaica", "north-america"),
    ("JO", u"Jordan", "asia"),
    ("JP", u"Japan", "asia"),
    ("KE", u"Kenya", "africa"),
    ("KG", u"Kyrgyzstan", "asia"),
    ("KH", u"Cambodia", "asia"),
    ("KI", u"Kiribati", "oceania"),
    ("KM", u"Comoros", "africa"),
    ("KN", u"Saint Kitts and Nevis", "north-america"),
    ("KP", u"North Korea", "asia"),
    ("KR", u"South Korea", "asia"),
    ("KW", u"Kuwait", "asia"),
    ("KY", u"Cayman Islands", "north-america"),
    ("KZ", u"Kazakhstan", "asia"),
    ("LA", u"Laos", "asia"),
    ("LB", u"Lebanon", "asia"),
    ("LC", u"Saint Lucia", "north-america"),
    ("LI", u"Liechtenstein", "europe"),
    ("LK", u"Sri Lanka", "asia"),
    ("LR", u"Liberia", "africa"),
    ("LS", u"Lesotho", "africa"),
    ("LT", u"Lithuania", "europe"),
    ("LU", u"Luxembourg", "europe"),
    ("LV", u"Latvia", "europe"),
    ("LY", u"Libya", "africa"),
    ("MA", u"Morocco", "africa"),
    ("MC", u"Monaco", "europe"),
    ("MD", u"Moldova", "europe"),
    ("ME", u"Montenegro", "europe"),
    ("MF", u"Saint Martin (French part)", "north-america"),
    ("MG", u"Madagascar", "africa"),
    ("MH", u"Marshall Islands", "oceania"),
    ("MK", u"Macedonia", "europe"),
    ("ML", u"Mali", "africa"),
    ("MM", u"Myanmar", "asia"),
    ("MN", u"Mongolia", "asia"),
    ("MO", u"Macao", "asia"),
    ("MP", u"Northern Mariana Islands", "oceania"),
    ("MQ", u"Martinique", "north-america"),
    ("MR", u"Mauritania", "africa"),
    ("MS", u"Montserrat", "north-america"),
    ("MT", u"Malta", "europe"),
    ("MU", u"Mauritius", "africa"),
    ("MV", u"Maldives", "asia"),
    ("MW", u"Malawi", "africa"),
    ("MX", u"Mexico", "north-america"),
    ("MY", u"Malaysia", "asia"),
    ("MZ", u"Mozambique", "africa"),
    ("NA", u"Namibia", "africa"),
    ("NC", u"New Caledonia", "oceania"),
    ("NE", u"Niger", "africa"),
    ("NF", u"Norfolk Island", "oceania"),
    ("NG", u"Nigeria", "africa"),
    ("NI", u"Nicaragua", "north-america"),
    ("NL", u"Netherlands", "europe"),
    ("NO", u"Norway", "europe"),
    ("NP", u"Nepal", "asia"),
    ("NR", u"Nauru", "oceania"),
    ("NU", u"Niue", "oceania"),
    ("NZ", u"New Zealand", "oceania"),
    ("OM", u"Oman", "asia"),
    ("PA", u"Panama", "north-america"),
    ("PE", u"Peru", "south-america"),
    ("PF", u"French Polynesia", "oceania"),
    ("PG", u"Papua New Guinea", "oceania"),
    ("PH", u"Philippines", "asia"),
    ("PK", u"Pakistan", "asia"),
    ("PL", u"Poland", "europe"),
    ("PM", u"Saint Pierre and Miquelon", "north-america"),
    ("PN", u"Pitcairn", "oceania"),
    ("PR", u"Puerto Rico", "north-america"),
    ("PS", u"Palestine, State of", "asia"),
    ("PT", u"Portugal", "europe"),
    ("PW", u"Palau", "oceania"),
    ("PY", u"Paraguay", "south-america"),
    ("QA", u"Qatar", "asia"),
    ("RE", u"Réunion", "africa"),
    ("RO", u"Romania", "europe"),
    ("RS", u"Serbia", "europe"),
    ("RU", u"Russia", "europe"),
    ("RW", u"Rwanda", "africa"),
    ("SA", u"Saudi Arabia", "asia"),
    ("SB", u"Solomon Islands", "oceania"),
    ("SC", u"Seychelles", "africa"),
    ("SD", u"Sudan", "africa"),
    ("SE", u"Sweden", "europe"),
    ("SG", u"Singapore", "asia"),
    ("SH", u"Saint Helena, Ascension and Tristan da Cunha", "africa"),
    ("SI", u"Slovenia", "europe"),
    ("SJ", u"Svalbard and Jan Mayen", "europe"),
    ("SK", u"Slovakia", "europe"),
    ("SL", u"Sierra Leone", "africa"),
    ("SM", u"San Marino", "europe"),
    ("SN", u"Senegal", "africa"),
    ("SO", u"Somalia", "africa"),
    ("SR", u"Suriname", "south-america"),
    ("SS", u"South Sudan", "africa"),
    ("ST", u"Sao Tome and Principe", "africa"),
    ("SV", u"El Salvador", "north-america"),
    ("SX", u"Sint Maarten (Dutch part)", "north-america"),
    ("SY", u"Syria", "asia"),
    ("SZ", u"Swaziland", "africa"),
    ("TC", u"Turks and Caicos Islands", "north-america"),
    ("TD", u"Chad", "africa"),
    ("TF", u"French Southern Territories", "antarctica"),
    ("TG", u"Togo", "africa"),
    ("TH", u"Thailand", "asia"),
    ("TJ", u"Tajikistan", "asia"),
    ("TK", u"Tokelau", "oceania"),
    ("TL", u"Timor-Leste", "asia"),
    ("TM", u"Turkmenistan", "asia"),
    ("TN", u"Tunisia", "africa"),
    ("TO", u"Tonga", "oceania"),
    ("TR", u"Turkey", "europe"),
    ("TT", u"Trinidad and Tobago", "north-america"),
    ("TV", u"Tuvalu", "oceania"),
    ("TW", u"Taiwan", "asia"),
    ("TZ", u"Tanzania", "africa"),
    ("UA", u"Ukraine", "europe"),
    ("UG", u"Uganda", "africa"),
    ("UM", u"United States Minor Outlying Islands", "oceania"),
    ("US", u"United States of America", "north-america"),
    ("UY", u"Uruguay", "south-america"),
    ("UZ", u"Uzbekistan", "asia"),
    ("VA", u"Holy See", "europe"),
    ("VC", u"Saint Vincent and the Grenadines", "north-america"),
    ("VE", u"Venezuela", "south-america"),
    ("VG", u"Virgin Islands (British)", "north-america"),
    ("VI", u"Virgin Islands (U.S.)", "north-america"),
    ("VN", u"Vietnam", "asia"),
    ("VU", u"Vanuatu", "oceania"),
    ("WF", u"Wallis and Futuna", "oceania"),
    ("WS", u"Samoa", "oceania"),
    ("YE", u"Yemen", "asia"),
    ("YT", u"Mayotte", "africa"),
    ("ZA", u"South Africa", "africa"),
    ("ZM", u"Zambia", "africa"),
    ("ZW", u"Zimbabwe", "africa"),
]


def insert_initial_country_continent_names(apps, schema_editor):
    """Create the initial ContinentName and CountryName rows.

    Rows are looked up by their primary-key slug only; name, continent and
    in_eu are passed as ``defaults`` so they are used solely when a row has
    to be created. A row that already exists under the same slug is left
    untouched instead of triggering a duplicate-key insert.

    Args:
        apps: migration app registry; must provide ``get_model``.
        schema_editor: unused, required by the RunPython call signature.
    """
    ContinentName = apps.get_model("name", "ContinentName")
    continents = {}
    for slug, name in _INITIAL_CONTINENTS:
        continents[slug], _ = ContinentName.objects.get_or_create(
            slug=slug, defaults={"name": name})

    CountryName = apps.get_model("name", "CountryName")
    for slug, name, continent_slug in _INITIAL_COUNTRIES:
        CountryName.objects.get_or_create(
            slug=slug,
            defaults={
                "name": name,
                "continent": continents[continent_slug],
                "in_eu": slug in _INITIAL_EU_SLUGS,
            })
class Migration(migrations.Migration):
    """Data migration that inserts the initial continent and country names."""

    # Presumably the migration that creates the ContinentName and
    # CountryName tables (going by its name) - verify.
    dependencies = [
        ('name', '0019_continentname_countryname'),
    ]

    operations = [
        # The reverse operation is a no-op, so unapplying this migration
        # leaves the inserted rows in place.
        migrations.RunPython(insert_initial_country_continent_names, migrations.RunPython.noop)
    ]

View file

@ -99,4 +99,10 @@ class ReviewResultName(NameModel):
"""Almost ready, Has issues, Has nits, Not Ready,
On the right track, Ready, Ready with issues,
Ready with nits, Serious Issues"""
# Referenced by CountryName.continent.
class ContinentName(NameModel):
    "Africa, Antarctica, Asia, ..."
class CountryName(NameModel):
    "Afghanistan, Aaland Islands, Albania, ..."
    # Continent this country is filed under (required).
    continent = models.ForeignKey(ContinentName)
    # "In EU" flag; defaults to False.
    in_eu = models.BooleanField(verbose_name="In EU", default=False)

View file

@ -15,7 +15,7 @@ from ietf.name.models import (TimeSlotTypeName, GroupStateName, DocTagName, Inte
LiaisonStatementTagName, FeedbackTypeName, LiaisonStatementState, StreamName,
BallotPositionName, DBTemplateTypeName, NomineePositionStateName,
ReviewRequestStateName, ReviewTypeName, ReviewResultName,
FormalLanguageName)
FormalLanguageName, ContinentName, CountryName)
class TimeSlotTypeNameResource(ModelResource):
@ -474,3 +474,38 @@ class FormalLanguageNameResource(ModelResource):
}
api.name.register(FormalLanguageNameResource())
class ContinentNameResource(ModelResource):
    """API resource over all ContinentName rows, registered under api.name.

    Every field listed in ``filtering`` is declared with ALL.
    """
    class Meta:
        queryset = ContinentName.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'continentname'
        filtering = {
            "slug": ALL,
            "name": ALL,
            "desc": ALL,
            "used": ALL,
            "order": ALL,
        }
api.name.register(ContinentNameResource())
class CountryNameResource(ModelResource):
    """API resource over all CountryName rows, registered under api.name.

    The continent is exposed as a to-one relation to ContinentNameResource
    and is filterable with ALL_WITH_RELATIONS; the other fields use ALL.
    """
    continent = ToOneField(ContinentNameResource, 'continent')
    class Meta:
        queryset = CountryName.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'countryname'
        filtering = {
            "slug": ALL,
            "name": ALL,
            "desc": ALL,
            "used": ALL,
            "order": ALL,
            "in_eu": ALL,
            "continent": ALL_WITH_RELATIONS,
        }
api.name.register(CountryNameResource())

View file

@ -1,7 +1,7 @@
from django.contrib import admin
from ietf.person.models import Email, Alias, Person, AffiliationAlias, AffiliationIgnoredEnding
from ietf.person.models import Email, Alias, Person
from ietf.person.name import name_parts
class EmailAdmin(admin.ModelAdmin):
@ -32,14 +32,3 @@ class PersonAdmin(admin.ModelAdmin):
inlines = [ EmailInline, AliasInline, ]
# actions = None
admin.site.register(Person, PersonAdmin)
class AffiliationAliasAdmin(admin.ModelAdmin):
list_filter = ["name"]
list_display = ["alias", "name"]
search_fields = ["alias", "name"]
admin.site.register(AffiliationAlias, AffiliationAliasAdmin)
class AffiliationIgnoredEndingAdmin(admin.ModelAdmin):
list_display = ["ending"]
search_fields = ["ending"]
admin.site.register(AffiliationIgnoredEnding, AffiliationIgnoredEndingAdmin)

View file

@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('person', '0014_auto_20160613_0751'),
]
operations = [
migrations.CreateModel(
name='AffiliationAlias',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('alias', models.CharField(help_text=b'Note that aliases are matched without regarding case.', max_length=255)),
('name', models.CharField(max_length=255)),
],
),
migrations.CreateModel(
name='AffiliationIgnoredEnding',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('ending', models.CharField(max_length=255)),
],
),
]

View file

@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations
def add_affiliation_info(apps, schema_editor):
AffiliationAlias = apps.get_model("person", "AffiliationAlias")
AffiliationAlias.objects.get_or_create(alias="cisco", name="Cisco Systems")
AffiliationAlias.objects.get_or_create(alias="cisco system", name="Cisco Systems")
AffiliationAlias.objects.get_or_create(alias="cisco systems (india) private limited", name="Cisco Systems")
AffiliationAlias.objects.get_or_create(alias="cisco systems india pvt", name="Cisco Systems")
AffiliationIgnoredEnding = apps.get_model("person", "AffiliationIgnoredEnding")
AffiliationIgnoredEnding.objects.get_or_create(ending="LLC\.?")
AffiliationIgnoredEnding.objects.get_or_create(ending="Ltd\.?")
AffiliationIgnoredEnding.objects.get_or_create(ending="Inc\.?")
AffiliationIgnoredEnding.objects.get_or_create(ending="GmbH\.?")
class Migration(migrations.Migration):
dependencies = [
('person', '0015_affiliationalias_affiliationignoredending'),
]
operations = [
migrations.RunPython(add_affiliation_info, migrations.RunPython.noop)
]

View file

@ -241,26 +241,3 @@ class Email(models.Model):
return
return self.address
class AffiliationAlias(models.Model):
"""Records that alias should be treated as name for statistical
purposes."""
alias = models.CharField(max_length=255, help_text="Note that aliases are matched without regarding case.")
name = models.CharField(max_length=255)
def __unicode__(self):
return u"{} -> {}".format(self.alias, self.name)
def save(self, *args, **kwargs):
self.alias = self.alias.lower()
super(AffiliationAlias, self).save(*args, **kwargs)
class AffiliationIgnoredEnding(models.Model):
"""Records that ending should be stripped from the affiliation for statistical purposes."""
ending = models.CharField(max_length=255, help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!")
def __unicode__(self):
return self.ending

View file

@ -6,8 +6,7 @@ from tastypie.cache import SimpleCache
from ietf import api
from ietf.person.models import (Person, Email, Alias, PersonHistory,
AffiliationAlias, AffiliationIgnoredEnding)
from ietf.person.models import (Person, Email, Alias, PersonHistory)
from ietf.utils.resources import UserResource
@ -82,29 +81,3 @@ class PersonHistoryResource(ModelResource):
"user": ALL_WITH_RELATIONS,
}
api.person.register(PersonHistoryResource())
class AffiliationIgnoredEndingResource(ModelResource):
class Meta:
queryset = AffiliationIgnoredEnding.objects.all()
serializer = api.Serializer()
cache = SimpleCache()
#resource_name = 'affiliationignoredending'
filtering = {
"id": ALL,
"ending": ALL,
}
api.person.register(AffiliationIgnoredEndingResource())
class AffiliationAliasResource(ModelResource):
class Meta:
queryset = AffiliationAlias.objects.all()
serializer = api.Serializer()
cache = SimpleCache()
#resource_name = 'affiliationalias'
filtering = {
"id": ALL,
"alias": ALL,
"name": ALL,
}
api.person.register(AffiliationAliasResource())

View file

@ -1,10 +1,8 @@
import pprint
import re
from collections import defaultdict
from django.contrib import admin
from django.contrib.auth.models import User
from ietf.person.models import Person, AffiliationAlias, AffiliationIgnoredEnding
from ietf.person.models import Person
def merge_persons(source,target,stream):
@ -88,86 +86,3 @@ def merge_persons(source,target,stream):
else:
print >>stream, "Deleting Person: {}({})".format(source.ascii,source.pk)
source.delete()
def compile_affiliation_ending_stripping_regexp():
parts = []
for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True):
try:
re.compile(ending_re)
except re.error:
pass
parts.append(ending_re)
re_str = ",? *({}) *$".format("|".join(parts))
return re.compile(re_str, re.IGNORECASE)
def get_aliased_affiliations(affiliations):
"""Given non-unique sequence of affiliations, returns dictionary with
aliases needed.
We employ the following strategies, interleaved:
- Stripping company endings like Inc., GmbH etc. from database
- Looking up aliases stored directly in the database, like
"Examplar International" -> "Examplar"
- Case-folding so Examplar and EXAMPLAR is merged with the
winner being the one with most occurrences (so input should not
be made unique) or most upper case letters in case of ties.
Case folding can be overridden by the aliases in the database."""
res = {}
ending_re = compile_affiliation_ending_stripping_regexp()
known_aliases = { alias.lower(): name for alias, name in AffiliationAlias.objects.values_list("alias", "name") }
affiliations_with_case_spellings = defaultdict(set)
case_spelling_count = defaultdict(int)
for affiliation in affiliations:
original_affiliation = affiliation
# check aliases from DB
alias = known_aliases.get(affiliation.lower())
if alias is not None:
affiliation = alias
res[original_affiliation] = affiliation
# strip ending
alias = ending_re.sub("", affiliation)
if alias != affiliation:
affiliation = alias
res[original_affiliation] = affiliation
# check aliases from DB
alias = known_aliases.get(affiliation.lower())
if alias is not None:
affiliation = alias
res[original_affiliation] = affiliation
affiliations_with_case_spellings[affiliation.lower()].add(original_affiliation)
case_spelling_count[affiliation] += 1
def affiliation_sort_key(affiliation):
count = case_spelling_count[affiliation]
uppercase_letters = sum(1 for c in affiliation if c.isupper())
return (count, uppercase_letters)
# now we just need to pick the most popular uppercase/lowercase
# spelling for each affiliation with more than one
for similar_affiliations in affiliations_with_case_spellings.itervalues():
if len(similar_affiliations) > 1:
most_popular = sorted(similar_affiliations, key=affiliation_sort_key, reverse=True)[0]
for affiliation in similar_affiliations:
if affiliation != most_popular:
res[affiliation] = most_popular
return res

View file

@ -4,8 +4,6 @@ import os
from django import forms
from django_countries.fields import countries
from ietf.doc.models import Document, DocAlias, State
from ietf.name.models import IntendedStdLevelName, DocRelationshipName
from ietf.group.models import Group
@ -107,7 +105,7 @@ class AuthorForm(forms.Form):
person = forms.CharField(max_length=50,widget=forms.TextInput(attrs={'class':'name-autocomplete'}),help_text="To see a list of people type the first name, or last name, or both.")
email = forms.CharField(widget=forms.Select(),help_text="Select an email.")
affiliation = forms.CharField(max_length=100, required=False, help_text="Affiliation")
country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False, help_text="Country")
country = forms.CharField(max_length=255, required=False, help_text="Country")
# check for id within parenthesis to ensure name was selected from the list
def clean_person(self):

View file

@ -293,7 +293,6 @@ INSTALLED_APPS = (
'tastypie',
'widget_tweaks',
'django_markup',
'django_countries',
# IETF apps
'ietf.api',
'ietf.community',
@ -315,6 +314,7 @@ INSTALLED_APPS = (
'ietf.redirects',
'ietf.release',
'ietf.review',
'ietf.stats',
'ietf.submit',
'ietf.sync',
'ietf.utils',

22
ietf/stats/admin.py Normal file
View file

@ -0,0 +1,22 @@
from django.contrib import admin
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias
@admin.register(AffiliationAlias)
class AffiliationAliasAdmin(admin.ModelAdmin):
    """Admin configuration for affiliation aliases: both columns are shown
    and searchable, and the list can be filtered on the target name."""
    search_fields = ["alias", "name"]
    list_display = ["alias", "name"]
    list_filter = ["name"]
@admin.register(AffiliationIgnoredEnding)
class AffiliationIgnoredEndingAdmin(admin.ModelAdmin):
    """Admin configuration for ignored affiliation endings: the ending is
    both the displayed column and the searched field."""
    search_fields = ["ending"]
    list_display = ["ending"]
@admin.register(CountryAlias)
class CountryAliasAdmin(admin.ModelAdmin):
    """Admin configuration for country aliases: filter on the country,
    search on the alias or on the name of the related country."""
    search_fields = ["alias", "country__name"]
    list_display = ["alias", "country"]
    list_filter = ["country"]

View file

@ -26,7 +26,6 @@ args = parser.parse_args()
formal_language_dict = { l.pk: l for l in FormalLanguageName.objects.all() }
docs_qs = Document.objects.filter(type="draft")
if args.document:
@ -80,11 +79,20 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen
for author in old_authors:
for alias in author.person.alias_set.all():
old_authors_by_name[alias.name] = author
old_authors_by_name[author.person.plain_name()] = author
if author.email_id:
old_authors_by_email[author.email_id] = author
for full, _, _, _, _, email, company in d.get_author_list():
# the draft parser sometimes has a problem if affiliation
# isn't in the second line, then it will report an extra
# author - skip those
seen = set()
for full, _, _, _, _, email, country, company in d.get_author_list():
if email in seen:
continue
seen.add(email)
old_author = None
if email:
old_author = old_authors_by_email.get(email)
@ -92,15 +100,29 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen
old_author = old_authors_by_name.get(full)
if not old_author:
print "UNKNOWN AUTHOR", doc.name, full, email, company
print "UNKNOWN AUTHOR", doc.name, full, email, country, company
continue
if old_author.affiliation != company:
print "new affiliation", old_author.affiliation, company
print "new affiliation", canonical_name, "[", full, "]", old_author.affiliation, "->", company
old_author.affiliation = company
old_author.save(update_fields=["affiliation"])
updated = True
if country is None:
country = ""
try:
country = country.decode("utf-8")
except UnicodeDecodeError:
country = country.decode("latin-1")
if old_author.country != country:
print "new country", canonical_name ,"[", full, "]", old_author.country.encode("utf-8"), "->", country.encode("utf-8")
old_author.country = country
old_author.save(update_fields=["country"])
updated = True
if updates:
Document.objects.filter(pk=doc.pk).update(**updates)

View file

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the tables for AffiliationAlias, AffiliationIgnoredEnding and
    CountryAlias in the stats app.

    Depends on the name migration that adds the country/continent names,
    since CountryAlias has a foreign key to name.CountryName.
    """

    dependencies = [
        ('name', '0020_add_country_continent_names'),
    ]

    operations = [
        migrations.CreateModel(
            name='AffiliationAlias',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('alias', models.CharField(help_text=b"Note that aliases will be matched case-insensitive and both before and after some clean-up.", max_length=255, unique=True)),
                ('name', models.CharField(max_length=255)),
            ],
        ),
        migrations.CreateModel(
            name='AffiliationIgnoredEnding',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('ending', models.CharField(help_text=b"Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!", max_length=255)),
            ],
        ),
        migrations.CreateModel(
            name='CountryAlias',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                # help_text must match the CountryAlias model definition
                # exactly, otherwise makemigrations reports a pending
                # AlterField for this column
                ('alias', models.CharField(help_text=b"Note that lower-case aliases are matched case-insensitive while aliases with at least one uppercase letter is matched case-sensitive.", max_length=255)),
                ('country', models.ForeignKey(to='name.CountryName', max_length=255)),
            ],
        ),
    ]

View file

@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations
def add_affiliation_info(apps, schema_editor):
    """Seed the stats app with its initial affiliation aliases, ignored
    affiliation endings and country aliases.

    ``apps`` is the historical app registry handed to RunPython functions;
    the models are fetched from it rather than imported. ``schema_editor``
    is unused. Every row is added with get_or_create, so rows that already
    exist are left alone.
    """
    # (alias, canonical affiliation name)
    affiliation_aliases = (
        ("cisco", "Cisco Systems"),
        ("cisco system", "Cisco Systems"),
        ("cisco systems (india) private limited", "Cisco Systems"),
        ("cisco systems india pvt", "Cisco Systems"),
    )

    # these are regular expressions, hence the raw strings
    ignored_endings = (
        r"LLC\.?",
        r"Ltd\.?",
        r"Inc\.?",
        r"GmbH\.?",
    )

    # (alias, slug of the CountryName it stands for)
    country_aliases = (
        ("russian federation", "RU"),
        ("p. r. china", "CN"),
        ("p.r. china", "CN"),
        ("p.r.china", "CN"),
        ("p.r china", "CN"),
        ("p.r. of china", "CN"),
        ("PRC", "CN"),
        ("P.R.C", "CN"),
        ("P.R.C.", "CN"),
        ("beijing", "CN"),
        ("shenzhen", "CN"),
        ("R.O.C.", "TW"),
        ("usa", "US"),
        ("UAS", "US"),
        ("USA.", "US"),
        ("u.s.a.", "US"),
        ("u. s. a.", "US"),
        ("u.s.a", "US"),
        ("u.s.", "US"),
        ("U.S", "US"),  # was mistakenly mapped to GB
        ("US of A", "US"),
        ("united sates", "US"),
        ("united state", "US"),
        ("united states", "US"),
        ("unites states", "US"),
        ("texas", "US"),
        ("UK", "GB"),
        ("united kingcom", "GB"),
        ("great britain", "GB"),
        ("england", "GB"),
        ("U.K.", "GB"),
        ("U.K", "GB"),
        ("Uk", "GB"),
        ("scotland", "GB"),
        ("republic of korea", "KR"),
        ("korea", "KR"),
        ("korea rep", "KR"),
        ("korea (the republic of)", "KR"),
        ("the netherlands", "NL"),
        ("netherland", "NL"),
        ("danmark", "DK"),
        ("sweeden", "SE"),
        ("swede", "SE"),
        ("belgique", "BE"),
        ("madrid", "ES"),
        ("espana", "ES"),
        ("hellas", "GR"),
        ("gemany", "DE"),
        ("deutschland", "DE"),
        ("italia", "IT"),
        ("isreal", "IL"),
        ("tel aviv", "IL"),
        ("UAE", "AE"),
        ("grand-duchy of luxembourg", "LU"),
        ("brasil", "BR"),
    )

    AffiliationAlias = apps.get_model("stats", "AffiliationAlias")
    for alias, name in affiliation_aliases:
        AffiliationAlias.objects.get_or_create(alias=alias, name=name)

    AffiliationIgnoredEnding = apps.get_model("stats", "AffiliationIgnoredEnding")
    for ending in ignored_endings:
        AffiliationIgnoredEnding.objects.get_or_create(ending=ending)

    CountryAlias = apps.get_model("stats", "CountryAlias")
    for alias, country_id in country_aliases:
        CountryAlias.objects.get_or_create(alias=alias, country_id=country_id)
class Migration(migrations.Migration):
    """Data migration that loads the initial alias rows for the stats app."""

    dependencies = [('stats', '0001_initial')]

    operations = [
        # reversing is a no-op: the seeded rows are left in place
        migrations.RunPython(
            add_affiliation_info,
            migrations.RunPython.noop,
        ),
    ]

View file

41
ietf/stats/models.py Normal file
View file

@ -0,0 +1,41 @@
from django.db import models
from ietf.name.models import CountryName
class AffiliationAlias(models.Model):
    """An alternative spelling (alias) of an affiliation together with the
    name that should be used in its place when generating statistics."""
    alias = models.CharField(max_length=255, help_text="Note that aliases will be matched case-insensitive and both before and after some clean-up.", unique=True)
    name = models.CharField(max_length=255)

    class Meta:
        verbose_name_plural = "affiliation aliases"

    def save(self, *args, **kwargs):
        # aliases are always stored in lower case
        lowered = self.alias.lower()
        self.alias = lowered
        super(AffiliationAlias, self).save(*args, **kwargs)

    def __unicode__(self):
        return u"{} -> {}".format(self.alias, self.name)
class AffiliationIgnoredEnding(models.Model):
    """A regular expression for an ending that is removed from affiliations
    before statistics are computed."""
    ending = models.CharField(
        max_length=255,
        help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!",
    )

    def __unicode__(self):
        # the pattern itself is the most useful label
        return self.ending
class CountryAlias(models.Model):
    """An alternative spelling (alias) of a country together with the
    CountryName it should be counted as when generating statistics."""
    alias = models.CharField(max_length=255, help_text="Note that lower-case aliases are matched case-insensitive while aliases with at least one uppercase letter is matched case-sensitive.")
    country = models.ForeignKey(CountryName, max_length=255)

    class Meta:
        verbose_name_plural = "country aliases"

    def __unicode__(self):
        target = self.country.name
        return u"{} -> {}".format(self.alias, target)

52
ietf/stats/resources.py Normal file
View file

@ -0,0 +1,52 @@
# Autogenerated by the makeresources management command 2017-02-15 10:10 PST
from tastypie.resources import ModelResource
from tastypie.fields import ToManyField # pyflakes:ignore
from tastypie.constants import ALL, ALL_WITH_RELATIONS # pyflakes:ignore
from tastypie.cache import SimpleCache
from ietf import api
from ietf.api import ToOneField # pyflakes:ignore
from ietf.stats.models import CountryAlias, AffiliationIgnoredEnding, AffiliationAlias
from ietf.name.resources import CountryNameResource
class CountryAliasResource(ModelResource):
    """API resource for CountryAlias; the country is exposed as a to-one
    relation to CountryNameResource and can be filtered through."""
    country = ToOneField(CountryNameResource, 'country')

    class Meta:
        cache = SimpleCache()
        serializer = api.Serializer()
        queryset = CountryAlias.objects.all()
        #resource_name = 'countryalias'
        filtering = {
            "country": ALL_WITH_RELATIONS,
            "alias": ALL,
            "id": ALL,
        }


api.stats.register(CountryAliasResource())
class AffiliationIgnoredEndingResource(ModelResource):
    """API resource for AffiliationIgnoredEnding, filterable on id and ending."""

    class Meta:
        cache = SimpleCache()
        serializer = api.Serializer()
        queryset = AffiliationIgnoredEnding.objects.all()
        #resource_name = 'affiliationignoredending'
        filtering = {
            "ending": ALL,
            "id": ALL,
        }


api.stats.register(AffiliationIgnoredEndingResource())
class AffiliationAliasResource(ModelResource):
    """API resource for AffiliationAlias, filterable on id, alias and name."""

    class Meta:
        cache = SimpleCache()
        serializer = api.Serializer()
        queryset = AffiliationAlias.objects.all()
        #resource_name = 'affiliationalias'
        filtering = {
            "name": ALL,
            "alias": ALL,
            "id": ALL,
        }


api.stats.register(AffiliationAliasResource())

View file

@ -25,7 +25,8 @@ class StatisticsTests(TestCase):
self.assertTrue(authors_url in r["Location"])
# check various stats types
for stats_type in ["authors", "pages", "words", "format", "formlang", "author/documents", "author/affiliation"]:
for stats_type in ["authors", "pages", "words", "format", "formlang",
"author/documents", "author/affiliation", "author/country", "author/continent"]:
for document_type in ["", "rfc", "draft"]:
for time_choice in ["", "5y"]:
url = urlreverse(ietf.stats.views.document_stats, kwargs={ "stats_type": stats_type })

198
ietf/stats/utils.py Normal file
View file

@ -0,0 +1,198 @@
import re
from collections import defaultdict
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias
from ietf.name.models import CountryName
def compile_affiliation_ending_stripping_regexp():
    """Return a compiled, case-insensitive regexp matching any of the
    ignored affiliation endings at the end of a string.

    The endings are the regexps stored in AffiliationIgnoredEnding. The
    match also covers an optional comma and spaces before the ending and
    spaces after it, so substituting the match with "" strips the ending.

    Endings that are not valid regexps on their own are left out, so that a
    single broken row cannot make compiling the combined pattern fail.
    """
    parts = []
    for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True):
        try:
            re.compile(ending_re)
        except re.error:
            # invalid pattern - skip it instead of poisoning the alternation
            continue

        parts.append(ending_re)

    re_str = ",? *({}) *$".format("|".join(parts))

    return re.compile(re_str, re.IGNORECASE)
def get_aliased_affiliations(affiliations):
    """Compute the aliases needed to merge differently spelled affiliations.

    ``affiliations`` is a sequence of affiliation strings, duplicates
    included (the number of occurrences is used to pick a winner). The
    result is a dictionary from an affiliation as given to the spelling it
    should be replaced with; affiliations that need no replacement are
    generally absent.

    Three strategies are combined:

    - looking up aliases stored in the database, like
      "Examplar International" -> "Examplar"

    - stripping the company endings stored in the database, like Inc.
      or GmbH, followed by a second alias lookup

    - case-folding, so that Examplar and EXAMPLAR are merged; the winner
      is the spelling with the most occurrences, or on ties the one with
      the most upper case letters

    Aliases from the database can be used to override the case folding.
    """
    strip_ending_re = compile_affiliation_ending_stripping_regexp()

    db_aliases = {}
    for alias, target in AffiliationAlias.objects.values_list("alias", "name"):
        db_aliases[alias.lower()] = target

    res = {}
    spellings_by_folded_name = defaultdict(set)
    spelling_count = defaultdict(int)

    for original in affiliations:
        current = original

        # alias straight from the database
        replacement = db_aliases.get(current.lower())
        if replacement is not None:
            current = replacement
            res[original] = current

        # drop an ignorable ending
        replacement = strip_ending_re.sub("", current)
        if replacement != current:
            current = replacement
            res[original] = current

        # the cleaned-up name may itself be a known alias
        replacement = db_aliases.get(current.lower())
        if replacement is not None:
            current = replacement
            res[original] = current

        # NOTE(review): the groups collect the original spellings while the
        # counts are keyed by the cleaned-up spelling - confirm this
        # asymmetry is intended
        spellings_by_folded_name[current.lower()].add(original)
        spelling_count[current] += 1

    def popularity(spelling):
        capitals = sum(1 for ch in spelling if ch.isupper())
        return (spelling_count[spelling], capitals)

    # for every name seen with several spellings, point the less popular
    # spellings at the most popular one
    for spellings in spellings_by_folded_name.itervalues():
        if len(spellings) <= 1:
            continue

        winner = sorted(spellings, key=popularity, reverse=True)[0]
        for spelling in spellings:
            if spelling != winner:
                res[spelling] = winner

    return res
def get_aliased_countries(countries):
    """Map free-form country strings to the names of known countries.

    ``countries`` is an iterable of strings, duplicates allowed. The result
    is a dictionary with an entry for every input string that is not
    already exactly the name of a known country; the value is the name of
    the country the string was recognised as, or "" if it was not
    recognised.

    The following is tried in order, the first hit wins:

    - lookup (as is, then lower-cased) among the aliases in the database
      and the lower-cased names of the known countries
    - a US state followed by a five-digit zip code
    - the same lookup after stripping trailing/leading punctuation
    - the last comma-separated part, then the last whitespace-separated part
    - an alias occurring as a whole word anywhere, longest alias first
    - a two-letter upper-case ISO code
    """
    known_aliases = dict(CountryAlias.objects.values_list("alias", "country__name"))

    iso_code_aliases = {}

    # add aliases for known countries
    for slug, name in CountryName.objects.values_list("slug", "name"):
        if len(name) > 2:
            known_aliases[name.lower()] = name

        if len(slug) == 2 and slug[0].isupper() and slug[1].isupper():
            iso_code_aliases[slug] = name  # add ISO code

    def lookup_alias(possible_alias):
        # exact match first so aliases with upper-case letters can hit,
        # then the lower-cased form
        name = known_aliases.get(possible_alias)
        if name is not None:
            return name

        name = known_aliases.get(possible_alias.lower())
        if name is not None:
            return name

        return possible_alias

    # Whole-word patterns for the "anywhere in the string" step. They are
    # kept in a list ordered with the longest alias first (ties broken
    # alphabetically), so that the result is the same on every run and the
    # most specific alias wins when several occur in the same string.
    known_re_aliases = [
        (re.compile(u"\\b{}\\b".format(re.escape(alias))), name)
        for alias, name in sorted(known_aliases.items(), key=lambda t: (-len(t[0]), t[0]))
    ]

    # specific hack: check for zip codes from the US since in the
    # early days, the addresses often didn't include the country
    #
    # the dots in the abbreviations are escaped - an unescaped "." would
    # match any character and make e.g. "Van 12345" look like Virginia
    us_zipcode_re = re.compile(
        r"\b("
        r"AL|AK|AZ|AR|CA|CO|CT|DE|DC|FL|GA|HI|ID|IL|IN|IA|KS|KY|LA|ME|MD|MA|MI|MN|MS|MO|MT|NE|NV|NH|NJ|NM|NY|NC|ND"
        r"|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VT|VA|WA|WV|WI|WY|AS|GU|MP|PR|VI|UM|FM|MH|PW"
        r"|Ca|Cal\.?|California|CALIFORNIA|Colorado|Georgia|Illinois|Ill|Maryland|Ma|Ma\.|Mass|Massachuss?etts"
        r"|Michigan|Minnesota|New Jersey|New York|Ny|N\.Y\.?|North Carolina|NORTH CAROLINA|Ohio|Oregon"
        r"|Pennsylvania|Tx|Texas|Tennessee|Utah|Vermont|Virginia|Va\.?|Washington"
        r")[., -]*[0-9]{5}\b"
    )

    us_country_name = CountryName.objects.get(slug="US").name

    def last_text_part_stripped(split):
        # last non-blank element of split, stripped, or u"" if there is none
        for t in reversed(split):
            t = t.strip()
            if t:
                return t
        return u""

    known_countries = set(CountryName.objects.values_list("name", flat=True))

    res = {}

    for country in countries:
        if country in res or country in known_countries:
            continue

        original_country = country

        # aliased name
        country = lookup_alias(country)
        if country in known_countries:
            res[original_country] = country
            continue

        # contains US zipcode
        if us_zipcode_re.search(country):
            res[original_country] = us_country_name
            continue

        # do a little bit of cleanup; a final "." is kept after an
        # upper-case letter since it is then likely part of an abbreviation
        if len(country) > 1 and country[-1] == "." and not country[-2].isupper():
            country = country.rstrip(".")

        country = country.strip("-,").strip()

        # aliased name
        country = lookup_alias(country)
        if country in known_countries:
            res[original_country] = country
            continue

        # country name at end, separated by comma
        last_part = lookup_alias(last_text_part_stripped(country.split(",")))
        if last_part in known_countries:
            res[original_country] = last_part
            continue

        # country name at end, separated by whitespace
        last_part = lookup_alias(last_text_part_stripped(country.split()))
        if last_part in known_countries:
            res[original_country] = last_part
            continue

        # country name anywhere
        country_lower = country.lower()
        found_name = next(
            (name for alias_re, name in known_re_aliases
             if alias_re.search(country) or alias_re.search(country_lower)),
            None)
        if found_name is not None:
            res[original_country] = found_name
            continue

        # if everything else has failed, try ISO code
        country = iso_code_aliases.get(country, country)
        if country in known_countries:
            res[original_country] = country
            continue

        # unknown country
        res[original_country] = ""

    return res

View file

@ -23,9 +23,9 @@ from ietf.review.utils import (extract_review_request_data,
from ietf.submit.models import Submission
from ietf.group.models import Role, Group
from ietf.person.models import Person
from ietf.name.models import ReviewRequestStateName, ReviewResultName
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName
from ietf.doc.models import DocAlias, Document
from ietf.person.utils import get_aliased_affiliations
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries
from ietf.ietfauth.utils import has_role
def stats_index(request):
@ -139,6 +139,8 @@ def document_stats(request, stats_type=None):
table_data = []
stats_title = ""
bin_size = 1
alias_data = []
eu_countries = None
if any(stats_type == t[0] for t in possible_document_stats_types):
@ -332,7 +334,7 @@ def document_stats(request, stats_type=None):
if from_time:
# this is actually faster than joining in the database,
# despite the round-trip back and forth
docs_within_time_constraint = list(Document.objects.filter(
docs_within_time_constraint = set(Document.objects.filter(
type="draft",
docevent__time__gte=from_time,
docevent__type__in=["published_rfc", "new_revision"],
@ -349,7 +351,7 @@ def document_stats(request, stats_type=None):
else:
doc_label = "document"
total_persons = person_qs.count()
total_persons = person_qs.distinct().count()
if stats_type == "author/documents":
stats_title = "Number of {}s per author".format(doc_label)
@ -402,6 +404,86 @@ def document_stats(request, stats_type=None):
"animation": False,
})
for alias, name in sorted(aliases.iteritems(), key=lambda t: t[1]):
alias_data.append((name, alias))
elif stats_type == "author/country":
stats_title = "Number of {} authors per country".format(doc_label)
bins = defaultdict(list)
# Since people don't write the country names in the
# same way, and we don't want to go back and edit them
# either, we transform them here.
name_country_set = set((name, country)
for name, country in person_qs.values_list("name", "documentauthor__country"))
aliases = get_aliased_countries(country for _, country in name_country_set)
countries = { c.name: c for c in CountryName.objects.all() }
eu_name = "EU"
eu_countries = set(c for c in countries.itervalues() if c.in_eu)
for name, country in name_country_set:
country_name = aliases.get(country, country)
bins[country_name].append(name)
c = countries.get(country_name)
if c and c.in_eu:
bins[eu_name].append(name)
series_data = []
for country, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
percentage = len(names) * 100.0 / total_persons
if country:
series_data.append((country, len(names)))
table_data.append((country, percentage, names))
series_data.sort(key=lambda t: t[1], reverse=True)
series_data = series_data[:30]
chart_data.append({
"data": series_data,
"animation": False,
})
for alias, country_name in aliases.iteritems():
alias_data.append((country_name, alias, countries.get(country_name)))
alias_data.sort()
elif stats_type == "author/continent":
stats_title = "Number of {} authors per continent".format(doc_label)
bins = defaultdict(list)
name_country_set = set((name, country)
for name, country in person_qs.values_list("name", "documentauthor__country"))
aliases = get_aliased_countries(country for _, country in name_country_set)
country_to_continent = dict(CountryName.objects.values_list("name", "continent__name"))
for name, country in name_country_set:
country_name = aliases.get(country, country)
continent_name = country_to_continent.get(country_name, "")
bins[continent_name].append(name)
series_data = []
for continent, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
percentage = len(names) * 100.0 / total_persons
if continent:
series_data.append((continent, len(names)))
table_data.append((continent, percentage, names))
series_data.sort(key=lambda t: t[1], reverse=True)
chart_data.append({
"data": series_data,
"animation": False,
})
return render(request, "stats/document_stats.html", {
"chart_data": mark_safe(json.dumps(chart_data)),
@ -416,6 +498,10 @@ def document_stats(request, stats_type=None):
"time_choice": time_choice,
"doc_label": doc_label,
"bin_size": bin_size,
"show_aliases_url": build_document_stats_url(get_overrides={ "showaliases": "1" }),
"hide_aliases_url": build_document_stats_url(get_overrides={ "showaliases": None }),
"alias_data": alias_data,
"eu_countries": sorted(eu_countries or [], key=lambda c: c.name),
"content_template": "stats/document_stats_{}.html".format(stats_type.replace("/", "_")),
})

View file

@ -12,8 +12,6 @@ from django.conf import settings
from django.utils.html import mark_safe
from django.core.urlresolvers import reverse as urlreverse
from django_countries.fields import countries
import debug # pyflakes:ignore
from ietf.doc.models import Document
@ -32,15 +30,6 @@ from ietf.submit.parsers.ps_parser import PSParser
from ietf.submit.parsers.xml_parser import XMLParser
from ietf.utils.draft import Draft
def clean_country(country):
country = country.upper()
for code, name in countries:
if country == code:
return code
if country == name.upper():
return code
return "" # unknown
class SubmissionUploadForm(forms.Form):
txt = forms.FileField(label=u'.txt format', required=False)
xml = forms.FileField(label=u'.xml format', required=False)
@ -194,7 +183,7 @@ class SubmissionUploadForm(forms.Form):
"name": author.attrib.get('fullname'),
"email": author.findtext('address/email'),
"affiliation": author.findtext('organization'),
"country": clean_country(author.findtext('address/postal/country')),
"country": author.findtext('address/postal/country'),
})
except forms.ValidationError:
raise
@ -348,7 +337,7 @@ class NameEmailForm(forms.Form):
class AuthorForm(NameEmailForm):
affiliation = forms.CharField(max_length=100, required=False)
country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False)
country = forms.CharField(max_length=255, required=False)
def __init__(self, *args, **kwargs):
super(AuthorForm, self).__init__(*args, **kwargs)

View file

@ -81,7 +81,7 @@ def upload_submission(request):
# If we don't have an xml file, try to extract the
# relevant information from the text file
for author in form.parsed_draft.get_author_list():
full_name, first_name, middle_initial, last_name, name_suffix, email, company = author
full_name, first_name, middle_initial, last_name, name_suffix, email, country, company = author
name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip()
@ -114,7 +114,7 @@ def upload_submission(request):
"name": name,
"email": email,
"affiliation": company,
# FIXME: missing country
"country": country
})
if form.abstract:

View file

@ -57,3 +57,44 @@
{% endfor %}
</tbody>
</table>
<p>Some authors are authors of multiple documents with different
affiliation information associated, so the sum of multiple rows in the
table can be more than 100%.</p>
<h3>Affiliation Aliases</h3>
<p>In generating the above statistics, some heuristics have been applied to determine the affiliation of each author.</p>
{% if request.GET.showaliases %}
<p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p>
{% if request.user.is_staff %}
<p>Note: since you're an admin, you can <a href="{% url "admin:stats_affiliationalias_add" %}">add an extra known alias</a> or see the <a href="{% url "admin:stats_affiliationalias_changelist" %}">existing known aliases</a> and <a href="{% url "admin:stats_affiliationignoredending_changelist" %}">generally ignored endings</a>.</p>
{% endif %}
{% if alias_data %}
<table class="table table-condensed">
<thead>
<tr>
<th>Affiliation</th>
<th>Alias</th>
</tr>
</thead>
{% for name, alias in alias_data %}
<tr>
<td>
{% ifchanged %}
{{ name|default:"(unknown)" }}
{% endifchanged %}
</td>
<td>{{ alias }}</td>
</tr>
{% endfor %}
</table>
{% endif %}
{% else %}
<p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p>
{% endif %}

View file

@ -0,0 +1,65 @@
<h3>{{ stats_title }}</h3>
<div id="chart"></div>
<script>
var chartConf = {
chart: {
type: 'column'
},
title: {
text: '{{ stats_title|escapejs }}'
},
xAxis: {
type: "category",
title: {
text: 'Continent'
}
},
yAxis: {
title: {
text: 'Number of authors'
}
},
tooltip: {
formatter: function () {
var s = '<b>' + this.points[0].key + '</b>';
$.each(this.points, function () {
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
});
return s;
},
shared: true
},
series: {{ chart_data }}
};
</script>
<h3>Data</h3>
<table class="table table-condensed stats-data">
<thead>
<tr>
<th>Continent</th>
<th>Percentage of authors</th>
<th>Authors</th>
</tr>
</thead>
<tbody>
{% for continent, percentage, names in table_data %}
<tr>
<td>{{ continent|default:"(unknown)" }}</td>
<td>{{ percentage|floatformat:2 }}%</td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
</tr>
{% endfor %}
</tbody>
</table>
<p>The country information for an author can vary between documents,
so the sum of the rows in the table can be more than 100%. This
is especially true for the row with unknown continent information -
many authors may have one or more author entries with an
unrecognized country.</p>

View file

@ -0,0 +1,124 @@
<h3>{{ stats_title }}</h3>
<div id="chart"></div>
<script>
var chartConf = {
chart: {
type: 'column'
},
title: {
text: '{{ stats_title|escapejs }}'
},
xAxis: {
type: "category",
title: {
text: 'Country'
}
},
yAxis: {
title: {
text: 'Number of authors'
}
},
tooltip: {
formatter: function () {
var s = '<b>' + this.points[0].key + '</b>';
$.each(this.points, function () {
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
});
return s;
},
shared: true
},
series: {{ chart_data }}
};
</script>
<h3>Data</h3>
<table class="table table-condensed stats-data">
<thead>
<tr>
<th>Country</th>
<th>Percentage of authors</th>
<th>Authors</th>
</tr>
</thead>
<tbody>
{% for country, percentage, names in table_data %}
<tr>
<td>{{ country|default:"(unknown)" }}</td>
<td>{{ percentage|floatformat:2 }}%</td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
</tr>
{% endfor %}
</tbody>
</table>
<p>The country information for an author can vary between documents,
so the sum of multiple rows in the table can be more than 100%. This
is especially true for the row with unknown country information -
many authors may have one or more author entries with an
unrecognized country.</p>
<p>An author is counted in EU if the country is a member of the EU
now, even if that was not the case at publication.
EU members:
{% for c in eu_countries %}{{ c.name }}{% if not forloop.last %}, {% endif %}{% endfor %}.</p>
<h3>Country Aliases</h3>
<p>In generating the above statistics, some heuristics have been
applied to figure out which country each author is from.</p>
{% if request.GET.showaliases %}
<p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p>
{% if request.user.is_staff %}
<p>Note: since you're an admin, some extra links are visible. You
can either correct a document author entry directly in case the
information is obviously missing or add an alias if an unknown
<a href="{% url "admin:name_countryname_changelist" %}">country name</a>
is being used.
</p>
{% endif %}
{% if alias_data %}
<table class="table table-condensed">
<thead>
<tr>
<th>Country</th>
<th>Alias</th>
<th></th>
</tr>
</thead>
{% for name, alias, country in alias_data %}
<tr>
<td>
{% ifchanged %}
{% if country and request.user.is_staff %}
<a href="{% url "admin:name_countryname_change" country.pk %}">
{% endif %}
{{ name|default:"(unknown)" }}
{% if country and request.user.is_staff %}
</a>
{% endif %}
{% endifchanged %}
</td>
<td>{{ alias }}</td>
<td>
{% if request.user.is_staff and name != "EU" %}
<a href="{% url "admin:doc_documentauthor_changelist" %}?country={{ alias|urlencode }}">Matching authors</a>
{% endif %}
</td>
</tr>
{% endfor %}
</table>
{% endif %}
{% else %}
<p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p>
{% endif %}

View file

@ -2,7 +2,7 @@
{# Copyright The IETF Trust 2015, All Rights Reserved #}
{% load origin %}
{% load staticfiles %}
{% load ietf_filters submit_tags country %}
{% load ietf_filters submit_tags %}
{% block title %}Submission status of {{ submission.name }}-{{ submission.rev }}{% endblock %}
@ -207,8 +207,8 @@
<th>Author {{ forloop.counter }}</th>
<td>
{{ author.name }} {% if author.email %}&lt;{{ author.email }}&gt;{% endif %}
{% if author.affiliation %}- {{ author.affiliation }}{% endif %}
{% if author.country %}- {{ author.country|country_name }}{% endif %}
- {% if author.affiliation %}{{ author.affiliation }}{% else %}<i>unknown affiliation</i>{% endif %}
- {% if author.country %}{{ author.country }}{% else %}<i>unknown country</i>{% endif %}
</td>
</tr>
{% endfor %}

View file

@ -196,6 +196,7 @@ class Draft():
line = ""
newpage = False
sentence = False
shortline = False
blankcount = 0
linecount = 0
# two functions with side effects
@ -262,7 +263,7 @@ class Draft():
sentence = True
if re.search("[^ \t]", line):
if newpage:
if sentence:
if sentence or shortline:
stripped += [""]
else:
if blankcount:
@ -270,6 +271,7 @@ class Draft():
blankcount = 0
sentence = False
newpage = False
shortline = len(line.strip()) < 18
if re.search("[.:]$", line):
sentence = True
if re.search("^[ \t]*$", line):
@ -847,7 +849,8 @@ class Draft():
nonblank_count = 0
blanklines = 0
email = None
for line in self.lines[start+1:]:
country = None
for line_offset, line in enumerate(self.lines[start+1:]):
_debug( " " + line.strip())
# Break on the second blank line
if not line:
@ -887,15 +890,18 @@ class Draft():
else:
pass
try:
column = line[beg:end].strip()
except:
column = line
column = re.sub(" *\(at\) *", "@", column)
column = re.sub(" *\(dot\) *", ".", column)
column = re.sub(" +at +", "@", column)
column = re.sub(" +dot +", ".", column)
column = re.sub("&cisco.com", "@cisco.com", column)
def columnify(l):
    """Return the relevant column of line *l* with e-mail obfuscation undone.

    The slice bounds ``beg`` and ``end`` are read from the enclosing scope
    (presumably the horizontal extent of the current author's column --
    confirm against the surrounding loop).  If slicing fails for any
    reason the whole line is used instead.

    After slicing, "(at)" / "<at>" / " at " become "@", "(dot)" / "<dot>" /
    " dot " become ".", "&cisco.com" becomes "@cisco.com", and non-breaking
    spaces are turned into ordinary spaces.
    """
    try:
        # Expand tabs to eight spaces before slicing by character offset,
        # and start one character left of ``beg`` (never below 0).
        column = l.replace('\t', 8 * ' ')[max(0, beg - 1):end].strip()
    except Exception:
        # Best effort: fall back to the unsliced line rather than failing,
        # but let KeyboardInterrupt/SystemExit propagate.
        column = l
    # Raw strings keep the regex escapes from being read as (invalid)
    # string escapes; the patterns themselves are unchanged.
    column = re.sub(r" *(?:\(at\)| <at> | at ) *", "@", column)
    column = re.sub(r" *(?:\(dot\)| <dot> | dot ) *", ".", column)
    column = re.sub("&cisco.com", "@cisco.com", column)
    column = column.replace("\xa0", " ")
    return column
column = columnify(line)
# if re.search("^\w+: \w+", column):
# keyword = True
@ -906,13 +912,42 @@ class Draft():
# break
#_debug( " Column text :: " + column)
if nonblank_count >= 2 and blanklines == 0:
# Usually, the contact info lines will look
# like this: "Email: someone@example.com" or
# "Tel: +1 (412)-2390 23123", but sometimes
# the : is left out. That's okay for things we
# can't misinterpret, but "tel" may match "Tel
# Aviv 69710, Israel" so match
# - misc contact info
# - tel/fax [number]
# - [phone number]
# - [email]
other_contact_info_regex = re.compile(r'^(((contact )?e|\(e|e-|m|electronic )?mail|email_id|mailto|e-main|(tele)?phone|voice|mobile|work|uri|url|tel:)\b|^((ph|tel\.?|telefax|fax) *[:.]? *\(?( ?\+ ?)?[0-9]+)|^(\++[0-9]+|\(\+*[0-9]+\)|\(dsn\)|[0-9]+)([ -.]*\b|\b[ -.]*)(([0-9]{2,}|\([0-9]{2,}\)|(\([0-9]\)|[0-9])[ -][0-9]{2,}|\([0-9]\)[0-9]+)([ -.]+([0-9]+|\([0-9]+\)))+|([0-9]{7,}|\([0-9]{7,}\)))|^(<?[-a-z0-9._+]+|{([-a-z0-9._+]+, ?)+[-a-z0-9._+]+})@[-a-z0-9._]+>?|^https?://|^www\.')
next_line_index = start + 1 + line_offset + 1
if (not country
and not other_contact_info_regex.search(column.lower())
and next_line_index < len(self.lines)):
next_line_lower = columnify(self.lines[next_line_index]).lower().strip()
if not next_line_lower or other_contact_info_regex.search(next_line_lower):
# country should be here, as the last
# part of the address, right before an
# empty line or other contact info
country = column.strip() or None
_debug(" Country: %s" % country)
_debug("3: authors[%s]: %s" % (i, authors[i]))
emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column)
if emailmatch and not "@" in author:
email = emailmatch.group(0).lower()
break
authors[i] = authors[i] + ( email, )
authors[i] = authors[i] + ( email, country)
else:
if not author in ignore:
companies[i] = authors[i]
@ -938,8 +973,8 @@ class Draft():
_debug(" * Final company list: %s" % (companies,))
_debug(" * Final companies_seen: %s" % (companies_seen,))
self._author_info = authors
self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,company in authors ] # pyflakes:ignore
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,company in authors ]
self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,country,company in authors ] # pyflakes:ignore
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,country,company in authors ]
self._authors.sort()
_debug(" * Final author list: " + ", ".join(self._authors))
_debug("-"*72)
@ -1159,10 +1194,10 @@ def getmeta(fn):
def _output(docname, fields, outfile=sys.stdout):
global company_domain
if opt_getauthors:
# Output an (incomplete!) getauthors-compatible format. Country
# information is always UNKNOWN, and information about security and
# iana sections presence is missing.
for full,first,middle,last,suffix,email,company in fields["_authorlist"]:
# Output an (incomplete!) getauthors-compatible format.
# Information about security and iana sections presence is
# missing.
for full,first,middle,last,suffix,email,country,company in fields["_authorlist"]:
if company in company_domain:
company = company_domain[company]
else:
@ -1173,7 +1208,7 @@ def _output(docname, fields, outfile=sys.stdout):
fields["name"] = full
fields["email"] = email
fields["company"] = company
fields["country"] = "UNKNOWN"
fields["country"] = country or "UNKNOWN"
try:
year, month, day = fields["doccreationdate"].split("-")
except ValueError:

View file

@ -1,14 +0,0 @@
from django.template.base import Library
from django.template.defaultfilters import stringfilter
from django_countries import countries
register = Library()
@register.filter(is_safe=True)
@stringfilter
def country_name(value):
    """
    Template filter: look up the country name for a country code.

    Yields the empty string when the code is not present in ``countries``.
    """
    names_by_code = dict(countries)
    return names_by_code.get(value, "")