Add support for extracting the country line from the author addresses
to the draft parser (incorporating patch from trunk), store the extracted country instead of trying to turn it into an ISO country code, add country and continent name models and add initial data for those, add helper function for cleaning the countries, add author country and continent charts, move the affiliation models to stats/models.py, fix a bunch of bugs. - Legacy-Id: 12846
This commit is contained in:
parent
882579bab3
commit
b2ff10b0f2
|
@ -174,7 +174,8 @@ class BallotPositionDocEventAdmin(DocEventAdmin):
|
|||
admin.site.register(BallotPositionDocEvent, BallotPositionDocEventAdmin)
|
||||
|
||||
class DocumentAuthorAdmin(admin.ModelAdmin):
|
||||
list_display = ['id', 'document', 'person', 'email', 'affiliation', 'order']
|
||||
search_fields = [ 'document__name', 'person__name', 'email__address', 'affiliation']
|
||||
list_display = ['id', 'document', 'person', 'email', 'affiliation', 'country', 'order']
|
||||
search_fields = ['document__docalias__name', 'person__name', 'email__address', 'affiliation', 'country']
|
||||
raw_id_fields = ["document", "person", "email"]
|
||||
admin.site.register(DocumentAuthor, DocumentAuthorAdmin)
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django_countries.fields
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
@ -49,7 +48,7 @@ class Migration(migrations.Migration):
|
|||
migrations.AddField(
|
||||
model_name='dochistoryauthor',
|
||||
name='country',
|
||||
field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2),
|
||||
field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255),
|
||||
),
|
||||
migrations.RenameField(
|
||||
model_name='dochistoryauthor',
|
||||
|
@ -74,7 +73,7 @@ class Migration(migrations.Migration):
|
|||
migrations.AddField(
|
||||
model_name='documentauthor',
|
||||
name='country',
|
||||
field=django_countries.fields.CountryField(blank=True, help_text=b'Country used by author for submission', max_length=2),
|
||||
field=models.CharField(blank=True, help_text=b'Country used by author for submission', max_length=255),
|
||||
),
|
||||
migrations.RenameField(
|
||||
model_name='documentauthor',
|
||||
|
|
|
@ -11,8 +11,6 @@ from django.contrib.contenttypes.models import ContentType
|
|||
from django.conf import settings
|
||||
from django.utils.html import mark_safe
|
||||
|
||||
from django_countries.fields import CountryField
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.group.models import Group
|
||||
|
@ -406,7 +404,7 @@ class DocumentAuthorInfo(models.Model):
|
|||
# email should only be null for some historic documents
|
||||
email = models.ForeignKey(Email, help_text="Email address used by author for submission", blank=True, null=True)
|
||||
affiliation = models.CharField(max_length=100, blank=True, help_text="Organization/company used by author for submission")
|
||||
country = CountryField(blank=True, help_text="Country used by author for submission")
|
||||
country = models.CharField(max_length=255, blank=True, help_text="Country used by author for submission")
|
||||
order = models.IntegerField(default=1)
|
||||
|
||||
def formatted_email(self):
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
from django.contrib import admin
|
||||
|
||||
from ietf.name.models import (
|
||||
BallotPositionName, ConstraintName, DBTemplateTypeName, DocRelationshipName,
|
||||
BallotPositionName, ConstraintName, ContinentName, CountryName,
|
||||
DBTemplateTypeName, DocRelationshipName,
|
||||
DocReminderTypeName, DocTagName, DocTypeName, DraftSubmissionStateName,
|
||||
FeedbackTypeName, FormalLanguageName, GroupMilestoneStateName, GroupStateName, GroupTypeName,
|
||||
IntendedStdLevelName, IprDisclosureStateName, IprEventTypeName, IprLicenseTypeName,
|
||||
|
@ -10,8 +11,11 @@ from ietf.name.models import (
|
|||
ReviewRequestStateName, ReviewResultName, ReviewTypeName, RoleName, RoomResourceName,
|
||||
SessionStatusName, StdLevelName, StreamName, TimeSlotTypeName, )
|
||||
|
||||
from ietf.stats.models import CountryAlias
|
||||
|
||||
class NameAdmin(admin.ModelAdmin):
|
||||
list_display = ["slug", "name", "desc", "used"]
|
||||
search_fields = ["slug", "name"]
|
||||
prepopulate_from = { "slug": ("name",) }
|
||||
|
||||
class DocRelationshipNameAdmin(NameAdmin):
|
||||
|
@ -26,8 +30,19 @@ class GroupTypeNameAdmin(NameAdmin):
|
|||
list_display = ["slug", "name", "verbose_name", "desc", "used"]
|
||||
admin.site.register(GroupTypeName, GroupTypeNameAdmin)
|
||||
|
||||
class CountryAliasInline(admin.TabularInline):
    """Tabular inline for editing CountryAlias rows on a parent admin page."""
    # Number of empty extra forms shown for adding a new alias.
    extra = 1
    model = CountryAlias
|
||||
|
||||
class CountryNameAdmin(NameAdmin):
    """NameAdmin variant for CountryName.

    Shows the continent and EU flag as columns, allows filtering on both,
    and embeds the country's aliases as an inline table.
    """
    inlines = [CountryAliasInline]
    list_filter = ["continent", "in_eu"]
    list_display = ["slug", "name", "continent", "in_eu"]
admin.site.register(CountryName, CountryNameAdmin)
|
||||
|
||||
admin.site.register(BallotPositionName, NameAdmin)
|
||||
admin.site.register(ConstraintName, NameAdmin)
|
||||
admin.site.register(ContinentName, NameAdmin)
|
||||
admin.site.register(DBTemplateTypeName, NameAdmin)
|
||||
admin.site.register(DocReminderTypeName, NameAdmin)
|
||||
admin.site.register(DocTagName, NameAdmin)
|
||||
|
|
44
ietf/name/migrations/0019_continentname_countryname.py
Normal file
44
ietf/name/migrations/0019_continentname_countryname.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Schema migration creating the ContinentName and CountryName models."""

    dependencies = [
        ('name', '0018_add_formlang_names'),
    ]

    operations = [
        # ContinentName: slug (primary key), name, desc, used, order.
        migrations.CreateModel(
            name='ContinentName',
            fields=[
                ('slug', models.CharField(primary_key=True, max_length=32, serialize=False)),
                ('name', models.CharField(max_length=255)),
                ('desc', models.TextField(blank=True)),
                ('used', models.BooleanField(default=True)),
                ('order', models.IntegerField(default=0)),
            ],
            options={
                'ordering': ['order', 'name'],
                'abstract': False,
            },
        ),
        # CountryName: the same columns plus an EU flag and a link to the
        # continent created above.
        migrations.CreateModel(
            name='CountryName',
            fields=[
                ('slug', models.CharField(primary_key=True, max_length=32, serialize=False)),
                ('name', models.CharField(max_length=255)),
                ('desc', models.TextField(blank=True)),
                ('used', models.BooleanField(default=True)),
                ('order', models.IntegerField(default=0)),
                ('in_eu', models.BooleanField(verbose_name='In EU', default=False)),
                ('continent', models.ForeignKey(to='name.ContinentName')),
            ],
            options={
                'ordering': ['order', 'name'],
                'abstract': False,
            },
        ),
    ]
|
275
ietf/name/migrations/0020_add_country_continent_names.py
Normal file
275
ietf/name/migrations/0020_add_country_continent_names.py
Normal file
|
@ -0,0 +1,275 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
# (slug, display name) for every continent row created by this migration.
_CONTINENT_NAMES = (
    ("africa", "Africa"),
    ("antarctica", "Antarctica"),
    ("asia", "Asia"),
    ("europe", "Europe"),
    ("north-america", "North America"),
    ("oceania", "Oceania"),
    ("south-america", "South America"),
)

# Slugs of the countries that are created with in_eu=True; every other
# country gets in_eu=False.
_EU_MEMBER_SLUGS = frozenset([
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES", "FI",
    "FR", "GB", "GR", "HR", "HU", "IE", "IT", "LT", "LU", "LV",
    "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
])

# (two-letter country slug, display name, continent slug), ordered by slug.
# The non-ASCII names rely on the module's unicode_literals import and
# utf-8 coding line.
_COUNTRY_NAMES = (
    ("AD", "Andorra", "europe"),
    ("AE", "United Arab Emirates", "asia"),
    ("AF", "Afghanistan", "asia"),
    ("AG", "Antigua and Barbuda", "north-america"),
    ("AI", "Anguilla", "north-america"),
    ("AL", "Albania", "europe"),
    ("AM", "Armenia", "asia"),
    ("AO", "Angola", "africa"),
    ("AQ", "Antarctica", "antarctica"),
    ("AR", "Argentina", "south-america"),
    ("AS", "American Samoa", "oceania"),
    ("AT", "Austria", "europe"),
    ("AU", "Australia", "oceania"),
    ("AW", "Aruba", "north-america"),
    ("AX", "Åland Islands", "europe"),
    ("AZ", "Azerbaijan", "asia"),
    ("BA", "Bosnia and Herzegovina", "europe"),
    ("BB", "Barbados", "north-america"),
    ("BD", "Bangladesh", "asia"),
    ("BE", "Belgium", "europe"),
    ("BF", "Burkina Faso", "africa"),
    ("BG", "Bulgaria", "europe"),
    ("BH", "Bahrain", "asia"),
    ("BI", "Burundi", "africa"),
    ("BJ", "Benin", "africa"),
    ("BL", "Saint Barthélemy", "north-america"),
    ("BM", "Bermuda", "north-america"),
    ("BN", "Brunei", "asia"),
    ("BO", "Bolivia", "south-america"),
    ("BQ", "Bonaire, Sint Eustatius and Saba", "north-america"),
    ("BR", "Brazil", "south-america"),
    ("BS", "Bahamas", "north-america"),
    ("BT", "Bhutan", "asia"),
    ("BV", "Bouvet Island", "antarctica"),
    ("BW", "Botswana", "africa"),
    ("BY", "Belarus", "europe"),
    ("BZ", "Belize", "north-america"),
    ("CA", "Canada", "north-america"),
    ("CC", "Cocos (Keeling) Islands", "asia"),
    ("CD", "Congo (the Democratic Republic of the)", "africa"),
    ("CF", "Central African Republic", "africa"),
    ("CG", "Congo", "africa"),
    ("CH", "Switzerland", "europe"),
    ("CI", "Côte d'Ivoire", "africa"),
    ("CK", "Cook Islands", "oceania"),
    ("CL", "Chile", "south-america"),
    ("CM", "Cameroon", "africa"),
    ("CN", "China", "asia"),
    ("CO", "Colombia", "south-america"),
    ("CR", "Costa Rica", "north-america"),
    ("CU", "Cuba", "north-america"),
    ("CV", "Cabo Verde", "africa"),
    ("CW", "Curaçao", "north-america"),
    ("CX", "Christmas Island", "asia"),
    ("CY", "Cyprus", "asia"),
    ("CZ", "Czech Republic", "europe"),
    ("DE", "Germany", "europe"),
    ("DJ", "Djibouti", "africa"),
    ("DK", "Denmark", "europe"),
    ("DM", "Dominica", "north-america"),
    ("DO", "Dominican Republic", "north-america"),
    ("DZ", "Algeria", "africa"),
    ("EC", "Ecuador", "south-america"),
    ("EE", "Estonia", "europe"),
    ("EG", "Egypt", "africa"),
    ("EH", "Western Sahara", "africa"),
    ("ER", "Eritrea", "africa"),
    ("ES", "Spain", "europe"),
    ("ET", "Ethiopia", "africa"),
    ("FI", "Finland", "europe"),
    ("FJ", "Fiji", "oceania"),
    ("FK", "Falkland Islands [Malvinas]", "south-america"),
    ("FM", "Micronesia (Federated States of)", "oceania"),
    ("FO", "Faroe Islands", "europe"),
    ("FR", "France", "europe"),
    ("GA", "Gabon", "africa"),
    ("GB", "United Kingdom", "europe"),
    ("GD", "Grenada", "north-america"),
    ("GE", "Georgia", "asia"),
    ("GF", "French Guiana", "south-america"),
    ("GG", "Guernsey", "europe"),
    ("GH", "Ghana", "africa"),
    ("GI", "Gibraltar", "europe"),
    ("GL", "Greenland", "north-america"),
    ("GM", "Gambia", "africa"),
    ("GN", "Guinea", "africa"),
    ("GP", "Guadeloupe", "north-america"),
    ("GQ", "Equatorial Guinea", "africa"),
    ("GR", "Greece", "europe"),
    ("GS", "South Georgia and the South Sandwich Islands", "antarctica"),
    ("GT", "Guatemala", "north-america"),
    ("GU", "Guam", "oceania"),
    ("GW", "Guinea-Bissau", "africa"),
    ("GY", "Guyana", "south-america"),
    ("HK", "Hong Kong", "asia"),
    ("HM", "Heard Island and McDonald Islands", "antarctica"),
    ("HN", "Honduras", "north-america"),
    ("HR", "Croatia", "europe"),
    ("HT", "Haiti", "north-america"),
    ("HU", "Hungary", "europe"),
    ("ID", "Indonesia", "asia"),
    ("IE", "Ireland", "europe"),
    ("IL", "Israel", "asia"),
    ("IM", "Isle of Man", "europe"),
    ("IN", "India", "asia"),
    ("IO", "British Indian Ocean Territory", "asia"),
    ("IQ", "Iraq", "asia"),
    ("IR", "Iran", "asia"),
    ("IS", "Iceland", "europe"),
    ("IT", "Italy", "europe"),
    ("JE", "Jersey", "europe"),
    ("JM", "Jamaica", "north-america"),
    ("JO", "Jordan", "asia"),
    ("JP", "Japan", "asia"),
    ("KE", "Kenya", "africa"),
    ("KG", "Kyrgyzstan", "asia"),
    ("KH", "Cambodia", "asia"),
    ("KI", "Kiribati", "oceania"),
    ("KM", "Comoros", "africa"),
    ("KN", "Saint Kitts and Nevis", "north-america"),
    ("KP", "North Korea", "asia"),
    ("KR", "South Korea", "asia"),
    ("KW", "Kuwait", "asia"),
    ("KY", "Cayman Islands", "north-america"),
    ("KZ", "Kazakhstan", "asia"),
    ("LA", "Laos", "asia"),
    ("LB", "Lebanon", "asia"),
    ("LC", "Saint Lucia", "north-america"),
    ("LI", "Liechtenstein", "europe"),
    ("LK", "Sri Lanka", "asia"),
    ("LR", "Liberia", "africa"),
    ("LS", "Lesotho", "africa"),
    ("LT", "Lithuania", "europe"),
    ("LU", "Luxembourg", "europe"),
    ("LV", "Latvia", "europe"),
    ("LY", "Libya", "africa"),
    ("MA", "Morocco", "africa"),
    ("MC", "Monaco", "europe"),
    ("MD", "Moldova", "europe"),
    ("ME", "Montenegro", "europe"),
    ("MF", "Saint Martin (French part)", "north-america"),
    ("MG", "Madagascar", "africa"),
    ("MH", "Marshall Islands", "oceania"),
    ("MK", "Macedonia", "europe"),
    ("ML", "Mali", "africa"),
    ("MM", "Myanmar", "asia"),
    ("MN", "Mongolia", "asia"),
    ("MO", "Macao", "asia"),
    ("MP", "Northern Mariana Islands", "oceania"),
    ("MQ", "Martinique", "north-america"),
    ("MR", "Mauritania", "africa"),
    ("MS", "Montserrat", "north-america"),
    ("MT", "Malta", "europe"),
    ("MU", "Mauritius", "africa"),
    ("MV", "Maldives", "asia"),
    ("MW", "Malawi", "africa"),
    ("MX", "Mexico", "north-america"),
    ("MY", "Malaysia", "asia"),
    ("MZ", "Mozambique", "africa"),
    ("NA", "Namibia", "africa"),
    ("NC", "New Caledonia", "oceania"),
    ("NE", "Niger", "africa"),
    ("NF", "Norfolk Island", "oceania"),
    ("NG", "Nigeria", "africa"),
    ("NI", "Nicaragua", "north-america"),
    ("NL", "Netherlands", "europe"),
    ("NO", "Norway", "europe"),
    ("NP", "Nepal", "asia"),
    ("NR", "Nauru", "oceania"),
    ("NU", "Niue", "oceania"),
    ("NZ", "New Zealand", "oceania"),
    ("OM", "Oman", "asia"),
    ("PA", "Panama", "north-america"),
    ("PE", "Peru", "south-america"),
    ("PF", "French Polynesia", "oceania"),
    ("PG", "Papua New Guinea", "oceania"),
    ("PH", "Philippines", "asia"),
    ("PK", "Pakistan", "asia"),
    ("PL", "Poland", "europe"),
    ("PM", "Saint Pierre and Miquelon", "north-america"),
    ("PN", "Pitcairn", "oceania"),
    ("PR", "Puerto Rico", "north-america"),
    ("PS", "Palestine, State of", "asia"),
    ("PT", "Portugal", "europe"),
    ("PW", "Palau", "oceania"),
    ("PY", "Paraguay", "south-america"),
    ("QA", "Qatar", "asia"),
    ("RE", "Réunion", "africa"),
    ("RO", "Romania", "europe"),
    ("RS", "Serbia", "europe"),
    ("RU", "Russia", "europe"),
    ("RW", "Rwanda", "africa"),
    ("SA", "Saudi Arabia", "asia"),
    ("SB", "Solomon Islands", "oceania"),
    ("SC", "Seychelles", "africa"),
    ("SD", "Sudan", "africa"),
    ("SE", "Sweden", "europe"),
    ("SG", "Singapore", "asia"),
    ("SH", "Saint Helena, Ascension and Tristan da Cunha", "africa"),
    ("SI", "Slovenia", "europe"),
    ("SJ", "Svalbard and Jan Mayen", "europe"),
    ("SK", "Slovakia", "europe"),
    ("SL", "Sierra Leone", "africa"),
    ("SM", "San Marino", "europe"),
    ("SN", "Senegal", "africa"),
    ("SO", "Somalia", "africa"),
    ("SR", "Suriname", "south-america"),
    ("SS", "South Sudan", "africa"),
    ("ST", "Sao Tome and Principe", "africa"),
    ("SV", "El Salvador", "north-america"),
    ("SX", "Sint Maarten (Dutch part)", "north-america"),
    ("SY", "Syria", "asia"),
    ("SZ", "Swaziland", "africa"),
    ("TC", "Turks and Caicos Islands", "north-america"),
    ("TD", "Chad", "africa"),
    ("TF", "French Southern Territories", "antarctica"),
    ("TG", "Togo", "africa"),
    ("TH", "Thailand", "asia"),
    ("TJ", "Tajikistan", "asia"),
    ("TK", "Tokelau", "oceania"),
    ("TL", "Timor-Leste", "asia"),
    ("TM", "Turkmenistan", "asia"),
    ("TN", "Tunisia", "africa"),
    ("TO", "Tonga", "oceania"),
    ("TR", "Turkey", "europe"),
    ("TT", "Trinidad and Tobago", "north-america"),
    ("TV", "Tuvalu", "oceania"),
    ("TW", "Taiwan", "asia"),
    ("TZ", "Tanzania", "africa"),
    ("UA", "Ukraine", "europe"),
    ("UG", "Uganda", "africa"),
    ("UM", "United States Minor Outlying Islands", "oceania"),
    ("US", "United States of America", "north-america"),
    ("UY", "Uruguay", "south-america"),
    ("UZ", "Uzbekistan", "asia"),
    ("VA", "Holy See", "europe"),
    ("VC", "Saint Vincent and the Grenadines", "north-america"),
    ("VE", "Venezuela", "south-america"),
    ("VG", "Virgin Islands (British)", "north-america"),
    ("VI", "Virgin Islands (U.S.)", "north-america"),
    ("VN", "Vietnam", "asia"),
    ("VU", "Vanuatu", "oceania"),
    ("WF", "Wallis and Futuna", "oceania"),
    ("WS", "Samoa", "oceania"),
    ("YE", "Yemen", "asia"),
    ("YT", "Mayotte", "africa"),
    ("ZA", "South Africa", "africa"),
    ("ZM", "Zambia", "africa"),
    ("ZW", "Zimbabwe", "africa"),
)


def insert_initial_country_continent_names(apps, schema_editor):
    """Create the initial ContinentName and CountryName rows.

    Rows are looked up by slug (the primary key) only; name, continent and
    EU flag are passed as ``defaults``. A row that already exists under the
    same slug is therefore left untouched instead of triggering a second
    insert with a duplicate primary key.

    ``apps`` is the historical app registry handed in by RunPython;
    ``schema_editor`` is part of the RunPython callback signature and is
    not used here.
    """
    ContinentName = apps.get_model("name", "ContinentName")
    CountryName = apps.get_model("name", "CountryName")

    # Continents first: every country row references one of them.
    continents = {}
    for slug, name in _CONTINENT_NAMES:
        continents[slug], _ = ContinentName.objects.get_or_create(
            slug=slug, defaults={"name": name})

    for slug, name, continent_slug in _COUNTRY_NAMES:
        CountryName.objects.get_or_create(
            slug=slug,
            defaults={
                "name": name,
                "continent": continents[continent_slug],
                "in_eu": slug in _EU_MEMBER_SLUGS,
            })
|
||||
|
||||
class Migration(migrations.Migration):
    """Data migration that loads the initial continent and country names."""

    dependencies = [
        ('name', '0019_continentname_countryname'),
    ]

    # Forward: insert the rows. Reverse: no-op, so the migration can be
    # unapplied without touching the data.
    operations = [
        migrations.RunPython(
            insert_initial_country_continent_names,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
|
|
@ -99,4 +99,10 @@ class ReviewResultName(NameModel):
|
|||
"""Almost ready, Has issues, Has nits, Not Ready,
|
||||
On the right track, Ready, Ready with issues,
|
||||
Ready with nits, Serious Issues"""
|
||||
class ContinentName(NameModel):
    """Continents: Africa, Antarctica, Asia, ..."""
|
||||
class CountryName(NameModel):
    """Countries: Afghanistan, Aaland Islands, Albania, ..."""
    # Continent this country is filed under.
    continent = models.ForeignKey(ContinentName)
    # Flag shown as "In EU"; defaults to False.
    in_eu = models.BooleanField(default=False, verbose_name="In EU")
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from ietf.name.models import (TimeSlotTypeName, GroupStateName, DocTagName, Inte
|
|||
LiaisonStatementTagName, FeedbackTypeName, LiaisonStatementState, StreamName,
|
||||
BallotPositionName, DBTemplateTypeName, NomineePositionStateName,
|
||||
ReviewRequestStateName, ReviewTypeName, ReviewResultName,
|
||||
FormalLanguageName)
|
||||
FormalLanguageName, ContinentName, CountryName)
|
||||
|
||||
|
||||
class TimeSlotTypeNameResource(ModelResource):
|
||||
|
@ -474,3 +474,38 @@ class FormalLanguageNameResource(ModelResource):
|
|||
}
|
||||
api.name.register(FormalLanguageNameResource())
|
||||
|
||||
|
||||
|
||||
class ContinentNameResource(ModelResource):
    """API resource exposing all ContinentName rows."""
    class Meta:
        queryset = ContinentName.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'continentname'
        # Fields that API clients are allowed to filter on.
        filtering = {
            "slug": ALL,
            "name": ALL,
            "desc": ALL,
            "used": ALL,
            "order": ALL,
        }
# Make the resource available under the "name" API.
api.name.register(ContinentNameResource())
|
||||
|
||||
class CountryNameResource(ModelResource):
    """API resource exposing all CountryName rows."""
    # The continent is represented through ContinentNameResource rather
    # than as a raw foreign key value.
    continent = ToOneField(ContinentNameResource, 'continent')
    class Meta:
        queryset = CountryName.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'countryname'
        # Fields that API clients are allowed to filter on; "continent"
        # additionally allows filtering across the relation.
        filtering = {
            "slug": ALL,
            "name": ALL,
            "desc": ALL,
            "used": ALL,
            "order": ALL,
            "in_eu": ALL,
            "continent": ALL_WITH_RELATIONS,
        }
# Make the resource available under the "name" API.
api.name.register(CountryNameResource())
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from django.contrib import admin
|
||||
|
||||
|
||||
from ietf.person.models import Email, Alias, Person, AffiliationAlias, AffiliationIgnoredEnding
|
||||
from ietf.person.models import Email, Alias, Person
|
||||
from ietf.person.name import name_parts
|
||||
|
||||
class EmailAdmin(admin.ModelAdmin):
|
||||
|
@ -32,14 +32,3 @@ class PersonAdmin(admin.ModelAdmin):
|
|||
inlines = [ EmailInline, AliasInline, ]
|
||||
# actions = None
|
||||
admin.site.register(Person, PersonAdmin)
|
||||
|
||||
class AffiliationAliasAdmin(admin.ModelAdmin):
|
||||
list_filter = ["name"]
|
||||
list_display = ["alias", "name"]
|
||||
search_fields = ["alias", "name"]
|
||||
admin.site.register(AffiliationAlias, AffiliationAliasAdmin)
|
||||
|
||||
class AffiliationIgnoredEndingAdmin(admin.ModelAdmin):
|
||||
list_display = ["ending"]
|
||||
search_fields = ["ending"]
|
||||
admin.site.register(AffiliationIgnoredEnding, AffiliationIgnoredEndingAdmin)
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('person', '0014_auto_20160613_0751'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='AffiliationAlias',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('alias', models.CharField(help_text=b'Note that aliases are matched without regarding case.', max_length=255)),
|
||||
('name', models.CharField(max_length=255)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='AffiliationIgnoredEnding',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('ending', models.CharField(max_length=255)),
|
||||
],
|
||||
),
|
||||
]
|
|
@ -1,29 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
def add_affiliation_info(apps, schema_editor):
|
||||
AffiliationAlias = apps.get_model("person", "AffiliationAlias")
|
||||
|
||||
AffiliationAlias.objects.get_or_create(alias="cisco", name="Cisco Systems")
|
||||
AffiliationAlias.objects.get_or_create(alias="cisco system", name="Cisco Systems")
|
||||
AffiliationAlias.objects.get_or_create(alias="cisco systems (india) private limited", name="Cisco Systems")
|
||||
AffiliationAlias.objects.get_or_create(alias="cisco systems india pvt", name="Cisco Systems")
|
||||
|
||||
AffiliationIgnoredEnding = apps.get_model("person", "AffiliationIgnoredEnding")
|
||||
AffiliationIgnoredEnding.objects.get_or_create(ending="LLC\.?")
|
||||
AffiliationIgnoredEnding.objects.get_or_create(ending="Ltd\.?")
|
||||
AffiliationIgnoredEnding.objects.get_or_create(ending="Inc\.?")
|
||||
AffiliationIgnoredEnding.objects.get_or_create(ending="GmbH\.?")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('person', '0015_affiliationalias_affiliationignoredending'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(add_affiliation_info, migrations.RunPython.noop)
|
||||
]
|
|
@ -241,26 +241,3 @@ class Email(models.Model):
|
|||
return
|
||||
return self.address
|
||||
|
||||
|
||||
class AffiliationAlias(models.Model):
|
||||
"""Records that alias should be treated as name for statistical
|
||||
purposes."""
|
||||
|
||||
alias = models.CharField(max_length=255, help_text="Note that aliases are matched without regarding case.")
|
||||
name = models.CharField(max_length=255)
|
||||
|
||||
def __unicode__(self):
|
||||
return u"{} -> {}".format(self.alias, self.name)
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
self.alias = self.alias.lower()
|
||||
super(AffiliationAlias, self).save(*args, **kwargs)
|
||||
|
||||
class AffiliationIgnoredEnding(models.Model):
|
||||
"""Records that ending should be stripped from the affiliation for statistical purposes."""
|
||||
|
||||
ending = models.CharField(max_length=255, help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!")
|
||||
|
||||
def __unicode__(self):
|
||||
return self.ending
|
||||
|
||||
|
|
|
@ -6,8 +6,7 @@ from tastypie.cache import SimpleCache
|
|||
|
||||
from ietf import api
|
||||
|
||||
from ietf.person.models import (Person, Email, Alias, PersonHistory,
|
||||
AffiliationAlias, AffiliationIgnoredEnding)
|
||||
from ietf.person.models import (Person, Email, Alias, PersonHistory)
|
||||
|
||||
|
||||
from ietf.utils.resources import UserResource
|
||||
|
@ -82,29 +81,3 @@ class PersonHistoryResource(ModelResource):
|
|||
"user": ALL_WITH_RELATIONS,
|
||||
}
|
||||
api.person.register(PersonHistoryResource())
|
||||
|
||||
class AffiliationIgnoredEndingResource(ModelResource):
|
||||
class Meta:
|
||||
queryset = AffiliationIgnoredEnding.objects.all()
|
||||
serializer = api.Serializer()
|
||||
cache = SimpleCache()
|
||||
#resource_name = 'affiliationignoredending'
|
||||
filtering = {
|
||||
"id": ALL,
|
||||
"ending": ALL,
|
||||
}
|
||||
api.person.register(AffiliationIgnoredEndingResource())
|
||||
|
||||
class AffiliationAliasResource(ModelResource):
|
||||
class Meta:
|
||||
queryset = AffiliationAlias.objects.all()
|
||||
serializer = api.Serializer()
|
||||
cache = SimpleCache()
|
||||
#resource_name = 'affiliationalias'
|
||||
filtering = {
|
||||
"id": ALL,
|
||||
"alias": ALL,
|
||||
"name": ALL,
|
||||
}
|
||||
api.person.register(AffiliationAliasResource())
|
||||
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
import pprint
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.models import User
|
||||
from ietf.person.models import Person, AffiliationAlias, AffiliationIgnoredEnding
|
||||
from ietf.person.models import Person
|
||||
|
||||
def merge_persons(source,target,stream):
|
||||
|
||||
|
@ -88,86 +86,3 @@ def merge_persons(source,target,stream):
|
|||
else:
|
||||
print >>stream, "Deleting Person: {}({})".format(source.ascii,source.pk)
|
||||
source.delete()
|
||||
|
||||
|
||||
def compile_affiliation_ending_stripping_regexp():
|
||||
parts = []
|
||||
for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True):
|
||||
try:
|
||||
re.compile(ending_re)
|
||||
except re.error:
|
||||
pass
|
||||
|
||||
parts.append(ending_re)
|
||||
|
||||
re_str = ",? *({}) *$".format("|".join(parts))
|
||||
|
||||
return re.compile(re_str, re.IGNORECASE)
|
||||
|
||||
|
||||
def get_aliased_affiliations(affiliations):
|
||||
"""Given non-unique sequence of affiliations, returns dictionary with
|
||||
aliases needed.
|
||||
|
||||
We employ the following strategies, interleaved:
|
||||
|
||||
- Stripping company endings like Inc., GmbH etc. from database
|
||||
|
||||
- Looking up aliases stored directly in the database, like
|
||||
"Examplar International" -> "Examplar"
|
||||
|
||||
- Case-folding so Examplar and EXAMPLAR is merged with the
|
||||
winner being the one with most occurrences (so input should not
|
||||
be made unique) or most upper case letters in case of ties.
|
||||
Case folding can be overridden by the aliases in the database."""
|
||||
|
||||
res = {}
|
||||
|
||||
ending_re = compile_affiliation_ending_stripping_regexp()
|
||||
|
||||
known_aliases = { alias.lower(): name for alias, name in AffiliationAlias.objects.values_list("alias", "name") }
|
||||
|
||||
affiliations_with_case_spellings = defaultdict(set)
|
||||
case_spelling_count = defaultdict(int)
|
||||
for affiliation in affiliations:
|
||||
original_affiliation = affiliation
|
||||
|
||||
# check aliases from DB
|
||||
alias = known_aliases.get(affiliation.lower())
|
||||
if alias is not None:
|
||||
affiliation = alias
|
||||
res[original_affiliation] = affiliation
|
||||
|
||||
# strip ending
|
||||
alias = ending_re.sub("", affiliation)
|
||||
if alias != affiliation:
|
||||
affiliation = alias
|
||||
res[original_affiliation] = affiliation
|
||||
|
||||
# check aliases from DB
|
||||
alias = known_aliases.get(affiliation.lower())
|
||||
if alias is not None:
|
||||
affiliation = alias
|
||||
res[original_affiliation] = affiliation
|
||||
|
||||
affiliations_with_case_spellings[affiliation.lower()].add(original_affiliation)
|
||||
case_spelling_count[affiliation] += 1
|
||||
|
||||
def affiliation_sort_key(affiliation):
|
||||
count = case_spelling_count[affiliation]
|
||||
uppercase_letters = sum(1 for c in affiliation if c.isupper())
|
||||
return (count, uppercase_letters)
|
||||
|
||||
# now we just need to pick the most popular uppercase/lowercase
|
||||
# spelling for each affiliation with more than one
|
||||
for similar_affiliations in affiliations_with_case_spellings.itervalues():
|
||||
if len(similar_affiliations) > 1:
|
||||
most_popular = sorted(similar_affiliations, key=affiliation_sort_key, reverse=True)[0]
|
||||
for affiliation in similar_affiliations:
|
||||
if affiliation != most_popular:
|
||||
res[affiliation] = most_popular
|
||||
|
||||
return res
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -4,8 +4,6 @@ import os
|
|||
|
||||
from django import forms
|
||||
|
||||
from django_countries.fields import countries
|
||||
|
||||
from ietf.doc.models import Document, DocAlias, State
|
||||
from ietf.name.models import IntendedStdLevelName, DocRelationshipName
|
||||
from ietf.group.models import Group
|
||||
|
@ -107,7 +105,7 @@ class AuthorForm(forms.Form):
|
|||
person = forms.CharField(max_length=50,widget=forms.TextInput(attrs={'class':'name-autocomplete'}),help_text="To see a list of people type the first name, or last name, or both.")
|
||||
email = forms.CharField(widget=forms.Select(),help_text="Select an email.")
|
||||
affiliation = forms.CharField(max_length=100, required=False, help_text="Affiliation")
|
||||
country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False, help_text="Country")
|
||||
country = forms.CharField(max_length=255, required=False, help_text="Country")
|
||||
|
||||
# check for id within parenthesis to ensure name was selected from the list
|
||||
def clean_person(self):
|
||||
|
|
|
@ -293,7 +293,6 @@ INSTALLED_APPS = (
|
|||
'tastypie',
|
||||
'widget_tweaks',
|
||||
'django_markup',
|
||||
'django_countries',
|
||||
# IETF apps
|
||||
'ietf.api',
|
||||
'ietf.community',
|
||||
|
@ -315,6 +314,7 @@ INSTALLED_APPS = (
|
|||
'ietf.redirects',
|
||||
'ietf.release',
|
||||
'ietf.review',
|
||||
'ietf.stats',
|
||||
'ietf.submit',
|
||||
'ietf.sync',
|
||||
'ietf.utils',
|
||||
|
|
22
ietf/stats/admin.py
Normal file
22
ietf/stats/admin.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
from django.contrib import admin
|
||||
|
||||
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias
|
||||
|
||||
|
||||
class AffiliationAliasAdmin(admin.ModelAdmin):
    """Admin configuration for AffiliationAlias."""
    # Sidebar filter by the name an alias maps to.
    list_filter = ["name"]
    list_display = ["alias", "name"]
    search_fields = ["alias", "name"]
admin.site.register(AffiliationAlias, AffiliationAliasAdmin)
|
||||
|
||||
class AffiliationIgnoredEndingAdmin(admin.ModelAdmin):
    """Admin configuration for AffiliationIgnoredEnding."""
    list_display = ["ending"]
    search_fields = ["ending"]
admin.site.register(AffiliationIgnoredEnding, AffiliationIgnoredEndingAdmin)
|
||||
|
||||
class CountryAliasAdmin(admin.ModelAdmin):
    """Admin configuration for CountryAlias."""
    # Sidebar filter by the country an alias maps to.
    list_filter = ["country"]
    list_display = ["alias", "country"]
    # Search matches the alias text or the name of the related country.
    search_fields = ["alias", "country__name"]
admin.site.register(CountryAlias, CountryAliasAdmin)
|
||||
|
|
@ -26,7 +26,6 @@ args = parser.parse_args()
|
|||
|
||||
formal_language_dict = { l.pk: l for l in FormalLanguageName.objects.all() }
|
||||
|
||||
|
||||
docs_qs = Document.objects.filter(type="draft")
|
||||
|
||||
if args.document:
|
||||
|
@ -80,11 +79,20 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen
|
|||
for author in old_authors:
|
||||
for alias in author.person.alias_set.all():
|
||||
old_authors_by_name[alias.name] = author
|
||||
old_authors_by_name[author.person.plain_name()] = author
|
||||
|
||||
if author.email_id:
|
||||
old_authors_by_email[author.email_id] = author
|
||||
|
||||
for full, _, _, _, _, email, company in d.get_author_list():
|
||||
# the draft parser sometimes has a problem if affiliation
|
||||
# isn't in the second line, then it will report an extra
|
||||
# author - skip those
|
||||
seen = set()
|
||||
for full, _, _, _, _, email, country, company in d.get_author_list():
|
||||
if email in seen:
|
||||
continue
|
||||
seen.add(email)
|
||||
|
||||
old_author = None
|
||||
if email:
|
||||
old_author = old_authors_by_email.get(email)
|
||||
|
@ -92,15 +100,29 @@ for doc in docs_qs.prefetch_related("docalias_set", "formal_languages", "documen
|
|||
old_author = old_authors_by_name.get(full)
|
||||
|
||||
if not old_author:
|
||||
print "UNKNOWN AUTHOR", doc.name, full, email, company
|
||||
print "UNKNOWN AUTHOR", doc.name, full, email, country, company
|
||||
continue
|
||||
|
||||
if old_author.affiliation != company:
|
||||
print "new affiliation", old_author.affiliation, company
|
||||
print "new affiliation", canonical_name, "[", full, "]", old_author.affiliation, "->", company
|
||||
old_author.affiliation = company
|
||||
old_author.save(update_fields=["affiliation"])
|
||||
updated = True
|
||||
|
||||
if country is None:
|
||||
country = ""
|
||||
|
||||
try:
|
||||
country = country.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
country = country.decode("latin-1")
|
||||
|
||||
if old_author.country != country:
|
||||
print "new country", canonical_name ,"[", full, "]", old_author.country.encode("utf-8"), "->", country.encode("utf-8")
|
||||
old_author.country = country
|
||||
old_author.save(update_fields=["country"])
|
||||
updated = True
|
||||
|
||||
|
||||
if updates:
|
||||
Document.objects.filter(pk=doc.pk).update(**updates)
|
||||
|
|
37
ietf/stats/migrations/0001_initial.py
Normal file
37
ietf/stats/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the AffiliationAlias, AffiliationIgnoredEnding and CountryAlias tables."""

    # CountryAlias has a foreign key to name.CountryName, so the name app
    # must be migrated first.
    dependencies = [
        ('name', '0020_add_country_continent_names'),
    ]

    operations = [
        # alias -> canonical affiliation name; the alias is unique.
        migrations.CreateModel(
            name='AffiliationAlias',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('alias', models.CharField(help_text=b"Note that aliases will be matched case-insensitive and both before and after some clean-up.", max_length=255, unique=True)),
                ('name', models.CharField(max_length=255)),
            ],
        ),
        # Regular expressions for endings to ignore in affiliations.
        migrations.CreateModel(
            name='AffiliationIgnoredEnding',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('ending', models.CharField(help_text=b"Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!", max_length=255)),
            ],
        ),
        # alias -> country row in the name app.
        migrations.CreateModel(
            name='CountryAlias',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('alias', models.CharField(help_text=b"Note that aliases are matched case-insensitive if the length is > 2.", max_length=255)),
                ('country', models.ForeignKey(to='name.CountryName', max_length=255)),
            ],
        ),
    ]
|
87
ietf/stats/migrations/0002_add_initial_aliases.py
Normal file
87
ietf/stats/migrations/0002_add_initial_aliases.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
def add_affiliation_info(apps, schema_editor):
    """Insert the initial affiliation aliases, ignored endings and country aliases.

    Forward function for ``migrations.RunPython``. Every row is written
    with ``get_or_create``, so running it again does not create
    duplicates.

    Args:
        apps: migration app registry used to look up the historical
            ``stats`` models.
        schema_editor: unused; present because RunPython passes it.

    Returns:
        None.
    """
    AffiliationAlias = apps.get_model("stats", "AffiliationAlias")

    for alias in (
        "cisco",
        "cisco system",
        "cisco systems (india) private limited",
        "cisco systems india pvt",
    ):
        AffiliationAlias.objects.get_or_create(alias=alias, name="Cisco Systems")

    AffiliationIgnoredEnding = apps.get_model("stats", "AffiliationIgnoredEnding")

    # Raw strings: these are regular expressions, and "\." in a plain
    # string literal is an invalid escape sequence. The stored values
    # are unchanged.
    for ending in (r"LLC\.?", r"Ltd\.?", r"Inc\.?", r"GmbH\.?"):
        AffiliationIgnoredEnding.objects.get_or_create(ending=ending)

    CountryAlias = apps.get_model("stats", "CountryAlias")

    # (alias, ISO country code) pairs, kept in their original insertion
    # order. Misspellings such as "sweeden" or "isreal" are deliberate:
    # they are spellings the aliases are meant to catch.
    country_aliases = [
        ("russian federation", "RU"),
        ("p. r. china", "CN"),
        ("p.r. china", "CN"),
        ("p.r.china", "CN"),
        ("p.r china", "CN"),
        ("p.r. of china", "CN"),
        ("PRC", "CN"),
        ("P.R.C", "CN"),
        ("P.R.C.", "CN"),
        ("beijing", "CN"),
        ("shenzhen", "CN"),
        ("R.O.C.", "TW"),
        ("usa", "US"),
        ("UAS", "US"),
        ("USA.", "US"),
        ("u.s.a.", "US"),
        ("u. s. a.", "US"),
        ("u.s.a", "US"),
        ("u.s.", "US"),
        # Was mapped to "GB", which sent "U.S" addresses to the United
        # Kingdom; "U.K" has its own entry below.
        ("U.S", "US"),
        ("US of A", "US"),
        ("united sates", "US"),
        ("united state", "US"),
        ("united states", "US"),
        ("unites states", "US"),
        ("texas", "US"),
        ("UK", "GB"),
        ("united kingcom", "GB"),
        ("great britain", "GB"),
        ("england", "GB"),
        ("U.K.", "GB"),
        ("U.K", "GB"),
        ("Uk", "GB"),
        ("scotland", "GB"),
        ("republic of korea", "KR"),
        ("korea", "KR"),
        ("korea rep", "KR"),
        ("korea (the republic of)", "KR"),
        ("the netherlands", "NL"),
        ("netherland", "NL"),
        ("danmark", "DK"),
        ("sweeden", "SE"),
        ("swede", "SE"),
        ("belgique", "BE"),
        ("madrid", "ES"),
        ("espana", "ES"),
        ("hellas", "GR"),
        ("gemany", "DE"),
        ("deutschland", "DE"),
        ("italia", "IT"),
        ("isreal", "IL"),
        ("tel aviv", "IL"),
        ("UAE", "AE"),
        ("grand-duchy of luxembourg", "LU"),
        ("brasil", "BR"),
    ]
    for alias, country_id in country_aliases:
        CountryAlias.objects.get_or_create(alias=alias, country_id=country_id)
|
||||
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Data migration that runs add_affiliation_info."""

    # Needs the tables created by the initial stats migration.
    dependencies = [
        ('stats', '0001_initial'),
    ]

    operations = [
        # The reverse operation is a no-op: migrating backwards leaves the
        # inserted rows untouched.
        migrations.RunPython(add_affiliation_info, migrations.RunPython.noop)
    ]
|
0
ietf/stats/migrations/__init__.py
Normal file
0
ietf/stats/migrations/__init__.py
Normal file
41
ietf/stats/models.py
Normal file
41
ietf/stats/models.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
from django.db import models
|
||||
from ietf.name.models import CountryName
|
||||
|
||||
class AffiliationAlias(models.Model):
    """Records that alias should be treated as name for statistical
    purposes."""

    # Alternative spelling. Unique, and lower-cased by save() before it
    # is stored.
    alias = models.CharField(max_length=255, help_text="Note that aliases will be matched case-insensitive and both before and after some clean-up.", unique=True)
    # The name the alias should be treated as.
    name = models.CharField(max_length=255)

    def __unicode__(self):
        return u"{} -> {}".format(self.alias, self.name)

    def save(self, *args, **kwargs):
        """Lower-case the alias, then save through the parent class."""
        self.alias = self.alias.lower()
        super(AffiliationAlias, self).save(*args, **kwargs)

    class Meta:
        verbose_name_plural = "affiliation aliases"
|
||||
|
||||
class AffiliationIgnoredEnding(models.Model):
    """Records that ending should be stripped from the affiliation for statistical purposes."""

    # A regular expression, not a plain string, so a literal "." has to
    # be escaped (see the help text).
    ending = models.CharField(max_length=255, help_text="Regexp with ending, e.g. 'Inc\\.?' - remember to escape .!")

    def __unicode__(self):
        return self.ending
|
||||
|
||||
class CountryAlias(models.Model):
    """Records that alias should be treated as country for statistical
    purposes."""

    # Alternative spelling of the country; the help text describes the
    # case-sensitivity rules applied when matching.
    alias = models.CharField(max_length=255, help_text="Note that lower-case aliases are matched case-insensitive while aliases with at least one uppercase letter is matched case-sensitive.")
    # NOTE(review): max_length looks like a leftover on a ForeignKey. The
    # initial stats migration carries the same argument, so dropping it
    # would probably need a new migration - confirm before changing.
    country = models.ForeignKey(CountryName, max_length=255)

    def __unicode__(self):
        return u"{} -> {}".format(self.alias, self.country.name)

    class Meta:
        verbose_name_plural = "country aliases"
|
||||
|
52
ietf/stats/resources.py
Normal file
52
ietf/stats/resources.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
# Autogenerated by the makeresources management command 2017-02-15 10:10 PST
|
||||
from tastypie.resources import ModelResource
|
||||
from tastypie.fields import ToManyField # pyflakes:ignore
|
||||
from tastypie.constants import ALL, ALL_WITH_RELATIONS # pyflakes:ignore
|
||||
from tastypie.cache import SimpleCache
|
||||
|
||||
from ietf import api
|
||||
from ietf.api import ToOneField # pyflakes:ignore
|
||||
|
||||
from ietf.stats.models import CountryAlias, AffiliationIgnoredEnding, AffiliationAlias
|
||||
|
||||
|
||||
from ietf.name.resources import CountryNameResource
|
||||
class CountryAliasResource(ModelResource):
    """API resource exposing all CountryAlias rows."""
    # The country is represented through CountryNameResource rather than
    # as a raw foreign key value.
    country = ToOneField(CountryNameResource, 'country')
    class Meta:
        queryset = CountryAlias.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'countryalias'
        # Fields that API clients are allowed to filter on; "country"
        # additionally allows filtering across the relation.
        filtering = {
            "id": ALL,
            "alias": ALL,
            "country": ALL_WITH_RELATIONS,
        }
# Make the resource available under the "stats" API.
api.stats.register(CountryAliasResource())
|
||||
|
||||
class AffiliationIgnoredEndingResource(ModelResource):
    """API resource exposing all AffiliationIgnoredEnding rows."""
    class Meta:
        queryset = AffiliationIgnoredEnding.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'affiliationignoredending'
        # Fields that API clients are allowed to filter on.
        filtering = {
            "id": ALL,
            "ending": ALL,
        }
# Make the resource available under the "stats" API.
api.stats.register(AffiliationIgnoredEndingResource())
|
||||
|
||||
class AffiliationAliasResource(ModelResource):
    """API resource exposing all AffiliationAlias rows."""
    class Meta:
        queryset = AffiliationAlias.objects.all()
        serializer = api.Serializer()
        cache = SimpleCache()
        #resource_name = 'affiliationalias'
        # Fields that API clients are allowed to filter on.
        filtering = {
            "id": ALL,
            "alias": ALL,
            "name": ALL,
        }
# Make the resource available under the "stats" API.
api.stats.register(AffiliationAliasResource())
|
||||
|
|
@ -25,7 +25,8 @@ class StatisticsTests(TestCase):
|
|||
self.assertTrue(authors_url in r["Location"])
|
||||
|
||||
# check various stats types
|
||||
for stats_type in ["authors", "pages", "words", "format", "formlang", "author/documents", "author/affiliation"]:
|
||||
for stats_type in ["authors", "pages", "words", "format", "formlang",
|
||||
"author/documents", "author/affiliation", "author/country", "author/continent"]:
|
||||
for document_type in ["", "rfc", "draft"]:
|
||||
for time_choice in ["", "5y"]:
|
||||
url = urlreverse(ietf.stats.views.document_stats, kwargs={ "stats_type": stats_type })
|
||||
|
|
198
ietf/stats/utils.py
Normal file
198
ietf/stats/utils.py
Normal file
|
@ -0,0 +1,198 @@
|
|||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias
|
||||
from ietf.name.models import CountryName
|
||||
|
||||
def compile_affiliation_ending_stripping_regexp():
    """Build the regexp that strips ignored endings from an affiliation.

    Reads every ``AffiliationIgnoredEnding.ending`` from the database and
    joins them into one case-insensitive pattern. The pattern matches an
    optional comma, optional spaces, one of the endings and optional
    trailing spaces at the end of the string.

    The endings are regular expressions typed in by hand, so each one is
    test-compiled first and malformed ones are left out. Previously a
    malformed ending was detected but still appended, which made the
    final ``re.compile`` raise.

    Returns:
        A compiled regular expression object.
    """
    parts = []
    for ending_re in AffiliationIgnoredEnding.objects.values_list("ending", flat=True):
        try:
            re.compile(ending_re)
        except re.error:
            # One broken row must not take down the whole pattern.
            continue

        parts.append(ending_re)

    re_str = ",? *({}) *$".format("|".join(parts))

    return re.compile(re_str, re.IGNORECASE)
|
||||
|
||||
|
||||
def get_aliased_affiliations(affiliations):
    """Given non-unique sequence of affiliations, returns dictionary with
    aliases needed.

    We employ the following strategies, interleaved:

    - Stripping company endings like Inc., GmbH etc. from database

    - Looking up aliases stored directly in the database, like
      "Examplar International" -> "Examplar"

    - Case-folding so Examplar and EXAMPLAR is merged with the
      winner being the one with most occurrences (so input should not
      be made unique) or most upper case letters in case of ties.
      Case folding can be overridden by the aliases in the database.

    Case folding is done on the cleaned spellings (after alias lookup and
    ending stripping). Occurrences were already counted per cleaned
    spelling, but the candidates used to be the original spellings, so an
    uncleaned original such as "CISCO Inc." could win and overwrite the
    result of an alias lookup.

    Args:
        affiliations: iterable of affiliation strings, with repeats; it
            is consumed once.

    Returns:
        Dict mapping each original affiliation that needs replacing to
        its canonical spelling. Affiliations that are already canonical
        are not included.
    """
    ending_re = compile_affiliation_ending_stripping_regexp()

    known_aliases = {
        alias.lower(): name
        for alias, name in AffiliationAlias.objects.values_list("alias", "name")
    }

    def apply_alias(value):
        # Return the database name for value if it is a known alias.
        name = known_aliases.get(value.lower())
        return value if name is None else name

    cleaned_by_original = {}
    spellings_by_folded = defaultdict(set)
    spelling_count = defaultdict(int)

    for original in affiliations:
        # alias lookup, strip ending, then alias lookup again since the
        # stripped form may itself be a known alias
        cleaned = apply_alias(original)
        cleaned = ending_re.sub("", cleaned)
        cleaned = apply_alias(cleaned)

        cleaned_by_original[original] = cleaned
        spellings_by_folded[cleaned.lower()].add(cleaned)
        spelling_count[cleaned] += 1

    def spelling_sort_key(spelling):
        uppercase_letters = sum(1 for c in spelling if c.isupper())
        # the spelling itself is the last component so that ties are
        # resolved the same way on every run
        return (spelling_count[spelling], uppercase_letters, spelling)

    # pick the most popular uppercase/lowercase spelling within each
    # group of cleaned spellings that only differ by case
    winner_by_folded = {
        folded: max(spellings, key=spelling_sort_key)
        for folded, spellings in spellings_by_folded.items()
    }

    res = {}
    for original, cleaned in cleaned_by_original.items():
        canonical = winner_by_folded[cleaned.lower()]
        if canonical != original:
            res[original] = canonical

    return res
|
||||
|
||||
|
||||
|
||||
|
||||
def get_aliased_countries(countries):
    """Resolve free-form author country strings to known country names.

    ``countries`` is any iterable of country strings as entered by
    authors (it is consumed in a single pass, so a generator is fine).

    Returns a dict mapping each entry that is not already a known
    ``CountryName`` name to the name it was resolved to, or to "" when
    none of the heuristics below recognize it. Entries that already equal
    a known country name are left out of the result.

    The heuristics are tried in this order: exact/lowercase alias lookup,
    US state + zip code, alias lookup after trimming punctuation, last
    comma-separated part, last whitespace-separated part, alias found
    anywhere in the text, and finally two-letter upper-case slug.
    """
    known_aliases = dict(CountryAlias.objects.values_list("alias", "country__name"))

    iso_code_aliases = {}

    # add aliases for known countries
    for slug, name in CountryName.objects.values_list("slug", "name"):
        if len(name) > 2:
            known_aliases[name.lower()] = name

        if len(slug) == 2 and slug[0].isupper() and slug[1].isupper():
            iso_code_aliases[slug] = name # add ISO code

    def lookup_alias(possible_alias):
        # try the alias as written first, then lowercased; fall back to
        # returning the input unchanged so callers can keep refining it
        name = known_aliases.get(possible_alias)
        if name is not None:
            return name

        name = known_aliases.get(possible_alias.lower())
        if name is not None:
            return name

        return possible_alias

    # Ordered longest alias first (ties broken alphabetically) so that the
    # "country name anywhere" search below gives the same answer on every
    # run and prefers the most specific alias when several match.
    known_re_aliases = [
        (re.compile(u"\\b{}\\b".format(re.escape(alias))), name)
        for alias, name in sorted(known_aliases.items(), key=lambda t: (-len(t[0]), t[0]))
    ]

    # specific hack: check for zip codes from the US since in the
    # early days, the addresses often didn't include the country
    #
    # Dots in the abbreviations are escaped (and optional) so that e.g.
    # "Ma." only matches a literal dot, not an arbitrary character.
    us_zipcode_re = re.compile(
        r"\b(AL|AK|AZ|AR|CA|CO|CT|DE|DC|FL|GA|HI|ID|IL|IN|IA|KS|KY|LA|ME|MD|MA|MI|MN|MS|MO|MT|NE|NV|NH|NJ|NM|NY|NC|ND|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VT|VA|WA|WV|WI|WY|AS|GU|MP|PR|VI|UM|FM|MH|PW"
        r"|Ca|Cal\.?|California|CALIFORNIA|Colorado|Georgia|Illinois|Ill|Maryland|Ma\.?|Mass|Massachuss?etts|Michigan|Minnesota|New Jersey|New York|Ny|N\.Y\.?|North Carolina|NORTH CAROLINA|Ohio|Oregon|Pennsylvania|Tx|Texas|Tennessee|Utah|Vermont|Virginia|Va\.?|Washington)"
        r"[., -]*[0-9]{5}\b")

    us_country_name = CountryName.objects.get(slug="US").name

    def last_text_part_stripped(split):
        # last non-blank element of an already split string, stripped
        for t in reversed(split):
            t = t.strip()
            if t:
                return t
        return u""

    known_countries = set(CountryName.objects.values_list("name", flat=True))

    def resolve(country):
        # Returns the known country name for a raw string, or "" if unknown.

        # aliased name
        country = lookup_alias(country)
        if country in known_countries:
            return country

        # contains US zipcode
        if us_zipcode_re.search(country):
            return us_country_name

        # do a little bit of cleanup; a trailing dot after an upper-case
        # letter is kept since it is likely part of an abbreviation
        if len(country) > 1 and country[-1] == "." and not country[-2].isupper():
            country = country.rstrip(".")

        country = country.strip("-,").strip()

        # aliased name
        country = lookup_alias(country)
        if country in known_countries:
            return country

        # country name at end, separated by comma
        last_part = lookup_alias(last_text_part_stripped(country.split(",")))
        if last_part in known_countries:
            return last_part

        # country name at end, separated by whitespace
        last_part = lookup_alias(last_text_part_stripped(country.split()))
        if last_part in known_countries:
            return last_part

        # country name anywhere
        country_lower = country.lower()
        for alias_re, name in known_re_aliases:
            if alias_re.search(country) or alias_re.search(country_lower):
                return name

        # if everything else has failed, try ISO code
        country = iso_code_aliases.get(country, country)
        if country in known_countries:
            return country

        # unknown country
        return ""

    res = {}

    for country in countries:
        if country in res or country in known_countries:
            continue

        if not country:
            # None or empty string: nothing to analyze, and the string
            # methods used by the heuristics would fail on None
            res[country] = ""
            continue

        res[country] = resolve(country)

    return res
|
||||
|
|
@ -23,9 +23,9 @@ from ietf.review.utils import (extract_review_request_data,
|
|||
from ietf.submit.models import Submission
|
||||
from ietf.group.models import Role, Group
|
||||
from ietf.person.models import Person
|
||||
from ietf.name.models import ReviewRequestStateName, ReviewResultName
|
||||
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName
|
||||
from ietf.doc.models import DocAlias, Document
|
||||
from ietf.person.utils import get_aliased_affiliations
|
||||
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries
|
||||
from ietf.ietfauth.utils import has_role
|
||||
|
||||
def stats_index(request):
|
||||
|
@ -139,6 +139,8 @@ def document_stats(request, stats_type=None):
|
|||
table_data = []
|
||||
stats_title = ""
|
||||
bin_size = 1
|
||||
alias_data = []
|
||||
eu_countries = None
|
||||
|
||||
|
||||
if any(stats_type == t[0] for t in possible_document_stats_types):
|
||||
|
@ -332,7 +334,7 @@ def document_stats(request, stats_type=None):
|
|||
if from_time:
|
||||
# this is actually faster than joining in the database,
|
||||
# despite the round-trip back and forth
|
||||
docs_within_time_constraint = list(Document.objects.filter(
|
||||
docs_within_time_constraint = set(Document.objects.filter(
|
||||
type="draft",
|
||||
docevent__time__gte=from_time,
|
||||
docevent__type__in=["published_rfc", "new_revision"],
|
||||
|
@ -349,7 +351,7 @@ def document_stats(request, stats_type=None):
|
|||
else:
|
||||
doc_label = "document"
|
||||
|
||||
total_persons = person_qs.count()
|
||||
total_persons = person_qs.distinct().count()
|
||||
|
||||
if stats_type == "author/documents":
|
||||
stats_title = "Number of {}s per author".format(doc_label)
|
||||
|
@ -402,6 +404,86 @@ def document_stats(request, stats_type=None):
|
|||
"animation": False,
|
||||
})
|
||||
|
||||
for alias, name in sorted(aliases.iteritems(), key=lambda t: t[1]):
|
||||
alias_data.append((name, alias))
|
||||
|
||||
elif stats_type == "author/country":
|
||||
stats_title = "Number of {} authors per country".format(doc_label)
|
||||
|
||||
bins = defaultdict(list)
|
||||
|
||||
# Since people don't write the country names in the
|
||||
# same way, and we don't want to go back and edit them
|
||||
# either, we transform them here.
|
||||
|
||||
name_country_set = set((name, country)
|
||||
for name, country in person_qs.values_list("name", "documentauthor__country"))
|
||||
|
||||
aliases = get_aliased_countries(country for _, country in name_country_set)
|
||||
|
||||
countries = { c.name: c for c in CountryName.objects.all() }
|
||||
eu_name = "EU"
|
||||
eu_countries = set(c for c in countries.itervalues() if c.in_eu)
|
||||
|
||||
for name, country in name_country_set:
|
||||
country_name = aliases.get(country, country)
|
||||
bins[country_name].append(name)
|
||||
|
||||
c = countries.get(country_name)
|
||||
if c and c.in_eu:
|
||||
bins[eu_name].append(name)
|
||||
|
||||
series_data = []
|
||||
for country, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
|
||||
percentage = len(names) * 100.0 / total_persons
|
||||
if country:
|
||||
series_data.append((country, len(names)))
|
||||
table_data.append((country, percentage, names))
|
||||
|
||||
series_data.sort(key=lambda t: t[1], reverse=True)
|
||||
series_data = series_data[:30]
|
||||
|
||||
chart_data.append({
|
||||
"data": series_data,
|
||||
"animation": False,
|
||||
})
|
||||
|
||||
for alias, country_name in aliases.iteritems():
|
||||
alias_data.append((country_name, alias, countries.get(country_name)))
|
||||
|
||||
alias_data.sort()
|
||||
|
||||
elif stats_type == "author/continent":
|
||||
stats_title = "Number of {} authors per continent".format(doc_label)
|
||||
|
||||
bins = defaultdict(list)
|
||||
|
||||
name_country_set = set((name, country)
|
||||
for name, country in person_qs.values_list("name", "documentauthor__country"))
|
||||
|
||||
aliases = get_aliased_countries(country for _, country in name_country_set)
|
||||
|
||||
country_to_continent = dict(CountryName.objects.values_list("name", "continent__name"))
|
||||
|
||||
for name, country in name_country_set:
|
||||
country_name = aliases.get(country, country)
|
||||
continent_name = country_to_continent.get(country_name, "")
|
||||
bins[continent_name].append(name)
|
||||
|
||||
series_data = []
|
||||
for continent, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
|
||||
percentage = len(names) * 100.0 / total_persons
|
||||
if continent:
|
||||
series_data.append((continent, len(names)))
|
||||
table_data.append((continent, percentage, names))
|
||||
|
||||
series_data.sort(key=lambda t: t[1], reverse=True)
|
||||
|
||||
chart_data.append({
|
||||
"data": series_data,
|
||||
"animation": False,
|
||||
})
|
||||
|
||||
|
||||
return render(request, "stats/document_stats.html", {
|
||||
"chart_data": mark_safe(json.dumps(chart_data)),
|
||||
|
@ -416,6 +498,10 @@ def document_stats(request, stats_type=None):
|
|||
"time_choice": time_choice,
|
||||
"doc_label": doc_label,
|
||||
"bin_size": bin_size,
|
||||
"show_aliases_url": build_document_stats_url(get_overrides={ "showaliases": "1" }),
|
||||
"hide_aliases_url": build_document_stats_url(get_overrides={ "showaliases": None }),
|
||||
"alias_data": alias_data,
|
||||
"eu_countries": sorted(eu_countries or [], key=lambda c: c.name),
|
||||
"content_template": "stats/document_stats_{}.html".format(stats_type.replace("/", "_")),
|
||||
})
|
||||
|
||||
|
|
|
@ -12,8 +12,6 @@ from django.conf import settings
|
|||
from django.utils.html import mark_safe
|
||||
from django.core.urlresolvers import reverse as urlreverse
|
||||
|
||||
from django_countries.fields import countries
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.doc.models import Document
|
||||
|
@ -32,15 +30,6 @@ from ietf.submit.parsers.ps_parser import PSParser
|
|||
from ietf.submit.parsers.xml_parser import XMLParser
|
||||
from ietf.utils.draft import Draft
|
||||
|
||||
def clean_country(country):
    """Return the code in ``countries`` matching ``country``, or "" if none.

    ``country`` is compared case-insensitively, ignoring surrounding
    whitespace, against both the code and the name of every entry in
    ``countries``. ``None`` and empty input are treated as unknown, so the
    result of an XML ``findtext()`` lookup can be passed in directly even
    when the element is missing.
    """
    if not country:
        return "" # nothing given, so unknown

    country = country.strip().upper()
    for code, name in countries:
        if country == code or country == name.upper():
            return code

    return "" # unknown
|
||||
|
||||
class SubmissionUploadForm(forms.Form):
|
||||
txt = forms.FileField(label=u'.txt format', required=False)
|
||||
xml = forms.FileField(label=u'.xml format', required=False)
|
||||
|
@ -194,7 +183,7 @@ class SubmissionUploadForm(forms.Form):
|
|||
"name": author.attrib.get('fullname'),
|
||||
"email": author.findtext('address/email'),
|
||||
"affiliation": author.findtext('organization'),
|
||||
"country": clean_country(author.findtext('address/postal/country')),
|
||||
"country": author.findtext('address/postal/country'),
|
||||
})
|
||||
except forms.ValidationError:
|
||||
raise
|
||||
|
@ -348,7 +337,7 @@ class NameEmailForm(forms.Form):
|
|||
|
||||
class AuthorForm(NameEmailForm):
|
||||
affiliation = forms.CharField(max_length=100, required=False)
|
||||
country = forms.ChoiceField(choices=[('', "(Not specified)")] + list(countries), required=False)
|
||||
country = forms.CharField(max_length=255, required=False)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AuthorForm, self).__init__(*args, **kwargs)
|
||||
|
|
|
@ -81,7 +81,7 @@ def upload_submission(request):
|
|||
# If we don't have an xml file, try to extract the
|
||||
# relevant information from the text file
|
||||
for author in form.parsed_draft.get_author_list():
|
||||
full_name, first_name, middle_initial, last_name, name_suffix, email, company = author
|
||||
full_name, first_name, middle_initial, last_name, name_suffix, email, country, company = author
|
||||
|
||||
name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip()
|
||||
|
||||
|
@ -114,7 +114,7 @@ def upload_submission(request):
|
|||
"name": name,
|
||||
"email": email,
|
||||
"affiliation": company,
|
||||
# FIXME: missing country
|
||||
"country": country
|
||||
})
|
||||
|
||||
if form.abstract:
|
||||
|
|
|
@ -57,3 +57,44 @@
|
|||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>Some authors are authors of multiple documents with different
|
||||
affiliation information associated, so the sum of multiple rows in the
|
||||
table can be more than 100%.</p>
|
||||
|
||||
|
||||
|
||||
<h3>Affiliation Aliases</h3>
|
||||
|
||||
<p>In generating the above statistics, some heuristics have been applied to determine the affiliation of each author.</p>
|
||||
|
||||
{% if request.GET.showaliases %}
|
||||
<p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p>
|
||||
|
||||
{% if request.user.is_staff %}
|
||||
<p>Note: since you're an admin, you can <a href="{% url "admin:stats_affiliationalias_add" %}">add an extra known alias</a> or see the <a href="{% url "admin:stats_affiliationalias_changelist" %}">existing known aliases</a> and <a href="{% url "admin:stats_affiliationignoredending_changelist" %}">generally ignored endings</a>.</p>
|
||||
{% endif %}
|
||||
|
||||
{% if alias_data %}
|
||||
<table class="table table-condensed">
|
||||
<thead>
|
||||
<th>Affiliation</th>
|
||||
<th>Alias</th>
|
||||
</thead>
|
||||
|
||||
{% for name, alias in alias_data %}
|
||||
<tr>
|
||||
<td>
|
||||
{% ifchanged %}
|
||||
{{ name|default:"(unknown)" }}
|
||||
{% endifchanged %}
|
||||
</td>
|
||||
<td>{{ alias }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
<p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p>
|
||||
{% endif %}
|
||||
|
|
65
ietf/templates/stats/document_stats_author_continent.html
Normal file
65
ietf/templates/stats/document_stats_author_continent.html
Normal file
|
@ -0,0 +1,65 @@
|
|||
<h3>{{ stats_title }}</h3>
|
||||
|
||||
<div id="chart"></div>
|
||||
|
||||
<script>
|
||||
var chartConf = {
|
||||
chart: {
|
||||
type: 'column'
|
||||
},
|
||||
title: {
|
||||
text: '{{ stats_title|escapejs }}'
|
||||
},
|
||||
xAxis: {
|
||||
type: "category",
|
||||
title: {
|
||||
text: 'Continent'
|
||||
}
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
text: 'Number of authors'
|
||||
}
|
||||
},
|
||||
tooltip: {
|
||||
formatter: function () {
|
||||
var s = '<b>' + this.points[0].key + '</b>';
|
||||
|
||||
$.each(this.points, function () {
|
||||
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
|
||||
});
|
||||
|
||||
return s;
|
||||
},
|
||||
shared: true
|
||||
},
|
||||
series: {{ chart_data }}
|
||||
};
|
||||
</script>
|
||||
|
||||
<h3>Data</h3>
|
||||
|
||||
<table class="table table-condensed stats-data">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Continent</th>
|
||||
<th>Percentage of authors</th>
|
||||
<th>Authors</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for continent, percentage, names in table_data %}
|
||||
<tr>
|
||||
<td>{{ continent|default:"(unknown)" }}</td>
|
||||
<td>{{ percentage|floatformat:2 }}%</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>The country information for an author can vary between documents,
|
||||
so the sum of the rows in the table can be more than 100%. This
|
||||
is especially true for the row with unknown continent information -
|
||||
many authors may have one or more author entries with an
|
||||
unrecognized country.</p>
|
124
ietf/templates/stats/document_stats_author_country.html
Normal file
124
ietf/templates/stats/document_stats_author_country.html
Normal file
|
@ -0,0 +1,124 @@
|
|||
<h3>{{ stats_title }}</h3>
|
||||
|
||||
<div id="chart"></div>
|
||||
|
||||
<script>
|
||||
var chartConf = {
|
||||
chart: {
|
||||
type: 'column'
|
||||
},
|
||||
title: {
|
||||
text: '{{ stats_title|escapejs }}'
|
||||
},
|
||||
xAxis: {
|
||||
type: "category",
|
||||
title: {
|
||||
text: 'Country'
|
||||
}
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
text: 'Number of authors'
|
||||
}
|
||||
},
|
||||
tooltip: {
|
||||
formatter: function () {
|
||||
var s = '<b>' + this.points[0].key + '</b>';
|
||||
|
||||
$.each(this.points, function () {
|
||||
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
|
||||
});
|
||||
|
||||
return s;
|
||||
},
|
||||
shared: true
|
||||
},
|
||||
series: {{ chart_data }}
|
||||
};
|
||||
</script>
|
||||
|
||||
<h3>Data</h3>
|
||||
|
||||
<table class="table table-condensed stats-data">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Country</th>
|
||||
<th>Percentage of authors</th>
|
||||
<th>Authors</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for country, percentage, names in table_data %}
|
||||
<tr>
|
||||
<td>{{ country|default:"(unknown)" }}</td>
|
||||
<td>{{ percentage|floatformat:2 }}%</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>The country information for an author can vary between documents,
|
||||
so the sum of multiple rows in the table can be more than 100%. This
|
||||
is especially true for the row with unknown country information -
|
||||
many authors may have one or more author entries with an
|
||||
unrecognized country.</p>
|
||||
|
||||
<p>An author is counted in EU if the country is a member of the EU
|
||||
now, even if that was not the case at publication.
|
||||
EU members:
|
||||
{% for c in eu_countries %}{{ c.name }}{% if not forloop.last %}, {% endif %}{% endfor %}.</p>
|
||||
|
||||
<h3>Country Aliases</h3>
|
||||
|
||||
<p>In generating the above statistics, some heuristics have been
|
||||
applied to figure out which country each author is from.</p>
|
||||
|
||||
{% if request.GET.showaliases %}
|
||||
<p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p>
|
||||
|
||||
{% if request.user.is_staff %}
|
||||
<p>Note: since you're an admin, some extra links are visible. You
|
||||
can either correct a document author entry directly in case the
|
||||
information is obviously missing or add an alias if an unknown
|
||||
<a href="{% url "admin:name_countryname_changelist" %}">country name</a>
|
||||
is being used.
|
||||
</p>
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% if alias_data %}
|
||||
<table class="table table-condensed">
|
||||
<thead>
|
||||
<th>Country</th>
|
||||
<th>Alias</th>
|
||||
<th></th>
|
||||
</thead>
|
||||
|
||||
{% for name, alias, country in alias_data %}
|
||||
<tr>
|
||||
<td>
|
||||
{% ifchanged %}
|
||||
{% if country and request.user.is_staff %}
|
||||
<a href="{% url "admin:name_countryname_change" country.pk %}">
|
||||
{% endif %}
|
||||
{{ name|default:"(unknown)" }}
|
||||
{% if country and request.user.is_staff %}
|
||||
</a>
|
||||
{% endif %}
|
||||
{% endifchanged %}
|
||||
</td>
|
||||
<td>{{ alias }}</td>
|
||||
<td>
|
||||
{% if request.user.is_staff and name != "EU" %}
|
||||
<a href="{% url "admin:doc_documentauthor_changelist" %}?country={{ alias|urlencode }}">Matching authors</a>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
<p><a href="{{ show_aliases_url }}" class="btn btn-default">Show generated aliases</a></p>
|
||||
{% endif %}
|
|
@ -2,7 +2,7 @@
|
|||
{# Copyright The IETF Trust 2015, All Rights Reserved #}
|
||||
{% load origin %}
|
||||
{% load staticfiles %}
|
||||
{% load ietf_filters submit_tags country %}
|
||||
{% load ietf_filters submit_tags %}
|
||||
|
||||
{% block title %}Submission status of {{ submission.name }}-{{ submission.rev }}{% endblock %}
|
||||
|
||||
|
@ -207,8 +207,8 @@
|
|||
<th>Author {{ forloop.counter }}</th>
|
||||
<td>
|
||||
{{ author.name }} {% if author.email %}<{{ author.email }}>{% endif %}
|
||||
{% if author.affiliation %}- {{ author.affiliation }}{% endif %}
|
||||
{% if author.country %}- {{ author.country|country_name }}{% endif %}
|
||||
- {% if author.affiliation %}{{ author.affiliation }}{% else %}<i>unknown affiliation</i>{% endif %}
|
||||
- {% if author.country %}{{ author.country }}{% else %}<i>unknown country</i>{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
|
|
@ -196,6 +196,7 @@ class Draft():
|
|||
line = ""
|
||||
newpage = False
|
||||
sentence = False
|
||||
shortline = False
|
||||
blankcount = 0
|
||||
linecount = 0
|
||||
# two functions with side effects
|
||||
|
@ -262,7 +263,7 @@ class Draft():
|
|||
sentence = True
|
||||
if re.search("[^ \t]", line):
|
||||
if newpage:
|
||||
if sentence:
|
||||
if sentence or shortline:
|
||||
stripped += [""]
|
||||
else:
|
||||
if blankcount:
|
||||
|
@ -270,6 +271,7 @@ class Draft():
|
|||
blankcount = 0
|
||||
sentence = False
|
||||
newpage = False
|
||||
shortline = len(line.strip()) < 18
|
||||
if re.search("[.:]$", line):
|
||||
sentence = True
|
||||
if re.search("^[ \t]*$", line):
|
||||
|
@ -847,7 +849,8 @@ class Draft():
|
|||
nonblank_count = 0
|
||||
blanklines = 0
|
||||
email = None
|
||||
for line in self.lines[start+1:]:
|
||||
country = None
|
||||
for line_offset, line in enumerate(self.lines[start+1:]):
|
||||
_debug( " " + line.strip())
|
||||
# Break on the second blank line
|
||||
if not line:
|
||||
|
@ -887,15 +890,18 @@ class Draft():
|
|||
else:
|
||||
pass
|
||||
|
||||
def columnify(l):
|
||||
try:
|
||||
column = line[beg:end].strip()
|
||||
column = l.replace('\t', 8 * ' ')[max(0, beg - 1):end].strip()
|
||||
except:
|
||||
column = line
|
||||
column = re.sub(" *\(at\) *", "@", column)
|
||||
column = re.sub(" *\(dot\) *", ".", column)
|
||||
column = re.sub(" +at +", "@", column)
|
||||
column = re.sub(" +dot +", ".", column)
|
||||
column = l
|
||||
column = re.sub(" *(?:\(at\)| <at> | at ) *", "@", column)
|
||||
column = re.sub(" *(?:\(dot\)| <dot> | dot ) *", ".", column)
|
||||
column = re.sub("&cisco.com", "@cisco.com", column)
|
||||
column = column.replace("\xa0", " ")
|
||||
return column
|
||||
|
||||
column = columnify(line)
|
||||
|
||||
# if re.search("^\w+: \w+", column):
|
||||
# keyword = True
|
||||
|
@ -906,13 +912,42 @@ class Draft():
|
|||
# break
|
||||
|
||||
#_debug( " Column text :: " + column)
|
||||
if nonblank_count >= 2 and blanklines == 0:
|
||||
# Usually, the contact info lines will look
|
||||
# like this: "Email: someone@example.com" or
|
||||
# "Tel: +1 (412)-2390 23123", but sometimes
|
||||
# the : is left out. That's okay for things we
|
||||
# can't misinterpret, but "tel" may match "Tel
|
||||
# Aviv 69710, Israel" so match
|
||||
# - misc contact info
|
||||
# - tel/fax [number]
|
||||
# - [phone number]
|
||||
# - [email]
|
||||
|
||||
other_contact_info_regex = re.compile(r'^(((contact )?e|\(e|e-|m|electronic )?mail|email_id|mailto|e-main|(tele)?phone|voice|mobile|work|uri|url|tel:)\b|^((ph|tel\.?|telefax|fax) *[:.]? *\(?( ?\+ ?)?[0-9]+)|^(\++[0-9]+|\(\+*[0-9]+\)|\(dsn\)|[0-9]+)([ -.]*\b|\b[ -.]*)(([0-9]{2,}|\([0-9]{2,}\)|(\([0-9]\)|[0-9])[ -][0-9]{2,}|\([0-9]\)[0-9]+)([ -.]+([0-9]+|\([0-9]+\)))+|([0-9]{7,}|\([0-9]{7,}\)))|^(<?[-a-z0-9._+]+|{([-a-z0-9._+]+, ?)+[-a-z0-9._+]+})@[-a-z0-9._]+>?|^https?://|^www\.')
|
||||
next_line_index = start + 1 + line_offset + 1
|
||||
|
||||
if (not country
|
||||
and not other_contact_info_regex.search(column.lower())
|
||||
and next_line_index < len(self.lines)):
|
||||
|
||||
next_line_lower = columnify(self.lines[next_line_index]).lower().strip()
|
||||
|
||||
if not next_line_lower or other_contact_info_regex.search(next_line_lower):
|
||||
# country should be here, as the last
|
||||
# part of the address, right before an
|
||||
# empty line or other contact info
|
||||
country = column.strip() or None
|
||||
_debug(" Country: %s" % country)
|
||||
|
||||
_debug("3: authors[%s]: %s" % (i, authors[i]))
|
||||
|
||||
emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column)
|
||||
if emailmatch and not "@" in author:
|
||||
email = emailmatch.group(0).lower()
|
||||
break
|
||||
authors[i] = authors[i] + ( email, )
|
||||
|
||||
authors[i] = authors[i] + ( email, country)
|
||||
else:
|
||||
if not author in ignore:
|
||||
companies[i] = authors[i]
|
||||
|
@ -938,8 +973,8 @@ class Draft():
|
|||
_debug(" * Final company list: %s" % (companies,))
|
||||
_debug(" * Final companies_seen: %s" % (companies_seen,))
|
||||
self._author_info = authors
|
||||
self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,company in authors ] # pyflakes:ignore
|
||||
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,company in authors ]
|
||||
self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,country,company in authors ] # pyflakes:ignore
|
||||
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,country,company in authors ]
|
||||
self._authors.sort()
|
||||
_debug(" * Final author list: " + ", ".join(self._authors))
|
||||
_debug("-"*72)
|
||||
|
@ -1159,10 +1194,10 @@ def getmeta(fn):
|
|||
def _output(docname, fields, outfile=sys.stdout):
|
||||
global company_domain
|
||||
if opt_getauthors:
|
||||
# Output an (incomplete!) getauthors-compatible format. Country
|
||||
# information is always UNKNOWN, and information about security and
|
||||
# iana sections presence is missing.
|
||||
for full,first,middle,last,suffix,email,company in fields["_authorlist"]:
|
||||
# Output an (incomplete!) getauthors-compatible format.
|
||||
# Information about security and iana sections presence is
|
||||
# missing.
|
||||
for full,first,middle,last,suffix,email,country,company in fields["_authorlist"]:
|
||||
if company in company_domain:
|
||||
company = company_domain[company]
|
||||
else:
|
||||
|
@ -1173,7 +1208,7 @@ def _output(docname, fields, outfile=sys.stdout):
|
|||
fields["name"] = full
|
||||
fields["email"] = email
|
||||
fields["company"] = company
|
||||
fields["country"] = "UNKNOWN"
|
||||
fields["country"] = country or "UNKNOWN"
|
||||
try:
|
||||
year, month, day = fields["doccreationdate"].split("-")
|
||||
except ValueError:
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
from django.template.base import Library
|
||||
from django.template.defaultfilters import stringfilter
|
||||
|
||||
from django_countries import countries
|
||||
|
||||
register = Library()
|
||||
|
||||
@register.filter(is_safe=True)
@stringfilter
def country_name(value):
    """
    Template filter: look up a country code and return the country
    name, or the empty string when the code is not found.
    """
    names_by_code = dict(countries)
    if value in names_by_code:
        return names_by_code[value]
    return ""
|
Loading…
Reference in a new issue