diff --git a/ietf/nomcom/utils.py b/ietf/nomcom/utils.py index 21d2b17cc..5929d90d9 100644 --- a/ietf/nomcom/utils.py +++ b/ietf/nomcom/utils.py @@ -20,9 +20,9 @@ from ietf.dbtemplate.models import DBTemplate from ietf.person.models import Email, Person from ietf.mailtrigger.utils import gather_address_lists from ietf.utils.pipe import pipe -from unidecode import unidecode from ietf.utils.mail import send_mail_text, send_mail from ietf.utils.log import log +from ietf.utils.text import unidecode_name import debug # pyflakes:ignore @@ -365,7 +365,7 @@ def make_nomineeposition_for_newperson(nomcom, candidate_name, candidate_email, # This is expected to fail if called with an existing email address email = Email.objects.create(address=candidate_email) person = Person.objects.create(name=candidate_name, - ascii=unidecode(candidate_name), + ascii=unidecode_name(candidate_name), address=candidate_email) email.person = person email.save() diff --git a/ietf/person/factories.py b/ietf/person/factories.py index 014c741b9..20be62f14 100644 --- a/ietf/person/factories.py +++ b/ietf/person/factories.py @@ -13,6 +13,8 @@ from django.utils.text import slugify import debug # pyflakes:ignore from ietf.person.models import Person, Alias, Email +from ietf.utils.text import unidecode_name + fake = faker.Factory.create() @@ -39,7 +41,7 @@ class PersonFactory(factory.DjangoModelFactory): user = factory.SubFactory(UserFactory) name = factory.LazyAttribute(lambda p: u'%s %s'%(p.user.first_name,p.user.last_name)) - ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip())) + ascii = factory.LazyAttribute(lambda p: unicode(unidecode_name(p.name))) class Params: with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs())) diff --git a/ietf/person/models.py b/ietf/person/models.py index d4e3c6924..8988e458a 100644 --- a/ietf/person/models.py +++ b/ietf/person/models.py @@ -4,7 +4,6 @@ import datetime import email.utils import email.header from hashids import Hashids 
-from unidecode import unidecode from urlparse import urljoin from django.conf import settings @@ -21,6 +20,7 @@ from ietf.person.name import name_parts, initials, plain_name from ietf.utils.mail import send_mail_preformatted from ietf.utils.storage import NoLocationMigrationFileSystemStorage from ietf.utils.mail import formataddr +from ietf.utils.text import unidecode_name class PersonInfo(models.Model): @@ -61,18 +61,18 @@ class PersonInfo(models.Model): # we're validating the content of the ascii field, and have # verified that the field is ascii clean in the database: if not all(ord(c) < 128 for c in self.ascii): - self._cached_ascii_name = unidecode(self.ascii).strip() + self._cached_ascii_name = unidecode_name(self.ascii) else: self._cached_ascii_name = self.ascii else: - self._cached_ascii_name = unidecode(self.plain_name()).strip() + self._cached_ascii_name = unidecode_name(self.plain_name()) return self._cached_ascii_name def plain_ascii(self): if not hasattr(self, '_cached_plain_ascii'): if self.ascii: - ascii = unidecode(self.ascii).strip() + ascii = unidecode_name(self.ascii) else: - ascii = unidecode(self.name).strip() + ascii = unidecode_name(self.name) prefix, first, middle, last, suffix = name_parts(ascii) self._cached_plain_ascii = u" ".join([first, last]) return self._cached_plain_ascii diff --git a/ietf/review/import_from_review_tool.py b/ietf/review/import_from_review_tool.py index 6f8f931aa..f91aa50ce 100755 --- a/ietf/review/import_from_review_tool.py +++ b/ietf/review/import_from_review_tool.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import sys, os +import argparse # boilerplate basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) @@ -24,8 +25,7 @@ from ietf.person.models import Person, Email, Alias from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State from ietf.utils.text import strip_prefix, xslugify from ietf.review.utils import 
possibly_advance_next_reviewer_for_team -import argparse -from unidecode import unidecode +from ietf.utils.text import unidecode_name parser = argparse.ArgumentParser() parser.add_argument("database", help="database must be included in settings") @@ -92,7 +92,7 @@ with db_con.cursor() as c: if not email: person = Person.objects.filter(alias__name=row.name).first() if not person: - person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode(row.name)) + person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode_name(row.name)) if created: print "created person", unicode(person).encode("utf-8") existing_aliases = set(Alias.objects.filter(person=person).values_list("name", flat=True)) diff --git a/ietf/stats/utils.py b/ietf/stats/utils.py index d30171eff..d48e8d2d0 100644 --- a/ietf/stats/utils.py +++ b/ietf/stats/utils.py @@ -3,12 +3,12 @@ import requests from collections import defaultdict from django.conf import settings +from django.contrib.auth.models import User from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration from ietf.name.models import CountryName from ietf.person.models import Person, Email, Alias -from django.contrib.auth.models import User -from unidecode import unidecode +from ietf.utils.text import unidecode_name def compile_affiliation_ending_stripping_regexp(): @@ -269,7 +269,7 @@ def get_meeting_registration_data(meeting): last_name = last_name.capitalize() regname = "%s %s" % (first_name, last_name) # if there are any unicode characters decode the string to ascii - ascii_name = unidecode(regname).strip() + ascii_name = unidecode_name(regname) # Create a new user object if it does not exist already # if the user already exists do not try to create a new one diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 66c9fec57..4f7b013de 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -4,7 +4,6 @@ import os import datetime import six # 
pyflakes:ignore import xml2rfc -from unidecode import unidecode from django.conf import settings from django.core.validators import validate_email, ValidationError @@ -31,6 +30,7 @@ from ietf.utils import log from ietf.utils.accesstoken import generate_random_key from ietf.utils.draft import Draft from ietf.utils.mail import is_valid_email +from ietf.utils.text import unidecode_name def validate_submission(submission): @@ -407,7 +407,7 @@ def ensure_person_email_info_exists(name, email): person = Person() person.name = name log.assertion('isinstance(person.name, six.text_type)') - person.ascii = unidecode(person.name).decode('ascii') + person.ascii = unidecode_name(person.name).decode('ascii') person.save() # make sure we have an email address diff --git a/ietf/utils/test_data.py b/ietf/utils/test_data.py index 5342da8a4..0b34188fe 100644 --- a/ietf/utils/test_data.py +++ b/ietf/utils/test_data.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import datetime -from unidecode import unidecode from django.conf import settings from django.contrib.auth.models import User @@ -21,6 +20,8 @@ from ietf.name.models import StreamName, DocRelationshipName, RoomResourceName from ietf.person.models import Person, Email from ietf.group.utils import setup_default_community_list_for_group from ietf.review.models import (ReviewRequest, ReviewerSettings, ReviewResultName, ReviewTypeName, ReviewTeamSettings ) +from ietf.utils.text import unidecode_name + def create_person(group, role_name, name=None, username=None, email_address=None, password=None, is_staff=False, is_superuser=False): """Add person/user/email and role.""" @@ -36,7 +37,7 @@ def create_person(group, role_name, name=None, username=None, email_address=None user = User.objects.create(username=username,is_staff=is_staff,is_superuser=is_superuser) user.set_password(password) user.save() - person = Person.objects.create(name=name, ascii=unidecode(smart_text(name)), user=user) + person = 
Person.objects.create(name=name, ascii=unidecode_name(smart_text(name)), user=user) email = Email.objects.create(address=email_address, person=person) Role.objects.create(group=group, name_id=role_name, person=person, email=email) return person diff --git a/ietf/utils/text.py b/ietf/utils/text.py index c19852e0c..d7a6bb3ca 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals import re -import unicodedata import textwrap import types +import unicodedata +import unidecode from django.utils.functional import allow_lazy from django.utils import six @@ -125,3 +126,10 @@ def isascii(text): except UnicodeEncodeError: return False +def unidecode_name(name): + """ + unidecode() of cjk ideograms can produce strings which contain spaces. + Strip leading and trailing spaces, and reduce double-spaces to single. + """ + return unidecode.unidecode(name).strip().replace('  ', ' ') +