Added ietf.utils.text.unidecode_name() and replaced various uses of unidecode() with it. This normalizes the generation of ASCII versions of names and avoids inconsistent space stripping and space normalization in different parts of the code.

- Legacy-Id: 14128
This commit is contained in:
Henrik Levkowetz 2017-09-17 15:12:18 +00:00
parent 6aa2cfca89
commit 33b275b04f
8 changed files with 30 additions and 19 deletions

View file

@ -20,9 +20,9 @@ from ietf.dbtemplate.models import DBTemplate
from ietf.person.models import Email, Person
from ietf.mailtrigger.utils import gather_address_lists
from ietf.utils.pipe import pipe
from unidecode import unidecode
from ietf.utils.mail import send_mail_text, send_mail
from ietf.utils.log import log
from ietf.utils.text import unidecode_name
import debug # pyflakes:ignore
@ -365,7 +365,7 @@ def make_nomineeposition_for_newperson(nomcom, candidate_name, candidate_email,
# This is expected to fail if called with an existing email address
email = Email.objects.create(address=candidate_email)
person = Person.objects.create(name=candidate_name,
ascii=unidecode(candidate_name),
ascii=unidecode_name(candidate_name),
address=candidate_email)
email.person = person
email.save()

View file

@ -13,6 +13,8 @@ from django.utils.text import slugify
import debug # pyflakes:ignore
from ietf.person.models import Person, Alias, Email
from ietf.utils.text import unidecode_name
fake = faker.Factory.create()
@ -39,7 +41,7 @@ class PersonFactory(factory.DjangoModelFactory):
user = factory.SubFactory(UserFactory)
name = factory.LazyAttribute(lambda p: u'%s %s'%(p.user.first_name,p.user.last_name))
ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip()))
ascii = factory.LazyAttribute(lambda p: unicode(unidecode_name(p.name)))
class Params:
with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs()))

View file

@ -4,7 +4,6 @@ import datetime
import email.utils
import email.header
from hashids import Hashids
from unidecode import unidecode
from urlparse import urljoin
from django.conf import settings
@ -21,6 +20,7 @@ from ietf.person.name import name_parts, initials, plain_name
from ietf.utils.mail import send_mail_preformatted
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
from ietf.utils.mail import formataddr
from ietf.utils.text import unidecode_name
class PersonInfo(models.Model):
@ -61,18 +61,18 @@ class PersonInfo(models.Model):
# we're validating the content of the ascii field, and have
# verified that the field is ascii clean in the database:
if not all(ord(c) < 128 for c in self.ascii):
self._cached_ascii_name = unidecode(self.ascii).strip()
self._cached_ascii_name = unidecode_name(self.ascii)
else:
self._cached_ascii_name = self.ascii
else:
self._cached_ascii_name = unidecode(self.plain_name()).strip()
self._cached_ascii_name = unidecode_name(self.plain_name())
return self._cached_ascii_name
def plain_ascii(self):
if not hasattr(self, '_cached_plain_ascii'):
if self.ascii:
ascii = unidecode(self.ascii).strip()
ascii = unidecode_name(self.ascii)
else:
ascii = unidecode(self.name).strip()
ascii = unidecode_name(self.name)
prefix, first, middle, last, suffix = name_parts(ascii)
self._cached_plain_ascii = u" ".join([first, last])
return self._cached_plain_ascii

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python
import sys, os
import argparse
# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
@ -24,8 +25,7 @@ from ietf.person.models import Person, Email, Alias
from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State
from ietf.utils.text import strip_prefix, xslugify
from ietf.review.utils import possibly_advance_next_reviewer_for_team
import argparse
from unidecode import unidecode
from ietf.utils.text import unidecode_name
parser = argparse.ArgumentParser()
parser.add_argument("database", help="database must be included in settings")
@ -92,7 +92,7 @@ with db_con.cursor() as c:
if not email:
person = Person.objects.filter(alias__name=row.name).first()
if not person:
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode(row.name))
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode_name(row.name))
if created:
print "created person", unicode(person).encode("utf-8")
existing_aliases = set(Alias.objects.filter(person=person).values_list("name", flat=True))

View file

@ -3,12 +3,12 @@ import requests
from collections import defaultdict
from django.conf import settings
from django.contrib.auth.models import User
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
from ietf.name.models import CountryName
from ietf.person.models import Person, Email, Alias
from django.contrib.auth.models import User
from unidecode import unidecode
from ietf.utils.text import unidecode_name
def compile_affiliation_ending_stripping_regexp():
@ -269,7 +269,7 @@ def get_meeting_registration_data(meeting):
last_name = last_name.capitalize()
regname = "%s %s" % (first_name, last_name)
# if there are any unicode characters decode the string to ascii
ascii_name = unidecode(regname).strip()
ascii_name = unidecode_name(regname)
# Create a new user object if it does not exist already
# if the user already exists do not try to create a new one

View file

@ -4,7 +4,6 @@ import os
import datetime
import six # pyflakes:ignore
import xml2rfc
from unidecode import unidecode
from django.conf import settings
from django.core.validators import validate_email, ValidationError
@ -31,6 +30,7 @@ from ietf.utils import log
from ietf.utils.accesstoken import generate_random_key
from ietf.utils.draft import Draft
from ietf.utils.mail import is_valid_email
from ietf.utils.text import unidecode_name
def validate_submission(submission):
@ -407,7 +407,7 @@ def ensure_person_email_info_exists(name, email):
person = Person()
person.name = name
log.assertion('isinstance(person.name, six.text_type)')
person.ascii = unidecode(person.name).decode('ascii')
person.ascii = unidecode_name(person.name).decode('ascii')
person.save()
# make sure we have an email address

View file

@ -4,7 +4,6 @@
from __future__ import unicode_literals
import datetime
from unidecode import unidecode
from django.conf import settings
from django.contrib.auth.models import User
@ -21,6 +20,8 @@ from ietf.name.models import StreamName, DocRelationshipName, RoomResourceName
from ietf.person.models import Person, Email
from ietf.group.utils import setup_default_community_list_for_group
from ietf.review.models import (ReviewRequest, ReviewerSettings, ReviewResultName, ReviewTypeName, ReviewTeamSettings )
from ietf.utils.text import unidecode_name
def create_person(group, role_name, name=None, username=None, email_address=None, password=None, is_staff=False, is_superuser=False):
"""Add person/user/email and role."""
@ -36,7 +37,7 @@ def create_person(group, role_name, name=None, username=None, email_address=None
user = User.objects.create(username=username,is_staff=is_staff,is_superuser=is_superuser)
user.set_password(password)
user.save()
person = Person.objects.create(name=name, ascii=unidecode(smart_text(name)), user=user)
person = Person.objects.create(name=name, ascii=unidecode_name(smart_text(name)), user=user)
email = Email.objects.create(address=email_address, person=person)
Role.objects.create(group=group, name_id=role_name, person=person, email=email)
return person

View file

@ -1,9 +1,10 @@
from __future__ import unicode_literals
import re
import unicodedata
import textwrap
import types
import unicodedata
import unidecode
from django.utils.functional import allow_lazy
from django.utils import six
@ -125,3 +126,10 @@ def isascii(text):
except UnicodeEncodeError:
return False
def unidecode_name(name):
    """
    Return an ASCII transliteration of ``name`` with normalized spacing.

    unidecode() of CJK ideograms can produce strings which contain spaces
    (leading, trailing, or several in a row).  Leading and trailing
    whitespace is stripped, and every internal run of whitespace is reduced
    to a single space.
    """
    # split() with no argument drops leading/trailing whitespace and treats
    # any run of whitespace as a single separator, so runs of three or more
    # spaces are collapsed too (a single replace() pass would leave doubles).
    return ' '.join(unidecode.unidecode(name).split())