Added ietf.utils.text.unidecode_name() and replaced various uses of unidecode() with it. This normalizes the generation of ASCII versions of names and avoids the differing space-stripping and space-normalization practices that existed in different parts of the code.
- Legacy-Id: 14128
This commit is contained in:
parent
6aa2cfca89
commit
33b275b04f
|
@ -20,9 +20,9 @@ from ietf.dbtemplate.models import DBTemplate
|
|||
from ietf.person.models import Email, Person
|
||||
from ietf.mailtrigger.utils import gather_address_lists
|
||||
from ietf.utils.pipe import pipe
|
||||
from unidecode import unidecode
|
||||
from ietf.utils.mail import send_mail_text, send_mail
|
||||
from ietf.utils.log import log
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
|
@ -365,7 +365,7 @@ def make_nomineeposition_for_newperson(nomcom, candidate_name, candidate_email,
|
|||
# This is expected to fail if called with an existing email address
|
||||
email = Email.objects.create(address=candidate_email)
|
||||
person = Person.objects.create(name=candidate_name,
|
||||
ascii=unidecode(candidate_name),
|
||||
ascii=unidecode_name(candidate_name),
|
||||
address=candidate_email)
|
||||
email.person = person
|
||||
email.save()
|
||||
|
|
|
@ -13,6 +13,8 @@ from django.utils.text import slugify
|
|||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.person.models import Person, Alias, Email
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
|
||||
fake = faker.Factory.create()
|
||||
|
||||
|
@ -39,7 +41,7 @@ class PersonFactory(factory.DjangoModelFactory):
|
|||
|
||||
user = factory.SubFactory(UserFactory)
|
||||
name = factory.LazyAttribute(lambda p: u'%s %s'%(p.user.first_name,p.user.last_name))
|
||||
ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip()))
|
||||
ascii = factory.LazyAttribute(lambda p: unicode(unidecode_name(p.name)))
|
||||
|
||||
class Params:
|
||||
with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs()))
|
||||
|
|
|
@ -4,7 +4,6 @@ import datetime
|
|||
import email.utils
|
||||
import email.header
|
||||
from hashids import Hashids
|
||||
from unidecode import unidecode
|
||||
from urlparse import urljoin
|
||||
|
||||
from django.conf import settings
|
||||
|
@ -21,6 +20,7 @@ from ietf.person.name import name_parts, initials, plain_name
|
|||
from ietf.utils.mail import send_mail_preformatted
|
||||
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
|
||||
from ietf.utils.mail import formataddr
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
|
||||
class PersonInfo(models.Model):
|
||||
|
@ -61,18 +61,18 @@ class PersonInfo(models.Model):
|
|||
# we're validating the content of the ascii field, and have
|
||||
# verified that the field is ascii clean in the database:
|
||||
if not all(ord(c) < 128 for c in self.ascii):
|
||||
self._cached_ascii_name = unidecode(self.ascii).strip()
|
||||
self._cached_ascii_name = unidecode_name(self.ascii)
|
||||
else:
|
||||
self._cached_ascii_name = self.ascii
|
||||
else:
|
||||
self._cached_ascii_name = unidecode(self.plain_name()).strip()
|
||||
self._cached_ascii_name = unidecode_name(self.plain_name())
|
||||
return self._cached_ascii_name
|
||||
def plain_ascii(self):
|
||||
if not hasattr(self, '_cached_plain_ascii'):
|
||||
if self.ascii:
|
||||
ascii = unidecode(self.ascii).strip()
|
||||
ascii = unidecode_name(self.ascii)
|
||||
else:
|
||||
ascii = unidecode(self.name).strip()
|
||||
ascii = unidecode_name(self.name)
|
||||
prefix, first, middle, last, suffix = name_parts(ascii)
|
||||
self._cached_plain_ascii = u" ".join([first, last])
|
||||
return self._cached_plain_ascii
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys, os
|
||||
import argparse
|
||||
|
||||
# boilerplate
|
||||
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
|
||||
|
@ -24,8 +25,7 @@ from ietf.person.models import Person, Email, Alias
|
|||
from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State
|
||||
from ietf.utils.text import strip_prefix, xslugify
|
||||
from ietf.review.utils import possibly_advance_next_reviewer_for_team
|
||||
import argparse
|
||||
from unidecode import unidecode
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("database", help="database must be included in settings")
|
||||
|
@ -92,7 +92,7 @@ with db_con.cursor() as c:
|
|||
if not email:
|
||||
person = Person.objects.filter(alias__name=row.name).first()
|
||||
if not person:
|
||||
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode(row.name))
|
||||
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode_name(row.name))
|
||||
if created:
|
||||
print "created person", unicode(person).encode("utf-8")
|
||||
existing_aliases = set(Alias.objects.filter(person=person).values_list("name", flat=True))
|
||||
|
|
|
@ -3,12 +3,12 @@ import requests
|
|||
from collections import defaultdict
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
|
||||
from ietf.name.models import CountryName
|
||||
from ietf.person.models import Person, Email, Alias
|
||||
from django.contrib.auth.models import User
|
||||
from unidecode import unidecode
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
|
||||
def compile_affiliation_ending_stripping_regexp():
|
||||
|
@ -269,7 +269,7 @@ def get_meeting_registration_data(meeting):
|
|||
last_name = last_name.capitalize()
|
||||
regname = "%s %s" % (first_name, last_name)
|
||||
# if there are any unicode characters decode the string to ascii
|
||||
ascii_name = unidecode(regname).strip()
|
||||
ascii_name = unidecode_name(regname)
|
||||
|
||||
# Create a new user object if it does not exist already
|
||||
# if the user already exists do not try to create a new one
|
||||
|
|
|
@ -4,7 +4,6 @@ import os
|
|||
import datetime
|
||||
import six # pyflakes:ignore
|
||||
import xml2rfc
|
||||
from unidecode import unidecode
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.validators import validate_email, ValidationError
|
||||
|
@ -31,6 +30,7 @@ from ietf.utils import log
|
|||
from ietf.utils.accesstoken import generate_random_key
|
||||
from ietf.utils.draft import Draft
|
||||
from ietf.utils.mail import is_valid_email
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
|
||||
def validate_submission(submission):
|
||||
|
@ -407,7 +407,7 @@ def ensure_person_email_info_exists(name, email):
|
|||
person = Person()
|
||||
person.name = name
|
||||
log.assertion('isinstance(person.name, six.text_type)')
|
||||
person.ascii = unidecode(person.name).decode('ascii')
|
||||
person.ascii = unidecode_name(person.name).decode('ascii')
|
||||
person.save()
|
||||
|
||||
# make sure we have an email address
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
from unidecode import unidecode
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
|
@ -21,6 +20,8 @@ from ietf.name.models import StreamName, DocRelationshipName, RoomResourceName
|
|||
from ietf.person.models import Person, Email
|
||||
from ietf.group.utils import setup_default_community_list_for_group
|
||||
from ietf.review.models import (ReviewRequest, ReviewerSettings, ReviewResultName, ReviewTypeName, ReviewTeamSettings )
|
||||
from ietf.utils.text import unidecode_name
|
||||
|
||||
|
||||
def create_person(group, role_name, name=None, username=None, email_address=None, password=None, is_staff=False, is_superuser=False):
|
||||
"""Add person/user/email and role."""
|
||||
|
@ -36,7 +37,7 @@ def create_person(group, role_name, name=None, username=None, email_address=None
|
|||
user = User.objects.create(username=username,is_staff=is_staff,is_superuser=is_superuser)
|
||||
user.set_password(password)
|
||||
user.save()
|
||||
person = Person.objects.create(name=name, ascii=unidecode(smart_text(name)), user=user)
|
||||
person = Person.objects.create(name=name, ascii=unidecode_name(smart_text(name)), user=user)
|
||||
email = Email.objects.create(address=email_address, person=person)
|
||||
Role.objects.create(group=group, name_id=role_name, person=person, email=email)
|
||||
return person
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
import textwrap
|
||||
import types
|
||||
import unicodedata
|
||||
import unidecode
|
||||
|
||||
from django.utils.functional import allow_lazy
|
||||
from django.utils import six
|
||||
|
@ -125,3 +126,10 @@ def isascii(text):
|
|||
except UnicodeEncodeError:
|
||||
return False
|
||||
|
||||
def unidecode_name(name):
    """
    Return the unidecode() transliteration of name with normalized spacing.

    unidecode() of cjk ideograms can produce strings which contain spaces.
    Strip leading and trailing whitespace, and reduce every run of
    consecutive spaces to a single space.
    """
    ascii_name = unidecode.unidecode(name).strip()
    # Splitting on a single space yields empty strings between adjacent
    # spaces; dropping them collapses runs of any length, which a single
    # replace() pass of two spaces by one would not do.
    return ' '.join(part for part in ascii_name.split(' ') if part)
|
||||
|
||||
|
|
Loading…
Reference in a new issue