Moved unidecode_name from utils.text to person.name.
Modified UserFactory to pick a new random locale for each new user, instead of using the same locale for a whole test run. This (almost) ensures that the code which deals with non-ASCII names gets exercised, which would not happen if a locale with ASCII-only names happened to be chosen at the start of a run. Modified name.initials() so that non-word characters are not used as initials. Modified unidecode_name() to do more normalization, in order to conform to the conventions used in Internet-Drafts. Added saving of the factory-boy random state, making it possible to re-run a test suite with the same pseudo-random sequence as in a previous failed run. Fixed an issue with email formatting in test_api_submit_ok(). Modified the draft author extraction code to deal better with names with embedded apostrophes. - Legacy-Id: 14141
This commit is contained in:
parent
a440a30f95
commit
2c1438c240
|
@ -22,7 +22,7 @@ from ietf.mailtrigger.utils import gather_address_lists
|
|||
from ietf.utils.pipe import pipe
|
||||
from ietf.utils.mail import send_mail_text, send_mail
|
||||
from ietf.utils.log import log
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
|
|
|
@ -13,20 +13,23 @@ from django.utils.text import slugify
|
|||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.person.models import Person, Alias, Email
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
|
||||
fake = faker.Factory.create()
|
||||
|
||||
def random_faker():
    """
    Return a new faker.Faker instance for one randomly chosen locale.

    The locale is drawn with random.sample() from
    faker.config.AVAILABLE_LOCALES on every call, so successive calls may
    return generators for different locales.
    """
    chosen = random.sample(faker.config.AVAILABLE_LOCALES, 1)
    locale = chosen[0]
    return faker.Faker(locale)
|
||||
|
||||
class UserFactory(factory.DjangoModelFactory):
|
||||
class Meta:
|
||||
model = User
|
||||
django_get_or_create = ('username',)
|
||||
exclude = ['locale', ]
|
||||
exclude = ['faker', ]
|
||||
|
||||
locale = random.sample(faker.config.AVAILABLE_LOCALES, 1)[0]
|
||||
first_name = factory.Faker('first_name', locale)
|
||||
last_name = factory.Faker('last_name', locale)
|
||||
faker = factory.LazyFunction(random_faker)
|
||||
first_name = factory.LazyAttribute(lambda o: o.faker.first_name())
|
||||
last_name = factory.LazyAttribute(lambda o: o.faker.last_name())
|
||||
email = factory.LazyAttributeSequence(lambda u, n: '%s.%s_%d@%s'%( slugify(unidecode(u.first_name)),
|
||||
slugify(unidecode(u.last_name)), n, fake.domain_name()))
|
||||
username = factory.LazyAttribute(lambda u: u.email)
|
||||
|
|
|
@ -20,7 +20,7 @@ from ietf.person.name import name_parts, initials, plain_name
|
|||
from ietf.utils.mail import send_mail_preformatted
|
||||
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
|
||||
from ietf.utils.mail import formataddr
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
|
||||
class PersonInfo(models.Model):
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
import re
|
||||
import unidecode
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
|
||||
def name_particle_match(name):
    """
    Search name for a known name particle surrounded by single spaces.

    Returns the re match object for the first place where a particle is
    found (group 1 holds the particle without the surrounding spaces), or
    None if name contains none of the listed particles.
    """
    # NOTE(review): regex alternation is ordered, and ' van ' / ' von ' match
    # wherever ' van der ' / ' von der ' would, so the two-word alternatives
    # are never selected; group(1) is 'van' or 'von' for such names.
    pattern = r" (af|al|Al|de|der|di|Di|du|el|El|Hadi|in 't|Le|st\.?|St\.?|ten|ter|van|van der|Van|von|von der|Von|zu) "
    found = re.search(pattern, name)
    return found
|
||||
|
||||
def name_parts(name):
|
||||
prefix, first, middle, last, suffix = u"", u"", u"", u"", u""
|
||||
|
||||
|
@ -36,7 +41,7 @@ def name_parts(name):
|
|||
full = full.lower() # adjust case for all-uppercase input
|
||||
# This is an incomplete list. Adjust as needed to handle known ietf
|
||||
# participant names correctly:
|
||||
particle = re.search(r" (af|al|Al|de|der|di|Di|du|el|El|Hadi|in 't|Le|st\.?|St\.?|ten|ter|van|van der|Van|von|von der|Von|zu) ", full)
|
||||
particle = name_particle_match(full)
|
||||
if particle:
|
||||
pos = particle.start()
|
||||
parts = full[:pos].split() + [full[pos+1:]]
|
||||
|
@ -52,19 +57,63 @@ def name_parts(name):
|
|||
else:
|
||||
last = parts[0]
|
||||
return prefix, first, middle, last, suffix
|
||||
|
||||
|
||||
def initials(name):
    """
    Return the initials of the first and middle names found in name.

    The name is split with name_parts(); each whitespace-separated word of
    the first and middle names contributes its first character, upper-cased
    and followed by a period.  The initials are joined with single spaces.
    """
    prefix, first, middle, last, suffix = name_parts(name)
    given = first
    if middle:
        given += u" " + middle
    # Don't use non-word characters as initials.
    # Example: The Bulgarian transcribed name "'Rnest Balkanska" should not have an initial of "'".
    # The pattern is a raw string so that \w reaches the regex engine intact,
    # and re.UNICODE makes \w cover non-ASCII letters too; without it,
    # Python 2 would strip the leading letter of a name such as u"Øyvind".
    given = re.sub(r'[^ .\w]', '', given, flags=re.UNICODE)
    return u" ".join([word[0].upper() + '.' for word in given.split()])
|
||||
|
||||
def plain_name(name):
    """
    Return only the first and last name from name, separated by one space.

    The prefix, middle name(s) and suffix split out by name_parts() are
    dropped.
    """
    _prefix, given, _middle, family, _suffix = name_parts(name)
    return u" ".join((given, family))
|
||||
|
||||
def capfirst(s):
    """
    Return s with its first alphabetic character capitalized.

    Leading non-alphabetic characters are skipped, and every character after
    the capitalized one is left untouched.  A string without any alphabetic
    character is returned unchanged.
    """
    chars = list(s)
    # Position of the first alphabetic character, or None if there is none.
    index = next((i for i, c in enumerate(chars) if c.isalpha()), None)
    if index is not None:
        chars[index] = chars[index].capitalize()
    return ''.join(chars)
|
||||
|
||||
def unidecode_name(uname):
    """
    Return an ASCII transliteration of uname, normalized for use as a name.

    If unidecode() leaves the name unchanged, it is returned as it is.
    Otherwise:

    unidecode() of cjk ideograms can produce strings which contain spaces.
    Strip leading and trailing spaces, and reduce runs of spaces to a single
    space.

    For some other ranges, unidecode returns all-lowercase names; fix these
    up with capitalization.  A name particle found by name_particle_match()
    at the start of the last name is put back as it was matched.
    """
    name = unidecode.unidecode(uname)
    if name == uname:
        return name
    # Fix double spacing.  A regex is used so that runs of three or more
    # spaces also collapse to one in a single pass.
    name = re.sub(r' {2,}', ' ', name.strip())
    # Fix all-upper and all-lower names:
    # Check for name particles -- don't capitalize those
    m = name_particle_match(name)
    particle = m.group(1) if m else None
    # Get the name parts
    prefix, first, middle, last, suffix = name_parts(name)
    # Capitalize names
    first = capfirst(first)
    middle = ' '.join([capfirst(p) for p in middle.split()])
    last = ' '.join([capfirst(p) for p in last.split()])
    # Restore the particle, if any.  The comparison ignores case because
    # every word of the last name was capitalized just above, so a
    # multi-word particle such as "in 't" now reads "In 'T" and would not
    # match a comparison against the particle with only its first letter
    # capitalized.
    if particle and last.lower().startswith(particle.lower() + ' '):
        last = ' '.join([particle, last[len(particle) + 1:]])
    # Recombine the parts, leaving out empty or whitespace-only ones
    parts = prefix, first, middle, last, suffix
    name = ' '.join([p for p in parts if p and p.strip()])
    return name
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
name = u" ".join(sys.argv[1:])
|
||||
|
|
|
@ -25,7 +25,7 @@ from ietf.person.models import Person, Email, Alias
|
|||
from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State
|
||||
from ietf.utils.text import strip_prefix, xslugify
|
||||
from ietf.review.utils import possibly_advance_next_reviewer_for_team
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("database", help="database must be included in settings")
|
||||
|
|
|
@ -920,6 +920,9 @@ SILENCED_SYSTEM_CHECKS = [
|
|||
|
||||
STATS_NAMES_LIMIT = 25
|
||||
|
||||
UTILS_TEST_RANDOM_STATE_FILE = '.factoryboy_random_state'
|
||||
|
||||
|
||||
# Put the production SECRET_KEY in settings_local.py, and also any other
|
||||
# sensitive or site-specific changes. DO NOT commit settings_local.py to svn.
|
||||
from settings_local import * # pyflakes:ignore pylint: disable=wildcard-import
|
||||
|
|
|
@ -8,7 +8,7 @@ from django.contrib.auth.models import User
|
|||
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
|
||||
from ietf.name.models import CountryName
|
||||
from ietf.person.models import Person, Email, Alias
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
|
||||
def compile_affiliation_ending_stripping_regexp():
|
||||
|
|
|
@ -1588,7 +1588,7 @@ class ApiSubmitTests(TestCase):
|
|||
|
||||
def test_api_submit_ok(self):
|
||||
r, author, name = self.post_submission('00')
|
||||
expected = "Upload of %s OK, confirmation requests sent to:\n %s" % (name, author.formatted_email())
|
||||
expected = "Upload of %s OK, confirmation requests sent to:\n %s" % (name, author.formatted_email().replace('\n',''))
|
||||
self.assertContains(r, expected, status_code=200)
|
||||
|
||||
def test_api_submit_no_user(self):
|
||||
|
|
|
@ -30,7 +30,7 @@ from ietf.utils import log
|
|||
from ietf.utils.accesstoken import generate_random_key
|
||||
from ietf.utils.draft import Draft
|
||||
from ietf.utils.mail import is_valid_email
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
|
||||
def validate_submission(submission):
|
||||
|
|
|
@ -509,8 +509,8 @@ class Draft():
|
|||
"honor" : r"(?:[A-Z]\.|Dr\.?|Dr\.-Ing\.|Prof(?:\.?|essor)|Sir|Lady|Dame|Sri)",
|
||||
"prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von|[Ee]l)",
|
||||
"suffix": r"(jr.?|Jr.?|II|2nd|III|3rd|IV|4th)",
|
||||
"first" : r"([A-Z][-A-Za-z]*)(( ?\([A-Z][-A-Za-z]*\))?(\.?[- ]{1,2}[A-Za-z]+)*)",
|
||||
"last" : r"([-A-Za-z']{2,})",
|
||||
"first" : r"([A-Z][-A-Za-z'`]*)(( ?\([A-Z][-A-Za-z'`]*\))?(\.?[- ]{1,2}[A-Za-z'`]+)*)",
|
||||
"last" : r"([-A-Za-z'`]{2,})",
|
||||
"months": r"(January|February|March|April|May|June|July|August|September|October|November|December)",
|
||||
"mabbr" : r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.?",
|
||||
}
|
||||
|
@ -575,7 +575,7 @@ class Draft():
|
|||
|
||||
# permit insertion of middle names between first and last, and
|
||||
# add possible honorific and suffix information
|
||||
authpat = r"(?:^| and )(?:%(hon)s ?)?(%(first)s\S*( +[^ ]+)* +%(last)s)( *\(.*|,( [A-Z][-A-Za-z0-9]*)?| %(suffix)s| [A-Z][a-z]+)?" % {"hon":hon, "first":first, "last":last, "suffix":suffix,}
|
||||
authpat = r"(?:^| and )(?:%(hon)s ?)?([`']?%(first)s\S*( +[^ ]+)* +%(last)s)( *\(.*|,( [A-Z][-A-Za-z0-9]*)?| %(suffix)s| [A-Z][a-z]+)?" % {"hon":hon, "first":first, "last":last, "suffix":suffix,}
|
||||
return authpat
|
||||
|
||||
authors = []
|
||||
|
|
|
@ -20,7 +20,7 @@ from ietf.name.models import StreamName, DocRelationshipName, RoomResourceName
|
|||
from ietf.person.models import Person, Email
|
||||
from ietf.group.utils import setup_default_community_list_for_group
|
||||
from ietf.review.models import (ReviewRequest, ReviewerSettings, ReviewResultName, ReviewTypeName, ReviewTeamSettings )
|
||||
from ietf.utils.text import unidecode_name
|
||||
from ietf.person.name import unidecode_name
|
||||
|
||||
|
||||
def create_person(group, role_name, name=None, username=None, email_address=None, password=None, is_staff=False, is_superuser=False):
|
||||
|
|
|
@ -45,6 +45,7 @@ import datetime
|
|||
import codecs
|
||||
import gzip
|
||||
import unittest
|
||||
import factory.random
|
||||
from fnmatch import fnmatch
|
||||
|
||||
from coverage.report import Reporter
|
||||
|
@ -557,6 +558,18 @@ class IetfTestRunner(DiscoverRunner):
|
|||
|
||||
maybe_create_svn_symlinks(settings)
|
||||
|
||||
if os.path.exists(settings.UTILS_TEST_RANDOM_STATE_FILE):
|
||||
print " Loading factory-boy random state from .random-state"
|
||||
with open(settings.UTILS_TEST_RANDOM_STATE_FILE) as f:
|
||||
s = json.load(f)
|
||||
s[1] = tuple(s[1]) # random.setstate() won't accept a list in lieu of a tuple
|
||||
factory.random.set_random_state(s)
|
||||
else:
|
||||
print " Saving factory-boy random state to .random-state"
|
||||
with open(settings.UTILS_TEST_RANDOM_STATE_FILE, 'w') as f:
|
||||
s = factory.random.get_random_state()
|
||||
json.dump(s, f)
|
||||
|
||||
super(IetfTestRunner, self).setup_test_environment(**kwargs)
|
||||
|
||||
def teardown_test_environment(self, **kwargs):
|
||||
|
@ -683,4 +696,7 @@ class IetfTestRunner(DiscoverRunner):
|
|||
|
||||
save_test_results(failures, test_labels)
|
||||
|
||||
if not failures and os.path.exists(settings.UTILS_TEST_RANDOM_STATE_FILE):
|
||||
os.unlink(settings.UTILS_TEST_RANDOM_STATE_FILE)
|
||||
|
||||
return failures
|
||||
|
|
|
@ -4,7 +4,6 @@ import re
|
|||
import textwrap
|
||||
import types
|
||||
import unicodedata
|
||||
import unidecode
|
||||
|
||||
from django.utils.functional import allow_lazy
|
||||
from django.utils import six
|
||||
|
@ -125,11 +124,3 @@ def isascii(text):
|
|||
return True
|
||||
except UnicodeEncodeError:
|
||||
return False
|
||||
|
||||
def unidecode_name(name):
|
||||
"""
|
||||
unidecode() of cjk ideograms can produce strings which contain spaces.
|
||||
Strip leading and trailing spaces, and reduce double-spaces to single.
|
||||
"""
|
||||
return unidecode.unidecode(name).strip().replace(' ', ' ')
|
||||
|
||||
|
|
Loading…
Reference in a new issue