Make sure that name_parts doesn't accidentally turn Unicode strings into plain strings
 - Legacy-Id: 12146
This commit is contained in:
Ole Laursen 2016-10-14 09:41:21 +00:00
parent e864abdcdb
commit 9a3996a286

View file

@ -1,14 +1,14 @@
import re
def name_parts(name):
prefix, first, middle, last, suffix = "", "", "", "", ""
prefix, first, middle, last, suffix = u"", u"", u"", u"", u""
if not name.strip():
return prefix, first, middle, last, suffix
# if we got a name of the form "Some Name (Foo Bar)", get rid of
# the parenthesized part
name_with_paren_match = re.search("^([^(]+)\s*\(.*\)$", name)
name_with_paren_match = re.search(r"^([^(]+)\s*\(.*\)$", name)
if name_with_paren_match:
name = name_with_paren_match.group(1)
@ -24,8 +24,8 @@ def name_parts(name):
suffix = parts[-1]
parts = parts[:-1]
if len(parts) > 2:
name = " ".join(parts)
compound = re.search(" (de|hadi|van|ver|von|el|le|st\.?) ", name.lower())
name = u" ".join(parts)
compound = re.search(r" (de|hadi|van|ver|von|el|le|st\.?) ", name.lower())
if compound:
pos = compound.start()
parts = name[:pos].split() + [name[pos+1:]]
@ -35,7 +35,7 @@ def name_parts(name):
# Handle reverse-order names with uppercase surname correctly
if re.search("^[A-Z-]+$", first):
first, last = last, first
middle = " ".join(parts[1:-1])
middle = u" ".join(parts[1:-1])
elif len(parts) == 2:
first, last = parts
else:
@ -46,13 +46,13 @@ def initials(name):
prefix, first, middle, last, suffix = name_parts(name)
given = first
if middle:
given += " "+middle
initials = " ".join([ n[0]+'.' for n in given.split() ])
given += u" "+middle
initials = u" ".join([ n[0]+'.' for n in given.split() ])
return initials
if __name__ == "__main__":
import sys
name = " ".join(sys.argv[1:])
name = u" ".join(sys.argv[1:])
print name_parts(name)
print initials(name)