Changed the photo collection script to use a more sophisticated algorithm to separate name parts, in order to avoid photos named 'dr-foo-bar-ph-d', and in order to correctly identify surnames like 'le-faucheur'. Added translation for IAB photos that are named with a first name only. Added additional directories for IAB photos.

- Legacy-Id: 11271
This commit is contained in:
Henrik Levkowetz 2016-06-05 09:47:55 +00:00
parent 33a0629911
commit f9136dcad3

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import os, sys, shutil, pathlib import os, re, sys, shutil, pathlib
from collections import namedtuple from collections import namedtuple
from PIL import Image from PIL import Image
@ -13,12 +13,12 @@ import django
django.setup() django.setup()
from django.conf import settings from django.conf import settings
from django.utils.text import slugify
import debug import debug
from ietf.group.models import Role, Person from ietf.group.models import Role, Person
from ietf.person.name import name_parts
old_images_dir = '' old_images_dir = ''
new_images_dir = settings.PHOTOS_DIR new_images_dir = settings.PHOTOS_DIR
@ -42,41 +42,87 @@ for f in old_image_files:
img = Image.open(path) img = Image.open(path)
old_images.append(photo(path, f.stem.decode('utf8'), f.suffix, img.size[0], img.size[1], f.stat().st_mtime, f)) old_images.append(photo(path, f.stem.decode('utf8'), f.suffix, img.size[0], img.size[1], f.stat().st_mtime, f))
# Fix up some names:
# Rewrite the `name` field of the photo records in `images`, in place, mapping
# known short or irregular image names to "first-last" style names using the
# `replacement` table below.
#
# images: a list of `photo` records; each entry is read through its path, name,
#         ext, width, height, time and file attributes and re-assigned by index.
# Returns nothing; the list passed in by the caller is modified.
#
# NOTE(review): this listing has lost its indentation, so it cannot be told from
# here whether the final `images[i] = photo(...)` assignment runs for every
# image or only inside the `if` above it -- confirm against the real file.
def fix_missing_surnames(images):
# Lookup table: image name (after the regex substitution below) -> replacement name.
# Several keys map to the same value (e.g. "joel", "joel1" and "joel2").
replacement = {
"alissa": "alissa-cooper",
"alissa1": "alissa-cooper",
"andrei": "andrei-robachevsky",
"bernard": "bernard-aboba",
"danny": "danny-mcpherson",
"danny1": "danny-mcpherson",
"dthaler": "dave-thaler",
"eliot-mug": "eliot-lear",
"erik.nordmark-300": "erik-nordmark",
"hannes": "hannes-tschofenig",
"hildebrand": "joe-hildebrand",
"housley": "russ-housley",
"jariarkko": "jari-arkko",
"joel": "joel-jaeggli",
"joel1": "joel-jaeggli",
"joel2": "joel-jaeggli",
"jon": "jon-peterson",
"kessens": "david-kessens",
"klensin": "john-klensin",
"lars": "lars-eggert",
"lars1": "lars-eggert",
"marc_blanchet": "marc-blanchet",
"marcelo": "marcelo-bagnulo",
"olaf": "olaf-kolkman",
"olaf1": "olaf-kolkman",
"ross": "ross-callon",
"spencer": "spencer-dawkins",
"spencer1": "spencer-dawkins",
"vijay": "vijay-gurbani",
"xing": "xing-li",
}
for i in range(len(images)):
img = images[i]
# Remove every "-<digits>x<digits>" substring from the name before the lookup
# (presumably a WIDTHxHEIGHT size marker in the file name -- TODO confirm).
name = re.sub('-[0-9]+x[0-9]+', '', img.name)
# The table is only consulted for images whose path contains '/iab/'.
if '/iab/' in img.path and name in replacement:
name = replacement[name]
# Build a new record carrying `name`; all other fields are copied from the old record.
images[i] = photo(img.path, name, img.ext, img.width, img.height, img.time, img.file)
fix_missing_surnames(old_images)
interesting_persons = set(Person.objects.all()) interesting_persons = set(Person.objects.all())
name_alias = { name_alias = {
"andy": ["andrew", ], u"andy": [u"andrew", ],
"ben": ["benjamin", ], u"ben": [u"benjamin", ],
"bill": ["william", ], u"bill": [u"william", ],
"bob": ["robert", ], u"bob": [u"robert", ],
"chris": ["christopher", "christian"], u"chris": [u"christopher", u"christian"],
"dan": ["daniel", ], u"dan": [u"daniel", ],
"dave": ["david", ], u"dave": [u"david", ],
"dick": ["richard", ], u"dick": [u"richard", ],
"fred": ["alfred", ], u"fred": [u"alfred", ],
"geoff": ["geoffrey", ], u"geoff": [u"geoffrey", ],
"jake": ["jacob", ], u"jake": [u"jacob", ],
"jerry": ["gerald", ], u"jerry": [u"gerald", ],
"jim": ["james", ], u"jim": [u"james", ],
"joe": ["joseph", ], u"joe": [u"joseph", ],
"jon": ["jonathan", ], u"jon": [u"jonathan", ],
"mike": ["michael", ], u"mike": [u"michael", ],
"ned": ["edward", ], u"ned": [u"edward", ],
"pete": ["peter", ], u"pete": [u"peter", ],
"ron": ["ronald", ], u"ron": [u"ronald", ],
"russ": ["russel", ], u"russ": [u"russel", ],
"steve": ["stephen", ], u"steve": [u"stephen", ],
"ted": ["edward", ], u"ted": [u"edward", ],
"terry": ["terence", ], u"terry": [u"terence", ],
"tom": ["thomas", ], u"tom": [u"thomas", ],
"wes": ["wesley", ], u"wes": [u"wesley", ],
"will": ["william", ], u"will": [u"william", ],
"beth": ["elizabeth", ], u"beth": [u"elizabeth", ],
"liz": ["elizabeth", ], u"liz": [u"elizabeth", ],
"lynn": ["carolyn", ], u"lynn": [u"carolyn", ],
"pat": ["patricia", "patrick", ], u"pat": [u"patricia", u"patrick", ],
"sue": ["susan", ], u"sue": [u"susan", ],
} }
# Add lookups from long to short, from the initial set # Add lookups from long to short, from the initial set
for key,value in name_alias.items(): for key,value in name_alias.items():
@ -119,37 +165,53 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
break break
if not person.ascii.strip(): if not person.ascii.strip():
print(" Setting person.ascii for %s" % person.name) print(" Setting person.ascii for %s" % person.name)
person.ascii = person.name.encode('ascii', errors='replace') person.ascii = person.name.encode('ascii', errors='replace').decode('ascii')
debug.show('person.ascii')
name_parts = person.ascii.lower().split() _, first, _, last, _ = person.ascii_parts()
first = first.lower()
last = last. lower()
if not substr_pattern: if not substr_pattern:
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1]) substr_pattern = slugify("%s %s" % (last, first))
if first in ['', '<>'] or last in ['', '<>']:
continue
#debug.show('1, substr_pattern')
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)] candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
# Also check the reversed name order (necessary for Deng Hui, for instance) # Also check the reversed name order (necessary for Deng Hui, for instance)
substr_pattern = u'-'.join(name_parts[0:1]+name_parts[-1:]) substr_pattern = slugify("%s %s" % (first, last))
#debug.show('2, substr_pattern')
prev_len = len(candidates)
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)] candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
if candidates: if prev_len < len(candidates) :
print(" Used '%s %s' instead of '%s %s'" % (name_parts[-1], name_parts[0], name_parts[0], name_parts[-1], )) print(" Found match with '%s %s' for '%s %s'" % (last, first, first, last, ))
# If no joy, try a short name # If no joy, try a short name
if name_parts[0] in name_alias: if first in name_alias:
for alias in name_alias[name_parts[0]]: prev_len = len(candidates)
substr_pattern = u'-'.join(name_parts[-1:]+[alias]) for alias in name_alias[first]:
substr_pattern = slugify("%s %s" % (last, alias))
#debug.show('3, substr_pattern')
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)] candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
if candidates: if prev_len < len(candidates):
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], )) print(" Found match with '%s %s' for '%s %s'" % (alias, last, first, last, ))
# If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
if not candidates:
name_parts = person.plain_name().lower().split() # # If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1]) # if not candidates:
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)] # prefix, first, middle, last, suffix = person.name_parts()
# If no joy, try a short name # name_parts = person.plain_name().lower().split()
if not candidates and name_parts[0] in name_alias: #
for alias in name_alias[name_parts[0]]: # substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
substr_pattern = u'-'.join(name_parts[-1:]+[alias]) # candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)] # # If no joy, try a short name
if candidates: # if not candidates and first in name_alias:
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], )) # prev_len = len(candidates)
# for alias in name_alias[first]:
# substr_pattern = u'-'.join(name_parts[-1:]+[alias])
# candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
# if prev_len < len(candidates) :
# print(" Used '%s %s' instead of '%s %s'" % (alias, last, first, last, ))
# # Fixup for other exceptional cases # # Fixup for other exceptional cases
# if person.ascii=="David Oran": # if person.ascii=="David Oran":
@ -172,15 +234,24 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
# - if none found, then the smallest photo # - if none found, then the smallest photo
if candidates: if candidates:
candidates.sort(key=lambda x: "%04d-%d" % (x.width, x.time)) candidates.sort(key=lambda x: "%04d-%d" % (x.width, x.time))
full = candidates[-1] iesg_cand = [ c for c in candidates if '/iesg/' in c.path ]
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ] iab_cand = [ c for c in candidates if '/iab/' in c.path ]
if not thumbs: if iesg_cand:
thumbs = [ c for c in candidates if c.width==c.height ] full = iesg_cand[-1]
if not thumbs: thumb = iesg_cand[-1]
thumbs = [ c for c in candidates if c.width <= 200 ] elif iab_cand:
if not thumbs: full = iab_cand[-1]
thumbs = candidates[:1] thumb = iab_cand[0]
thumb = thumbs[-1] else:
full = candidates[-1]
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ]
if not thumbs:
thumbs = [ c for c in candidates if c.width==c.height ]
if not thumbs:
thumbs = [ c for c in candidates if c.width <= 200 ]
if not thumbs:
thumbs = candidates[:1]
thumb = thumbs[-1]
candidates = [ thumb, full ] candidates = [ thumb, full ]
# At this point we either have no candidates or two. If two, the first will be the thumb # At this point we either have no candidates or two. If two, the first will be the thumb
@ -203,6 +274,7 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
# #
copy( thumb.path, os.path.join(new_images_dir,new_thumb_name) ) copy( thumb.path, os.path.join(new_images_dir,new_thumb_name) )
print("") print("")
not_processed = 0 not_processed = 0
for file in old_image_files: for file in old_image_files:
@ -217,5 +289,4 @@ for file in old_image_files:
not_processed += 1 not_processed += 1
print(u"Not processed: "+str(file).decode('utf8')) print(u"Not processed: "+str(file).decode('utf8'))
print("") print("")
print("")
print("Not processed: %s files" % not_processed) print("Not processed: %s files" % not_processed)