Changed the photo collection script to use a more sophisticated algorithm to separate name parts, both to avoid producing photos named like 'dr-foo-bar-ph-d' and to correctly identify surnames like 'le-faucheur'. Added a translation table for IAB photos that are named by first name only. Added additional directories for IAB photos.
- Legacy-Id: 11271
This commit is contained in:
parent
33a0629911
commit
f9136dcad3
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import os, sys, shutil, pathlib
|
import os, re, sys, shutil, pathlib
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
@ -13,12 +13,12 @@ import django
|
||||||
django.setup()
|
django.setup()
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils.text import slugify
|
||||||
|
|
||||||
import debug
|
import debug
|
||||||
|
|
||||||
from ietf.group.models import Role, Person
|
from ietf.group.models import Role, Person
|
||||||
|
from ietf.person.name import name_parts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
old_images_dir = ''
|
old_images_dir = ''
|
||||||
new_images_dir = settings.PHOTOS_DIR
|
new_images_dir = settings.PHOTOS_DIR
|
||||||
|
@ -42,41 +42,87 @@ for f in old_image_files:
|
||||||
img = Image.open(path)
|
img = Image.open(path)
|
||||||
old_images.append(photo(path, f.stem.decode('utf8'), f.suffix, img.size[0], img.size[1], f.stat().st_mtime, f))
|
old_images.append(photo(path, f.stem.decode('utf8'), f.suffix, img.size[0], img.size[1], f.stat().st_mtime, f))
|
||||||
|
|
||||||
|
# Fix up some names:
|
||||||
|
|
||||||
|
def fix_missing_surnames(images):
    """Normalise the ``name`` field of every photo record in ``images``.

    Two clean-ups are applied to each record:

    1. Any ``-<digits>x<digits>`` fragment (presumably a pixel-size suffix
       such as ``-150x150``) is removed from the name.
    2. If the record's path contains ``/iab/`` and the cleaned name is one
       of the known short or irregular IAB file names listed below, the
       name is replaced by the corresponding ``first-last`` form.

    The list is modified in place: each element is replaced by a copy that
    differs only in its ``name`` field.  Nothing is returned.

    NOTE(review): records are assumed to be namedtuples with ``path`` and
    ``name`` fields, since the copy is made with ``_replace()`` -- confirm
    against the definition of ``photo``.
    """
    replacement = {
        "alissa": "alissa-cooper",
        "alissa1": "alissa-cooper",
        "andrei": "andrei-robachevsky",
        "bernard": "bernard-aboba",
        "danny": "danny-mcpherson",
        "danny1": "danny-mcpherson",
        "dthaler": "dave-thaler",
        "eliot-mug": "eliot-lear",
        "erik.nordmark-300": "erik-nordmark",
        "hannes": "hannes-tschofenig",
        "hildebrand": "joe-hildebrand",
        "housley": "russ-housley",
        "jariarkko": "jari-arkko",
        "joel": "joel-jaeggli",
        "joel1": "joel-jaeggli",
        "joel2": "joel-jaeggli",
        "jon": "jon-peterson",
        "kessens": "david-kessens",
        "klensin": "john-klensin",
        "lars": "lars-eggert",
        "lars1": "lars-eggert",
        "marc_blanchet": "marc-blanchet",
        "marcelo": "marcelo-bagnulo",
        "olaf": "olaf-kolkman",
        "olaf1": "olaf-kolkman",
        "ross": "ross-callon",
        "spencer": "spencer-dawkins",
        "spencer1": "spencer-dawkins",
        "vijay": "vijay-gurbani",
        "xing": "xing-li",
    }
    # Compiled once, outside the loop; matches e.g. "-150x150" anywhere in the name.
    size_suffix = re.compile(r'-[0-9]+x[0-9]+')

    for i, img in enumerate(images):
        name = size_suffix.sub('', img.name)
        # The translation table only applies to photos from the IAB directories.
        if '/iab/' in img.path and name in replacement:
            name = replacement[name]
        # Copy the record with only the name changed; all other fields are kept as-is.
        images[i] = img._replace(name=name)
|
||||||
|
|
||||||
|
|
||||||
|
fix_missing_surnames(old_images)
|
||||||
|
|
||||||
interesting_persons = set(Person.objects.all())
|
interesting_persons = set(Person.objects.all())
|
||||||
|
|
||||||
name_alias = {
|
name_alias = {
|
||||||
"andy": ["andrew", ],
|
u"andy": [u"andrew", ],
|
||||||
"ben": ["benjamin", ],
|
u"ben": [u"benjamin", ],
|
||||||
"bill": ["william", ],
|
u"bill": [u"william", ],
|
||||||
"bob": ["robert", ],
|
u"bob": [u"robert", ],
|
||||||
"chris": ["christopher", "christian"],
|
u"chris": [u"christopher", u"christian"],
|
||||||
"dan": ["daniel", ],
|
u"dan": [u"daniel", ],
|
||||||
"dave": ["david", ],
|
u"dave": [u"david", ],
|
||||||
"dick": ["richard", ],
|
u"dick": [u"richard", ],
|
||||||
"fred": ["alfred", ],
|
u"fred": [u"alfred", ],
|
||||||
"geoff": ["geoffrey", ],
|
u"geoff": [u"geoffrey", ],
|
||||||
"jake": ["jacob", ],
|
u"jake": [u"jacob", ],
|
||||||
"jerry": ["gerald", ],
|
u"jerry": [u"gerald", ],
|
||||||
"jim": ["james", ],
|
u"jim": [u"james", ],
|
||||||
"joe": ["joseph", ],
|
u"joe": [u"joseph", ],
|
||||||
"jon": ["jonathan", ],
|
u"jon": [u"jonathan", ],
|
||||||
"mike": ["michael", ],
|
u"mike": [u"michael", ],
|
||||||
"ned": ["edward", ],
|
u"ned": [u"edward", ],
|
||||||
"pete": ["peter", ],
|
u"pete": [u"peter", ],
|
||||||
"ron": ["ronald", ],
|
u"ron": [u"ronald", ],
|
||||||
"russ": ["russel", ],
|
u"russ": [u"russel", ],
|
||||||
"steve": ["stephen", ],
|
u"steve": [u"stephen", ],
|
||||||
"ted": ["edward", ],
|
u"ted": [u"edward", ],
|
||||||
"terry": ["terence", ],
|
u"terry": [u"terence", ],
|
||||||
"tom": ["thomas", ],
|
u"tom": [u"thomas", ],
|
||||||
"wes": ["wesley", ],
|
u"wes": [u"wesley", ],
|
||||||
"will": ["william", ],
|
u"will": [u"william", ],
|
||||||
|
|
||||||
"beth": ["elizabeth", ],
|
u"beth": [u"elizabeth", ],
|
||||||
"liz": ["elizabeth", ],
|
u"liz": [u"elizabeth", ],
|
||||||
"lynn": ["carolyn", ],
|
u"lynn": [u"carolyn", ],
|
||||||
"pat": ["patricia", "patrick", ],
|
u"pat": [u"patricia", u"patrick", ],
|
||||||
"sue": ["susan", ],
|
u"sue": [u"susan", ],
|
||||||
}
|
}
|
||||||
# Add lookups from long to short, from the initial set
|
# Add lookups from long to short, from the initial set
|
||||||
for key,value in name_alias.items():
|
for key,value in name_alias.items():
|
||||||
|
@ -119,37 +165,53 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
|
||||||
break
|
break
|
||||||
if not person.ascii.strip():
|
if not person.ascii.strip():
|
||||||
print(" Setting person.ascii for %s" % person.name)
|
print(" Setting person.ascii for %s" % person.name)
|
||||||
person.ascii = person.name.encode('ascii', errors='replace')
|
person.ascii = person.name.encode('ascii', errors='replace').decode('ascii')
|
||||||
debug.show('person.ascii')
|
|
||||||
name_parts = person.ascii.lower().split()
|
_, first, _, last, _ = person.ascii_parts()
|
||||||
|
first = first.lower()
|
||||||
|
last = last. lower()
|
||||||
if not substr_pattern:
|
if not substr_pattern:
|
||||||
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
|
substr_pattern = slugify("%s %s" % (last, first))
|
||||||
|
|
||||||
|
if first in ['', '<>'] or last in ['', '<>']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
#debug.show('1, substr_pattern')
|
||||||
|
|
||||||
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
||||||
# Also check the reversed name order (necessary for Deng Hui, for instance)
|
# Also check the reversed name order (necessary for Deng Hui, for instance)
|
||||||
substr_pattern = u'-'.join(name_parts[0:1]+name_parts[-1:])
|
substr_pattern = slugify("%s %s" % (first, last))
|
||||||
|
#debug.show('2, substr_pattern')
|
||||||
|
prev_len = len(candidates)
|
||||||
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
||||||
if candidates:
|
if prev_len < len(candidates) :
|
||||||
print(" Used '%s %s' instead of '%s %s'" % (name_parts[-1], name_parts[0], name_parts[0], name_parts[-1], ))
|
print(" Found match with '%s %s' for '%s %s'" % (last, first, first, last, ))
|
||||||
# If no joy, try a short name
|
# If no joy, try a short name
|
||||||
if name_parts[0] in name_alias:
|
if first in name_alias:
|
||||||
for alias in name_alias[name_parts[0]]:
|
prev_len = len(candidates)
|
||||||
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
|
for alias in name_alias[first]:
|
||||||
|
substr_pattern = slugify("%s %s" % (last, alias))
|
||||||
|
#debug.show('3, substr_pattern')
|
||||||
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
||||||
if candidates:
|
if prev_len < len(candidates):
|
||||||
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
|
print(" Found match with '%s %s' for '%s %s'" % (alias, last, first, last, ))
|
||||||
# If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
|
|
||||||
if not candidates:
|
|
||||||
name_parts = person.plain_name().lower().split()
|
# # If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
|
||||||
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
|
# if not candidates:
|
||||||
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
# prefix, first, middle, last, suffix = person.name_parts()
|
||||||
# If no joy, try a short name
|
# name_parts = person.plain_name().lower().split()
|
||||||
if not candidates and name_parts[0] in name_alias:
|
#
|
||||||
for alias in name_alias[name_parts[0]]:
|
# substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
|
||||||
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
|
# candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
||||||
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
# # If no joy, try a short name
|
||||||
if candidates:
|
# if not candidates and first in name_alias:
|
||||||
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
|
# prev_len = len(candidates)
|
||||||
|
# for alias in name_alias[first]:
|
||||||
|
# substr_pattern = u'-'.join(name_parts[-1:]+[alias])
|
||||||
|
# candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
|
||||||
|
# if prev_len < len(candidates) :
|
||||||
|
# print(" Used '%s %s' instead of '%s %s'" % (alias, last, first, last, ))
|
||||||
|
|
||||||
# # Fixup for other exceptional cases
|
# # Fixup for other exceptional cases
|
||||||
# if person.ascii=="David Oran":
|
# if person.ascii=="David Oran":
|
||||||
|
@ -172,15 +234,24 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
|
||||||
# - if none found, then the smallest photo
|
# - if none found, then the smallest photo
|
||||||
if candidates:
|
if candidates:
|
||||||
candidates.sort(key=lambda x: "%04d-%d" % (x.width, x.time))
|
candidates.sort(key=lambda x: "%04d-%d" % (x.width, x.time))
|
||||||
full = candidates[-1]
|
iesg_cand = [ c for c in candidates if '/iesg/' in c.path ]
|
||||||
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ]
|
iab_cand = [ c for c in candidates if '/iab/' in c.path ]
|
||||||
if not thumbs:
|
if iesg_cand:
|
||||||
thumbs = [ c for c in candidates if c.width==c.height ]
|
full = iesg_cand[-1]
|
||||||
if not thumbs:
|
thumb = iesg_cand[-1]
|
||||||
thumbs = [ c for c in candidates if c.width <= 200 ]
|
elif iab_cand:
|
||||||
if not thumbs:
|
full = iab_cand[-1]
|
||||||
thumbs = candidates[:1]
|
thumb = iab_cand[0]
|
||||||
thumb = thumbs[-1]
|
else:
|
||||||
|
full = candidates[-1]
|
||||||
|
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ]
|
||||||
|
if not thumbs:
|
||||||
|
thumbs = [ c for c in candidates if c.width==c.height ]
|
||||||
|
if not thumbs:
|
||||||
|
thumbs = [ c for c in candidates if c.width <= 200 ]
|
||||||
|
if not thumbs:
|
||||||
|
thumbs = candidates[:1]
|
||||||
|
thumb = thumbs[-1]
|
||||||
candidates = [ thumb, full ]
|
candidates = [ thumb, full ]
|
||||||
|
|
||||||
# At this point we either have no candidates or two. If two, the first will be the thumb
|
# At this point we either have no candidates or two. If two, the first will be the thumb
|
||||||
|
@ -203,6 +274,7 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
|
||||||
#
|
#
|
||||||
copy( thumb.path, os.path.join(new_images_dir,new_thumb_name) )
|
copy( thumb.path, os.path.join(new_images_dir,new_thumb_name) )
|
||||||
|
|
||||||
|
|
||||||
print("")
|
print("")
|
||||||
not_processed = 0
|
not_processed = 0
|
||||||
for file in old_image_files:
|
for file in old_image_files:
|
||||||
|
@ -217,5 +289,4 @@ for file in old_image_files:
|
||||||
not_processed += 1
|
not_processed += 1
|
||||||
print(u"Not processed: "+str(file).decode('utf8'))
|
print(u"Not processed: "+str(file).decode('utf8'))
|
||||||
print("")
|
print("")
|
||||||
print("")
|
|
||||||
print("Not processed: %s files" % not_processed)
|
print("Not processed: %s files" % not_processed)
|
||||||
|
|
Loading…
Reference in a new issue