Added heuristics to process existing photo files for a larger percentage of person records than before. Changed to checking all person records, not only those with roles. Added a summary, printed at the end, of the photo files that were not handled. This reduced the number of unhandled files from ~350 to fewer than 10, and all the unhandled ones seem to belong to persons for whom photos have been found.
- Legacy-Id: 11262
This commit is contained in:
parent
8a4d0b3db8
commit
88e56f2c98
218
ietf/bin/2016-05-25-collect-photos
Executable file
218
ietf/bin/2016-05-25-collect-photos
Executable file
|
@ -0,0 +1,218 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os, sys, shutil, pathlib
|
||||
|
||||
# boilerplate
|
||||
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
|
||||
sys.path = [ basedir ] + sys.path
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ietf.settings")
|
||||
|
||||
import django
|
||||
django.setup()
|
||||
|
||||
import debug
|
||||
|
||||
from ietf.group.models import Role, Person
|
||||
|
||||
old_images_dir = django.conf.settings.OLD_PHOTOS_DIR
|
||||
new_images_dir = django.conf.settings.PHOTOS_DIR
|
||||
|
||||
if not os.path.exists(old_images_dir):
|
||||
print("Old images directory does not exist: %s" % old_images_dir)
|
||||
sys.exit(1)
|
||||
if not os.path.exists(new_images_dir):
|
||||
print("New images directory does not exist: %s" % new_images_dir)
|
||||
sys.exit(1)
|
||||
|
||||
# Collect the names of the regular files found directly in the old images
# directory (sub-directories are not descended into), skipping lock files.
old_image_files = []
for f in pathlib.Path(old_images_dir).iterdir():
    if not f.is_file() or f.suffix.lower() == '.lck':
        continue
    name = f.name
    # The original code always called .decode('utf8') here, i.e. it expected
    # byte strings (Python 2).  Only decode when that is actually the case, so
    # the listing also works where Path.name is already text.
    if isinstance(name, bytes):
        name = name.decode('utf8')
    old_image_files.append(name)

# Nothing usable to work on: report and stop with the same exit status (2)
# the script has always used for an empty image directory.
if not old_image_files:
    print("No image files found in %s" % old_images_dir)
    sys.exit(2)
|
||||
|
||||
interesting_persons = set()
|
||||
|
||||
interesting_persons.update(list(Person.objects.all()))
|
||||
|
||||
# Alternative forms of first names, used to find photo files that were named
# with a different form of a person's first name than the one in the person
# record.  The literal lists short form -> long form(s); the loop below adds
# the reverse (long form -> short forms) lookups.
name_alias = {
    "andy": ["andrew", ],
    "ben": ["benjamin", ],
    "bill": ["william", ],
    "bob": ["robert", ],
    "chris": ["christopher", "christian"],
    "dan": ["daniel", ],
    "dave": ["david", ],
    "dick": ["richard", ],
    "fred": ["alfred", ],
    "geoff": ["geoffrey", ],
    "jake": ["jacob", ],
    "jerry": ["gerald", ],
    "jim": ["james", ],
    "joe": ["joseph", ],
    "jon": ["jonathan", ],
    "mike": ["michael", ],
    "ned": ["edward", ],
    "pete": ["peter", ],
    "ron": ["ronald", ],
    "russ": ["russel", ],
    "steve": ["stephen", ],
    "ted": ["edward", ],
    "terry": ["terence", ],
    "tom": ["thomas", ],
    "wes": ["wesley", ],
    "will": ["william", ],

    "beth": ["elizabeth", ],
    "liz": ["elizabeth", ],
    "lynn": ["carolyn", ],
    "pat": ["patricia", "patrick", ],
    "sue": ["susan", ],
}
# Add lookups from long to short, from the initial set.
# Iterate over a snapshot of the items: the loop inserts new keys, and
# iterating a live dict view while the dict changes size raises RuntimeError
# on Python 3.
for key, value in list(name_alias.items()):
    for item in value:
        name_alias.setdefault(item, []).append(key)
|
||||
|
||||
exceptions = {
|
||||
'Aboba' : 'aboba-bernard',
|
||||
'Bernardos' : 'cano-carlos',
|
||||
'Bormann' : 'bormann-carsten',
|
||||
'Hinden' : 'hinden-bob',
|
||||
'Hutton' : 'hutton-andy',
|
||||
'Narten' : 'narten-thomas', # but there's no picture of him
|
||||
'O\'Donoghue' : 'odonoghue-karen',
|
||||
'Przygienda' : 'przygienda-antoni',
|
||||
'Salowey' : 'salowey-joe',
|
||||
'Gunter Van de Velde' : 'vandevelde-gunter',
|
||||
'Eric Vyncke' : 'vynke-eric',
|
||||
'Zuniga' : 'zuniga-carlos-juan',
|
||||
'Zhen Cao' : 'zhen-cao',
|
||||
'Jamal Hadi Salim': 'hadi-salim-jamal',
|
||||
}
|
||||
|
||||
# Manually copied Bo Burman and Thubert Pascal from wg/photos/
|
||||
# Manually copied Victor Pascual (main image, not thumb) from wg/
|
||||
# Manually copied Eric Vync?ke (main image, not thumb) from wg/photos/
|
||||
# Manually copied Danial King (main image, not thumb) from wg/photos/
|
||||
# Manually copied the thumb (not labelled as such) for Tianran Zhou as both the main and thumb image from wg/photos/
|
||||
|
||||
processed_files = []
|
||||
|
||||
for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.ascii):
|
||||
substr_pattern = None
|
||||
for exception in exceptions:
|
||||
if exception in person.ascii:
|
||||
substr_pattern = exceptions[exception]
|
||||
break
|
||||
if not person.ascii.strip():
|
||||
print(" Setting person.ascii for %s" % person.name)
|
||||
person.ascii = person.name.encode('ascii', errors='replace')
|
||||
debug.show('person.ascii')
|
||||
name_parts = person.ascii.lower().split()
|
||||
if not substr_pattern:
|
||||
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
|
||||
|
||||
candidates = [x for x in old_image_files if x.lower().startswith(substr_pattern)]
|
||||
# If no joy, try a short name
|
||||
if not candidates and name_parts[0] in name_alias:
|
||||
for alias in name_alias[name_parts[0]]:
|
||||
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
|
||||
candidates += [x for x in old_image_files if x.lower().startswith(substr_pattern)]
|
||||
if candidates:
|
||||
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
|
||||
# If still no joy, reverse the name order (necessary for Deng Hui, for instance)
|
||||
if not candidates:
|
||||
substr_pattern = u'-'.join(name_parts[0:1]+name_parts[-1:])
|
||||
candidates = [x for x in old_image_files if x.lower().startswith(substr_pattern)]
|
||||
if candidates:
|
||||
print(" Used '%s %s' instead of '%s %s'" % (name_parts[-1], name_parts[0], name_parts[0], name_parts[-1], ))
|
||||
# If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
|
||||
if not candidates:
|
||||
name_parts = person.plain_name().lower().split()
|
||||
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
|
||||
candidates = [x for x in old_image_files if x.lower().startswith(substr_pattern)]
|
||||
# If no joy, try a short name
|
||||
if not candidates and name_parts[0] in name_alias:
|
||||
for alias in name_alias[name_parts[0]]:
|
||||
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
|
||||
candidates += [x for x in old_image_files if x.lower().startswith(substr_pattern)]
|
||||
if candidates:
|
||||
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
|
||||
|
||||
# Fixup for other exceptional cases
|
||||
|
||||
if person.ascii=="David Oran":
|
||||
candidates = ['oran-dave-th.jpg','oran-david.jpg']
|
||||
|
||||
if person.ascii=="Susan Hares":
|
||||
candidates = ['hares-sue-th.jpg','hares-susan.JPG']
|
||||
|
||||
if person.ascii=="Mahesh Jethanandani":
|
||||
candidates = ['Mahesh-Jethanandani-th.jpg','Jethanandani-Mahesh.jpg']
|
||||
|
||||
processed_files += [ c for c in candidates ]
|
||||
|
||||
if len(candidates) not in [0,1,2]:
|
||||
candidates = [x for x in candidates if not '00' in x]
|
||||
|
||||
if len(candidates) == 1:
|
||||
candidates = candidates + candidates
|
||||
|
||||
# More candidates than one thumbnail plus one photo: keep a single file of
# each kind.  Thumbnails are recognised by the '-th.' marker in the file name.
if len(candidates) not in [0,2]:
    thumbs = [ c for c in candidates if '-th.' in c ]
    photos = [ c for c in candidates if '-th.' not in c ]
    # If all candidates are of one kind, indexing the empty list would raise
    # IndexError; fall back to the other kind, the same way a single
    # candidate is used for both the thumbnail and the photo.
    thumb = (thumbs or photos)[0]
    photo = (photos or thumbs)[0]
    trunc = [thumb, photo]
    print(" Truncating %s to %s" % (candidates, trunc))
    candidates = trunc

# Put the thumbnail first.  Test for '-th.' (with the dot), consistent with
# the selection above: a bare '-th' also matches ordinary photo names such as
# 'narten-thomas.jpg', which would wrongly swap the photo and the thumbnail.
if candidates and '-th.' in candidates[1]:
    candidates.reverse()
|
||||
|
||||
|
||||
# At this point we either have no candidates or two. If two, the first will be the thumb
|
||||
|
||||
def copy(old, new):
    """Copy the file `old` to `new`, unless `new` already exists.

    When the destination is missing, the copy is announced on stdout, the
    file is copied, and the source's stat information is copied onto the
    new file.  An existing destination is left untouched, silently.
    """
    if os.path.exists(new):
        return
    print("Copying "+old+" to "+new)
    shutil.copy(old, new)
    shutil.copystat(old, new)
|
||||
|
||||
if len(candidates)==2:
|
||||
old_name = candidates[1]
|
||||
old_thumb_name = candidates[0]
|
||||
old_name_ext = os.path.splitext(old_name)[1]
|
||||
old_thumb_name_ext = os.path.splitext(old_thumb_name)[1]
|
||||
|
||||
new_name = person.photo_name(thumb=False)+old_name_ext.lower()
|
||||
new_thumb_name = person.photo_name(thumb=True)+old_thumb_name_ext.lower()
|
||||
|
||||
copy( os.path.join(old_images_dir,old_name), os.path.join(new_images_dir,new_name) )
|
||||
|
||||
#
|
||||
copy( os.path.join(old_images_dir,old_thumb_name), os.path.join(new_images_dir,new_thumb_name) )
|
||||
|
||||
print("")
|
||||
# Summary: list every file in the old images directory that was not picked
# up as a candidate for any person, skipping files that are not photos.
ignored_suffixes = ('.txt', '.lck', '.html')
ignored_prefixes = ('index.', 'milestoneupdate', 'nopicture', 'robots.txt')
# Build the lookup set once; testing against the list for every file is
# needlessly quadratic.
processed_names = set(processed_files)

not_processed = 0
for entry in pathlib.Path(old_images_dir).iterdir():
    if not entry.is_file():
        continue
    if entry.suffix.lower() in ignored_suffixes:
        continue
    if entry.name.startswith(ignored_prefixes):
        continue
    name = entry.name
    # The original code always decoded the name, i.e. it expected byte
    # strings (Python 2); only decode when that is actually the case.
    if isinstance(name, bytes):
        name = name.decode('utf8')
    if name not in processed_names:
        not_processed += 1
        print(u"Not processed: "+name)
print("")
print("")
print("Not processed: %s files" % not_processed)
|
|
@ -1,121 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os, sys, shutil, pathlib
|
||||
|
||||
# boilerplate
|
||||
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../web/"))
|
||||
sys.path = [ basedir ] + sys.path
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ietf.settings")
|
||||
|
||||
import django
|
||||
django.setup()
|
||||
|
||||
from ietf.group.models import Role
|
||||
|
||||
old_images_dir = os.path.join(django.conf.settings.OLD_PHOTOS_DIR,'wg/images/')
|
||||
new_images_dir = os.path.join(django.conf.settings.PHOTOS_DIR,django.conf.settings.PHOTO_URL_PREFIX)
|
||||
|
||||
old_image_files = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(old_images_dir):
|
||||
old_image_files.extend(filenames)
|
||||
break # Only interested in the files in the top directory
|
||||
|
||||
old_image_files_lc = map(lambda x:x.lower(),old_image_files)
|
||||
|
||||
interesting_persons = set()
|
||||
|
||||
interesting_persons.update([r.person for r in Role.objects.filter(group__type='wg',group__state='active',name='chair')])
|
||||
interesting_persons.update([r.person for r in Role.objects.filter(group__type='rg',group__state='active',name='chair')])
|
||||
interesting_persons.update([r.person for r in Role.objects.filter(group__type='area',group__state='active',name_id='ad')])
|
||||
interesting_persons.update([r.person for r in Role.objects.filter(group__acronym='iab',name_id='member')])
|
||||
interesting_persons.update([r.person for r in Role.objects.filter(group__acronym='irtf',name_id='chair')])
|
||||
|
||||
#from ietf.person.models import Person
|
||||
#interesting_persons = Person.objects.filter(name__contains="Burman")
|
||||
|
||||
exceptions = {
|
||||
'Aboba' : 'aboba-bernard',
|
||||
'Bernardos' : 'cano-carlos',
|
||||
'Bormann' : 'bormann-carsten',
|
||||
'Wesley George' : 'george-wes',
|
||||
'Hinden' : 'hinden-bob',
|
||||
'Hutton' : 'hutton-andy',
|
||||
'Narten' : 'narten-thomas', # but there's no picture of him
|
||||
'O\'Donoghue' : 'odonoghue-karen',
|
||||
'Przygienda' : 'przygienda-antoni',
|
||||
'Salowey' : 'salowey-joe',
|
||||
'Patricia Thaler' : 'thaler-pat',
|
||||
'Gunter Van de Velde' : 'vandevelde-gunter',
|
||||
'Eric Vyncke' : 'vynke-eric',
|
||||
'Zuniga' : 'zuniga-carlos-juan',
|
||||
'Zhen Cao' : 'zhen-cao',
|
||||
|
||||
}
|
||||
|
||||
# Manually copied Bo Burman and Thubert Pascal from wg/photos/
|
||||
# Manually copied Victor Pascual (main image, not thumb) from wg/
|
||||
# Manually copied Eric Vync?ke (main image, not thumb) from wg/photos/
|
||||
# Manually copied Danial King (main image, not thumb) from wg/photos/
|
||||
# Manually copied the thumb (not labelled as such) for Tianran Zhou as both the main and thumb image from wg/photos/
|
||||
|
||||
|
||||
processed_files = []
|
||||
|
||||
for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.ascii):
|
||||
substr_pattern = None
|
||||
for exception in exceptions:
|
||||
if exception in person.ascii:
|
||||
substr_pattern = exceptions[exception]
|
||||
break
|
||||
if not substr_pattern:
|
||||
name_parts = person.ascii.lower().split()
|
||||
substr_pattern = '-'.join(name_parts[-1:]+name_parts[0:1])
|
||||
|
||||
candidates = [x for x in old_image_files_lc if x.startswith(substr_pattern)]
|
||||
|
||||
# Fixup for other exceptional cases
|
||||
if person.ascii=="Lee Howard":
|
||||
candidates = candidates[:2] # strip howard-lee1.jpg
|
||||
|
||||
if person.ascii=="David Oran":
|
||||
candidates = ['oran-dave-th.jpg','oran-david.jpg']
|
||||
|
||||
if person.ascii=="Susan Hares":
|
||||
candidates = ['hares-sue-th.jpg','hares-susan.jpg']
|
||||
|
||||
if person.ascii=="Mahesh Jethanandani":
|
||||
candidates = ['mahesh-jethanandani-th.jpg','jethanandani-mahesh.jpg']
|
||||
|
||||
if len(candidates) not in [0,2]:
|
||||
candidates = [x for x in candidates if not '00' in x]
|
||||
|
||||
# At this point we either have no candidates or two. If two, the first will be the thumb
|
||||
|
||||
def original_case(name):
|
||||
return old_image_files[old_image_files_lc.index(name)]
|
||||
|
||||
def copy(old, new):
|
||||
global processed_files
|
||||
print("Copying", old, "to", new)
|
||||
shutil.copy(old, new)
|
||||
processed_files.append(old)
|
||||
|
||||
if len(candidates)==2:
|
||||
old_name = original_case(candidates[1])
|
||||
old_thumb_name = original_case(candidates[0])
|
||||
old_name_ext = os.path.splitext(old_name)[1]
|
||||
old_thumb_name_ext = os.path.splitext(old_thumb_name)[1]
|
||||
|
||||
new_name = person.photo_name(thumb=False)+old_name_ext.lower()
|
||||
new_thumb_name = person.photo_name(thumb=True)+old_thumb_name_ext.lower()
|
||||
|
||||
copy( os.path.join(old_images_dir,old_name), os.path.join(new_images_dir,new_name) )
|
||||
|
||||
#
|
||||
copy( os.path.join(old_images_dir,old_thumb_name), os.path.join(new_images_dir,new_thumb_name) )
|
||||
|
||||
|
||||
for file in pathlib.Path(old_images_dir).iterdir():
|
||||
if file.is_file():
|
||||
if not str(file) in processed_files:
|
||||
print("Not processed:", file.name)
|
Loading…
Reference in a new issue