Refactored the import_mailman_listinfo command to run faster (about 20 seconds instead of 30+ minutes) and added time logging (enabled with --verbosity 2)

- Legacy-Id: 18715
This commit is contained in:
Henrik Levkowetz 2020-11-19 16:13:31 +00:00
parent 6c953bd010
commit af7f51a81f

View file

@ -1,6 +1,7 @@
# Copyright The IETF Trust 2016-2019, All Rights Reserved # Copyright The IETF Trust 2016-2019, All Rights Reserved
import sys import sys
import time
from textwrap import dedent from textwrap import dedent
import debug # pyflakes:ignore import debug # pyflakes:ignore
@ -21,24 +22,40 @@ except ImportError:
pass pass
from ietf.mailinglists.models import List, Subscribed from ietf.mailinglists.models import List, Subscribed
from ietf.utils.log import log
from ietf.utils.text import decode from ietf.utils.text import decode
# Baseline timestamp for log_time(): each call reports the seconds elapsed
# since this value and then resets it to the current time.
mark = time.time()
def import_mailman_listinfo(verbosity=0):
    """Synchronise the List and Subscribed tables with Mailman's list data.

    For every Mailman list, the matching List row is created or updated
    (description and advertised flag).  For advertised lists, the set of
    enabled regular and digest members is compared with the subscriptions
    already stored in the database, and only the differences are applied.

    verbosity: values above 1 log the time elapsed between processing steps;
        values above 2 additionally write per-item notes to stdout.

    Returns None.  Does nothing (apart from an optional note) when the
    Mailman modules could not be imported.
    """
    def note(msg):
        # Per-item progress output, only at the highest verbosity.
        if verbosity > 2:
            sys.stdout.write(msg)
            sys.stdout.write('\n')

    def log_time(msg):
        # Log msg together with the seconds elapsed since the previous
        # log_time() call, then reset the module-level baseline.
        global mark
        if verbosity > 1:
            t = time.time()
            log(msg+' (%.1fs)'% (t-mark))
            mark = t

    if not have_mailman:
        note("Could not import mailman modules -- skipping import of mailman list info")
        return

    log("Starting import of list info from Mailman")
    names = sorted(Utils.list_names())
    log_time("Fetched list of mailman list names")
    addr_max_length = Subscribed._meta.get_field('email').max_length

    # Load every list's known subscriber addresses up front so the per-list
    # comparison below is done with in-memory sets instead of per-list queries.
    subscribed = {
        lst.name: set(lst.subscribed_set.values_list('email', flat=True))
        for lst in List.objects.all().prefetch_related('subscribed_set')
    }
    log_time("Computed dictionary of list members")

    for name in names:
        mlist = MailList.MailList(name, lock=False)
        note("List: %s" % mlist.internal_name())
        log_time("Fetched Mailman list object for %s" % name)

        lists = List.objects.filter(name=mlist.real_name)
        if lists.count() > 1:
            # Duplicates exist: keep the first row (updated below), drop the rest.
            for item in lists[1:]:
                item.delete()
        mmlist, created = List.objects.get_or_create(name=mlist.real_name)

        # Only write to the database when something actually changed.
        dirty = False
        desc = decode(mlist.description)[:256]
        if mmlist.description != desc:
            mmlist.description = desc
            dirty = True
        if mmlist.advertised != mlist.advertised:
            mmlist.advertised = mlist.advertised
            dirty = True
        if dirty:
            mmlist.save()
        log_time(" Updated database List object for %s" % name)

        # The following calls return lowercased addresses
        if mlist.advertised:
            members = mlist.getRegularMemberKeys() + mlist.getDigestMemberKeys()
            log_time(" Fetched list of list members")
            members = { m for m in members if mlist.getDeliveryStatus(m) == MemberAdaptor.ENABLED }
            log_time(" Filtered list of list members")

            if mlist.real_name not in subscribed:
                # The dictionary was built before get_or_create() above, so a
                # list created during this run has no entry yet.
                log("Note: didn't find '%s' in the dictionary of subscriptions" % mlist.real_name)
                continue
            known = subscribed[mlist.real_name]
            log_time(" Fetched known list members from database")

            to_remove = known - members
            to_add = members - known

            for addr in to_remove:
                note(" Removing subscription: %s" % (addr))
                old = Subscribed.objects.get(email=addr)
                log_time(" Fetched subscribed object")
                old.lists.remove(mmlist)
                log_time(" Removed %s from %s" % (mmlist, old))
                if old.lists.count() == 0:
                    note(" Removing address with no subscriptions: %s" % (addr))
                    old.delete()
                    log_time(" Removed %s" % old)
            log_time(" Removed addresses no longer subscribed")
            if to_remove:
                log(" Removed %s addresses from %s" % (len(to_remove), name))

            for addr in to_add:
                if len(addr) > addr_max_length:
                    sys.stderr.write(" ** Email address subscribed to '%s' too long for table: <%s>\n" % (name, addr))
                    continue
                note(" Adding subscription: %s" % (addr))
                try:
                    new, created = Subscribed.objects.get_or_create(email=addr)
                except MultipleObjectsReturned as e:
                    sys.stderr.write(" ** Error handling %s in %s: %s\n" % (addr, name, e))
                    continue
                new.lists.add(mmlist)
            log_time(" Added new addresses")
            if to_add:
                log(" Added %s addresses to %s" % (len(to_add), name))

    log("Completed import of list info from Mailman")
class Command(BaseCommand): class Command(BaseCommand):
""" """