Merged in [12990] and [12991] from rcross@amsl.com:

Refactor session audio file import.  Add informative email.  Fixes #2164.
 - Legacy-Id: 12998
Note: SVN reference [12990] has been migrated to Git commit 084f8a7495

Note: SVN reference [12991] has been migrated to Git commit 6a5f180fb0
This commit is contained in:
Henrik Levkowetz 2017-03-12 12:15:37 +00:00
commit 0af1223b3c
6 changed files with 274 additions and 35 deletions

View file

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.5 on 2017-03-07 11:59
from __future__ import unicode_literals
import os
from django.db import migrations
from ietf.secr.proceedings.proc_utils import import_audio_files
def purge_missing_files(apps, meeting):
    '''
    Delete recording Documents of `meeting` whose audio file is not on disk.

    apps    -- migration app registry, used to look up the historical
               Document model
    meeting -- Meeting whose documents with an external_url starting with
               'https://www.ietf.org/audio/ietf<number>' are checked

    The files are looked up below settings.MEETING_RECORDINGS_DIR, the same
    directory import_audio_files() reads from.  If the per-meeting directory
    does not exist at all nothing is deleted.
    '''
    # Function-scope import so this block does not depend on the module's
    # import list.
    from django.conf import settings

    Document = apps.get_model('doc', 'Document')
    audio_dir = os.path.join(settings.MEETING_RECORDINGS_DIR,
                             'ietf{}'.format(meeting.number))
    if not os.path.isdir(audio_dir):
        # Without the audio tree (e.g. a development host) every file would
        # look "missing" and all recording documents would be deleted.
        print("Skipping purge, no audio directory: {}".format(audio_dir))
        return
    url = 'https://www.ietf.org/audio/ietf{}'.format(meeting.number)
    for document in Document.objects.filter(external_url__startswith=url):
        # the file name is the last path component of the URL
        filename = document.external_url.split('/')[-1]
        if not os.path.exists(os.path.join(audio_dir, filename)):
            print("Removing missing recording: {} ({})".format(filename, document.pk))
            document.delete()
def forward(apps, schema_editor):
    '''
    Forward data migration for meetings 94 through 97.

    For each meeting: purge recording documents whose file is missing,
    re-run the audio file import, and print how many recording documents
    the import added.
    '''
    meeting_model = apps.get_model('meeting', 'Meeting')
    document_model = apps.get_model('doc', 'Document')

    def recording_count():
        # fresh COUNT query on every call
        return document_model.objects.filter(type='recording').count()

    for meeting in meeting_model.objects.filter(number__in=range(94,98)):
        print('\nMeeting #{}:'.format(meeting.number))
        purge_missing_files(apps, meeting)
        count_before = recording_count()
        import_audio_files(meeting)
        added = recording_count() - count_before
        print(' {} Documents Added'.format(added))
def backward(apps, schema_editor):
    '''Reverse step of the migration: deliberately does nothing.'''
    return None
class Migration(migrations.Migration):
    '''Data migration that runs forward() when applied and backward() when reversed.'''

    dependencies = [('meeting', '0046_auto_20170201_0857')]

    # backward is a no-op, so un-applying this migration changes no data
    operations = [migrations.RunPython(forward, reverse_code=backward)]

View file

@ -11,6 +11,7 @@ import debug # pyflakes:ignore
from ietf.dbtemplate.models import DBTemplate
from ietf.meeting.models import Session
from ietf.group.utils import can_manage_materials
from ietf.secr.proceedings.proc_utils import import_audio_files
def group_sessions(sessions):
@ -122,6 +123,7 @@ def finalize(meeting):
sp.rev = '00'
sp.save()
import_audio_files(meeting)
create_proceedings_templates(meeting)
meeting.proceedings_final = True
meeting.save()

View file

@ -7,21 +7,23 @@ from urllib2 import urlopen
import datetime
import glob
import os
import re
import shutil
import subprocess
import debug # pyflakes:ignore
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.http import HttpRequest
from django.shortcuts import render_to_response, render
from django.db.utils import ConnectionDoesNotExist
from ietf.doc.models import Document, RelatedDocument, DocEvent, NewRevisionDocEvent, State
from ietf.doc.models import Document, DocAlias, RelatedDocument, DocEvent, NewRevisionDocEvent, State
from ietf.group.models import Group, Role
from ietf.group.utils import get_charter_text
from ietf.meeting.helpers import get_schedule
from ietf.meeting.models import Session, Meeting, SchedTimeSessAssignment, SessionPresentation
from ietf.meeting.models import Session, Meeting, SchedTimeSessAssignment, SessionPresentation, TimeSlot
from ietf.person.models import Person
from ietf.secr.proceedings.models import InterimMeeting # proxy model
from ietf.secr.proceedings.models import Registration
@ -29,36 +31,86 @@ from ietf.secr.utils.document import get_rfc_num
from ietf.secr.utils.group import groups_by_session
from ietf.secr.utils.meeting import get_proceedings_path, get_materials, get_session
from ietf.utils.log import log
from ietf.utils.mail import send_mail
# Matches the basename (extension removed) of a session recording, e.g.
# "ietf90-salonb-20140721-1710".  Named groups:
#   number -- meeting number (digits following "ietf")
#   room   -- room name as it appears in the file name
#   time   -- session start in the form YYYYMMDD-HHMM
AUDIO_FILE_RE = re.compile(r'ietf(?P<number>[\d]+)-(?P<room>.*)-(?P<time>[\d]{8}-[\d]{4})')
# -------------------------------------------------
# Helper Functions
# -------------------------------------------------
def check_audio_files(group,meeting):
    '''
    Checks for audio files and creates corresponding materials (docs) for the Session
    Expects audio files in the format ietf[meeting num]-[room]-YYYYMMDD-HHMM-*,
    Example: ietf90-salonb-20140721-1710-pm3.mp3

    Per-group variant: only sessions of `group` in `meeting` that are
    scheduled in the official agenda are considered.
    NOTE(review): the change set this code comes from introduces
    import_audio_files(meeting) as the replacement; logic here is unchanged.
    '''
    for session in Session.objects.filter(group=group,
                                          meeting=meeting,
                                          status=('sched'),
                                          timeslotassignments__schedule=meeting.agenda):
        timeslot = session.official_timeslotassignment().timeslot
        if not (timeslot.location and timeslot.time):
            continue
        # build the glob pattern from the room name and start time
        room = timeslot.location.name.lower()
        room = room.replace(' ','')
        room = room.replace('/','_')
        time = timeslot.time.strftime("%Y%m%d-%H%M")
        filename = 'ietf{}-{}-{}*'.format(meeting.number,room,time)
        path = os.path.join(settings.MEETING_RECORDINGS_DIR,'ietf{}'.format(meeting.number),filename)
        for file in glob.glob(path):
            url = 'https://www.ietf.org/audio/ietf{}/{}'.format(meeting.number,os.path.basename(file))
            doc = Document.objects.filter(external_url=url).first()
            if not doc:
                create_recording(session,url)


def import_audio_files(meeting):
    '''
    Checks for audio files and creates corresponding materials (docs) for the Session
    Expects audio files in the format ietf[meeting num]-[room]-YYYYMMDD-HHMM.*,
    Example: ietf90-salonb-20140721-1710.mp3

    Every file in MEETING_RECORDINGS_DIR/<type slug><meeting number> is
    matched to a timeslot by get_timeslot_for_filename().  For a matched
    file one recording document is fetched or created and attached to every
    scheduled, non-canceled session of the timeslot; each such session's
    group gets a 'recording-<meeting>-<acronym>-<n>' alias on the document.
    Files without a matching timeslot are reported in a single warning email.
    Returns None; does nothing if the directory does not exist.
    '''
    unmatched_files = []
    path = os.path.join(settings.MEETING_RECORDINGS_DIR, meeting.type.slug + meeting.number)
    if not os.path.exists(path):
        return None
    # os.listdir() order is arbitrary; sort so that sequence numbers are
    # assigned in a reproducible order
    for filename in sorted(os.listdir(path)):
        timeslot = get_timeslot_for_filename(filename)
        if not timeslot:
            # use for reconciliation email
            unmatched_files.append(filename)
            continue
        sessionassignments = timeslot.sessionassignments.filter(
            schedule=timeslot.meeting.agenda,
            session__status='sched',
        ).exclude(session__agenda_note__icontains='canceled').order_by('timeslot__time')
        if not sessionassignments:
            continue
        doc = get_or_create_recording_document(filename,sessionassignments[0].session)
        for sessionassignment in sessionassignments:
            session = sessionassignment.session
            if doc not in session.materials.all():
                # add document to session
                presentation = SessionPresentation.objects.create(
                    session=session,
                    document=doc,
                    rev=doc.rev)
                session.sessionpresentation_set.add(presentation)
            # The trailing '-' keeps one acronym from matching aliases of a
            # longer acronym sharing its prefix (e.g. 'dns' vs 'dnsop'); the
            # prefix uses the same meeting number as the name created below.
            alias_prefix = 'recording-{}-{}-'.format(session.meeting.number,session.group.acronym)
            if not doc.docalias_set.filter(name__startswith=alias_prefix).exists():
                sequence = get_next_sequence(session.group,session.meeting,'recording')
                name = 'recording-{}-{}-{}'.format(session.meeting.number,session.group.acronym,sequence)
                doc.docalias_set.create(name=name)
    if unmatched_files:
        send_audio_import_warning(unmatched_files)
def get_timeslot_for_filename(filename):
    '''Returns a timeslot matching the filename given.
    NOTE: currently only works with ietfNN prefix (regular meetings)

    Returns None when the name does not follow the recording naming pattern,
    when its meeting, room or timeslot cannot be found, when the date/time
    portion is not a valid date, or when more than one timeslot matches.
    '''
    basename, _ = os.path.splitext(filename)
    match = AUDIO_FILE_RE.match(basename)
    if not match:
        return None
    fields = match.groupdict()
    try:
        meeting = Meeting.objects.get(number=fields['number'])
        # map normalized (file name) room names back to the real room names
        room_mapping = {normalize_room_name(room.name): room.name for room in meeting.room_set.all()}
        # the pattern only checks digit counts, so strptime can still raise
        # ValueError, e.g. for "20171345-9999"
        time = datetime.datetime.strptime(fields['time'],'%Y%m%d-%H%M')
        return TimeSlot.objects.get(
            meeting=meeting,
            location__name=room_mapping[fields['room']],
            time=time)
    except (ObjectDoesNotExist, KeyError, ValueError, TimeSlot.MultipleObjectsReturned):
        # treat every lookup failure as "no match" so the caller can report
        # the file instead of the whole import aborting
        return None
def normalize_room_name(name):
    '''Return the room name in the form used inside recording file names.

    The name is lower-cased, spaces are dropped and '/' becomes '_'.
    '''
    lowered = name.lower()
    without_spaces = lowered.replace(' ','')
    return without_spaces.replace('/','_')
def get_or_create_recording_document(filename,session):
    '''
    Return the recording Document for the audio file `filename`.

    The document is looked up by its external_url
    (settings.IETF_AUDIO_URL + 'ietf<meeting number>/<filename>').  If none
    exists a new one is created for `session` with create_recording().
    '''
    meeting = session.meeting
    url = settings.IETF_AUDIO_URL + 'ietf{}/{}'.format(meeting.number, filename)
    # filter().first() rather than get(): if several documents carry the same
    # URL, get() would raise MultipleObjectsReturned and abort the import
    doc = Document.objects.filter(external_url=url).first()
    if doc is not None:
        return doc
    return create_recording(session,url)
def create_recording(session,url):
@ -94,6 +146,30 @@ def create_recording(session,url):
pres = SessionPresentation.objects.create(session=session,document=doc,rev=doc.rev)
session.sessionpresentation_set.add(pres)
return doc
def get_next_sequence(group,meeting,type):
    '''
    Returns the next sequence number to use for a document of type = type.
    Takes a group=Group object, meeting=Meeting object, type = string

    The result is one more than the highest numeric suffix among the DocAlias
    names starting with '<type>-<meeting number>-<group acronym>-', or 1 when
    there is no such alias.
    '''
    prefix = '{}-{}-{}-'.format(type,meeting.number,group.acronym)
    names = DocAlias.objects.filter(name__startswith=prefix).values_list('name',flat=True)
    sequences = []
    for name in names:
        # Compare suffixes as numbers: ordering the names as strings puts
        # '...-10' before '...-9', which would hand out 10 a second time.
        try:
            sequences.append(int(name[len(prefix):]))
        except ValueError:
            # alias with a non-numeric tail; not part of the numbering
            continue
    if not sequences:
        return 1
    return max(sequences) + 1
def send_audio_import_warning(unmatched_files):
    '''Mail settings.AUDIO_IMPORT_EMAIL the list of audio files that could not be matched to a timeslot'''
    template_context = dict(unmatched_files=unmatched_files)
    send_mail(
        request=None,
        to=settings.AUDIO_IMPORT_EMAIL,
        frm="IETF Secretariat <ietf-secretariat@ietf.org>",
        subject="Audio file import warning",
        template="proceedings/audio_import_warning.txt",
        context=template_context,
        extra={},
    )
def mycomp(timeslot):
'''
This takes a timeslot object and returns a key to sort by the area acronym or None
@ -172,13 +248,6 @@ def get_progress_stats(sdate,edate):
return data
def get_next_sequence(group,meeting,type):
    '''
    Returns the next sequence number to use for a document of type = type.
    Takes a group=Group object, meeting=Meeting object, type = string

    The result is one more than the highest numeric suffix among the Document
    names starting with '<type>-<meeting number>-<group acronym>-', or 1 when
    there is no such document.
    NOTE(review): a DocAlias-based function of the same name appears earlier
    in this change set; confirm which of the two definitions should remain.
    '''
    prefix = '{}-{}-{}-'.format(type,meeting.number,group.acronym)
    names = Document.objects.filter(name__startswith=prefix).values_list('name',flat=True)
    sequences = []
    for name in names:
        # count() + 1 repeats a number as soon as the numbering has a gap
        # (e.g. names -1 and -3 give 3 again); use the highest suffix instead
        try:
            sequences.append(int(name[len(prefix):]))
        except ValueError:
            # name with a non-numeric tail; not part of the numbering
            continue
    if not sequences:
        return 1
    return max(sequences) + 1
def write_html(path,content):
f = open(path,'w')
f.write(content)
@ -226,7 +295,7 @@ def create_proceedings(meeting, group, is_final=False):
if meeting.type_id == 'ietf' and int(meeting.number) < 79:
return
check_audio_files(group,meeting)
#check_audio_files(group,meeting)
materials = get_materials(group,meeting)
chairs = group.role_set.filter(name='chair')

View file

@ -5,16 +5,21 @@ import shutil
from django.conf import settings
from django.core.urlresolvers import reverse
from ietf.doc.models import Document
from ietf.group.models import Group
from ietf.meeting.models import Session
from ietf.meeting.models import Session, TimeSlot, SchedTimeSessAssignment
from ietf.meeting.test_data import make_meeting_test_data
from ietf.utils.test_data import make_test_data
from ietf.utils.test_utils import TestCase
from ietf.utils.mail import outbox
from ietf.name.models import SessionStatusName
from ietf.meeting.factories import SessionFactory
from ietf.secr.proceedings.proc_utils import create_proceedings
from ietf.secr.proceedings.proc_utils import (create_proceedings, import_audio_files,
get_timeslot_for_filename, normalize_room_name, send_audio_import_warning,
get_or_create_recording_document, create_recording, get_next_sequence)
SECR_USER='secretary'
@ -33,6 +38,17 @@ class ProceedingsTestCase(TestCase):
self.assertEqual(response.status_code, 200)
class RecordingTestCase(TestCase):
def setUp(self):
    '''Redirect settings.MEETING_RECORDINGS_DIR to a scratch directory, creating it if needed.'''
    scratch_dir = os.path.abspath("tmp-meeting-recordings-dir")
    self.meeting_recordings_dir = scratch_dir
    # keep the configured value so tearDown can restore it
    self.saved_meeting_recordings_dir = settings.MEETING_RECORDINGS_DIR
    settings.MEETING_RECORDINGS_DIR = scratch_dir
    if os.path.exists(scratch_dir):
        return
    os.makedirs(scratch_dir)
def tearDown(self):
    '''Remove the scratch recordings directory, then restore the saved setting.'''
    scratch_dir = self.meeting_recordings_dir
    shutil.rmtree(scratch_dir)
    original_dir = self.saved_meeting_recordings_dir
    settings.MEETING_RECORDINGS_DIR = original_dir
def test_page(self):
meeting = make_meeting_test_data()
url = reverse('ietf.secr.proceedings.views.recording', kwargs={'meeting_num':meeting.number})
@ -62,6 +78,105 @@ class RecordingTestCase(TestCase):
response = self.client.post(url,dict(external_url=external_url),follow=True)
self.assertEqual(response.status_code, 200)
self.failUnless(external_url in response.content)
def test_import_audio_files(self):
    '''A recording file for a scheduled session's timeslot yields one recording material.'''
    meeting = make_meeting_test_data()
    mars = Group.objects.get(acronym='mars')
    session = Session.objects.filter(meeting=meeting,group=mars).first()
    # the import only considers sessions in status 'sched'
    session.status = SessionStatusName.objects.get(slug='sched')
    session.save()
    self.create_audio_file_for_timeslot(session.official_timeslotassignment().timeslot)
    import_audio_files(meeting)
    recordings = session.materials.filter(type='recording')
    self.assertEqual(recordings.count(),1)
def create_audio_file_for_timeslot(self, timeslot):
    '''Write a dummy recording file for `timeslot` into the meeting's recordings directory.'''
    recording_name = self.get_filename_for_timeslot(timeslot)
    path = os.path.join(
        settings.MEETING_RECORDINGS_DIR,
        'ietf' + timeslot.meeting.number,
        recording_name)
    target_dir = os.path.dirname(path)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    with open(path, "w") as handle:
        handle.write('dummy')
def get_filename_for_timeslot(self, timeslot):
    '''Build the recording file name (<type slug><number>-<room>-<YYYYMMDD-HHMM>.mp3) for a timeslot'''
    meeting = timeslot.meeting
    fields = {
        'prefix': meeting.type.slug + meeting.number,
        'room': normalize_room_name(timeslot.location.name),
        'date': timeslot.time.strftime('%Y%m%d-%H%M'),
    }
    return "{prefix}-{room}-{date}.mp3".format(**fields)
def test_import_audio_files_shared_timeslot(self):
    '''A recording for a timeslot shared by two sessions is attached to both, with one alias per group.'''
    meeting = make_meeting_test_data()
    acronyms = ('mars', 'ames')
    sessions = {}
    for acronym in acronyms:
        sessions[acronym] = Session.objects.filter(meeting=meeting,group__acronym=acronym).first()
    scheduled = SessionStatusName.objects.get(slug='sched')
    for acronym in acronyms:
        sessions[acronym].status = scheduled
        sessions[acronym].save()
    mars_session = sessions['mars']
    ames_session = sessions['ames']
    timeslot = mars_session.official_timeslotassignment().timeslot
    # put the ames session into the same timeslot as the mars session
    SchedTimeSessAssignment.objects.create(timeslot=timeslot,session=ames_session,schedule=meeting.agenda)
    self.create_audio_file_for_timeslot(timeslot)
    import_audio_files(meeting)
    doc = mars_session.materials.filter(type='recording').first()
    self.assertIn(doc, ames_session.materials.all())
    for alias_name in ('recording-42-mars-1', 'recording-42-ames-1'):
        self.assertTrue(doc.docalias_set.filter(name=alias_name).exists())
def test_normalize_room_name(self):
    '''Spaces are dropped, case is lowered and "/" becomes "_".'''
    expectations = (
        ('Test Room', 'testroom'),
        ('Rome/Venice', 'rome_venice'),
    )
    for room_name, normalized in expectations:
        self.assertEqual(normalize_room_name(room_name), normalized)
def test_get_timeslot_for_filename(self):
    '''The file name generated for a session timeslot resolves back to that timeslot.'''
    meeting = make_meeting_test_data()
    session_slots = TimeSlot.objects.filter(meeting=meeting,type='session')
    timeslot = session_slots.first()
    recording_name = self.get_filename_for_timeslot(timeslot)
    found = get_timeslot_for_filename(recording_name)
    self.assertEqual(found,timeslot)
def test_get_or_create_recording_document(self):
    '''First call creates a recording document, second call returns the same one.'''
    def recording_count():
        return Document.objects.filter(type='recording').count()

    meeting = make_meeting_test_data()
    mars = Group.objects.get(acronym='mars')
    session = Session.objects.filter(meeting=meeting,group=mars).first()
    filename = 'ietf42-testroom-20000101-0800.mp3'

    # test create
    count_initial = recording_count()
    created = get_or_create_recording_document(filename,session)
    count_created = recording_count()
    self.assertEqual(count_created,count_initial + 1)
    self.assertTrue(created.external_url.endswith(filename))

    # test get
    fetched = get_or_create_recording_document(filename,session)
    count_fetched = recording_count()
    self.assertEqual(count_fetched,count_created)
    self.assertEqual(created,fetched)
def test_create_recording(self):
    '''create_recording() names the document, sets group and URL, and attaches it to the session.'''
    meeting = make_meeting_test_data()
    mars = Group.objects.get(acronym='mars')
    session = Session.objects.filter(meeting=meeting,group=mars).first()
    filename = 'ietf42-testroomt-20000101-0800.mp3'
    url = settings.IETF_AUDIO_URL + 'ietf{}/{}'.format(meeting.number, filename)
    recording = create_recording(session, url)
    self.assertEqual(recording.name,'recording-42-mars-1')
    self.assertEqual(recording.group,mars)
    self.assertEqual(recording.external_url,url)
    self.assertIn(recording, session.materials.all())
def test_get_next_sequence(self):
    '''With the stock meeting test data the first recording sequence number for mars is 1.'''
    meeting = make_meeting_test_data()
    mars = Group.objects.get(acronym='mars')
    self.assertEqual(get_next_sequence(mars,meeting,'recording'),1)
def test_send_audio_import_warning(self):
    '''Sending the warning adds exactly one message with the expected subject to the outbox.'''
    messages_before = len(outbox)
    unmatched = ['recording-43-badroom-20000101-0800.mp3']
    send_audio_import_warning(unmatched)
    self.assertEqual(len(outbox), messages_before + 1)
    newest = outbox[-1]
    self.assertIn('Audio file import', newest['Subject'])
class OldProceedingsTestCase(TestCase):
''' Ensure coverage of fragments of old proceedings generation until those are removed '''

View file

@ -0,0 +1,9 @@
WARNING:
After the last meeting session audio file import there are {{ unmatched_files|length }}
file(s) that were not matched to a timeslot.
{% for file in unmatched_files %}{{ file }}
{% endfor %}

View file

@ -132,6 +132,7 @@ OLD_PHOTO_DIRS = [
IETF_HOST_URL = 'https://www.ietf.org/'
IETF_ID_URL = IETF_HOST_URL + 'id/'
IETF_ID_ARCHIVE_URL = IETF_HOST_URL + 'archive/id/'
# Base URL for session audio recordings; recording documents use
# IETF_AUDIO_URL + 'ietf<meeting number>/<file name>' as their external_url.
IETF_AUDIO_URL = IETF_HOST_URL + 'audio/'
# Absolute path to the directory static files should be collected to.
@ -598,7 +599,7 @@ CACHES = {
}
IPR_EMAIL_FROM = 'ietf-ipr@ietf.org'
# Recipients of the warning mail sent when imported audio files could not be
# matched to a timeslot.
AUDIO_IMPORT_EMAIL = ['agenda@ietf.org']
IANA_EVAL_EMAIL = "drafts-eval@icann.org"
# Put real password in settings_local.py