Add import of YouTube session videos using YouTube Data API. Fixes #2249. Commit ready for merge.

- Legacy-Id: 13485
This commit is contained in:
Ryan Cross 2017-06-01 18:36:46 +00:00
parent 930aaccc87
commit 7ee2a16967
12 changed files with 11074 additions and 36 deletions

View file

@ -2,6 +2,7 @@ import os
import sys
import json
from importlib import import_module
from mock import patch
from django.apps import apps
from django.test import Client
@ -23,7 +24,8 @@ OMITTED_APPS = (
)
class CustomApiTestCase(TestCase):
def test_notify_meeting_import_audio_files(self):
@patch('ietf.secr.proceedings.proc_utils.import_youtube_video_urls')
def test_notify_meeting_import_audio_files(self, mock_import):
meeting = make_meeting_test_data()
client = Client(Accept='application/json')
# try invalid method GET

View file

@ -80,13 +80,14 @@ class InterimSessionInlineFormSet(BaseInlineFormSet):
if date:
dates.append(date)
if len(dates) < 2:
return
return self.cleaned_data
dates.sort()
last_date = dates[0]
for date in dates[1:]:
if last_date.day + 1 != date.day:
if last_date + datetime.timedelta(days=1) != date:
raise forms.ValidationError('For Multi-Day meetings, days must be consecutive')
last_date = date
return self.cleaned_data
class InterimMeetingModelForm(forms.ModelForm):
group = GroupModelChoiceField(queryset=Group.objects.filter(type__in=('wg', 'rg'), state__in=('active', 'proposed', 'bof')).order_by('acronym'), required=False)

View file

@ -970,7 +970,6 @@ class InterimTests(TestCase):
'session_set-INITIAL_FORMS':0}
r = self.client.post(urlreverse("ietf.meeting.views.interim_request"),data)
self.assertRedirects(r,urlreverse('ietf.meeting.views.upcoming'))
meeting = Meeting.objects.order_by('id').last()
self.assertEqual(meeting.type_id,'interim')

View file

@ -58,7 +58,8 @@ from ietf.meeting.helpers import send_interim_approval_request
from ietf.meeting.helpers import send_interim_announcement_request
from ietf.meeting.utils import finalize
from ietf.secr.proceedings.utils import handle_upload_file
from ietf.secr.proceedings.proc_utils import get_progress_stats, post_process, import_audio_files
from ietf.secr.proceedings.proc_utils import (get_progress_stats, post_process, import_audio_files,
import_youtube_video_urls)
from ietf.utils import log
from ietf.utils.mail import send_mail_message
from ietf.utils.pipe import pipe
@ -2173,6 +2174,7 @@ def api_import_recordings(request, number):
if request.method == 'POST':
meeting = get_meeting(number)
import_audio_files(meeting)
import_youtube_video_urls(meeting)
return HttpResponse(status=201)
else:
return HttpResponse(status=405)

View file

@ -3,16 +3,19 @@ proc_utils.py
This module contains all the functions for generating static proceedings pages
'''
from urllib2 import urlopen
import datetime
import glob
import httplib2
import os
import re
import shutil
import subprocess
import urllib2
from urllib import urlencode
import debug # pyflakes:ignore
from apiclient.discovery import build
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.http import HttpRequest
@ -34,10 +37,84 @@ from ietf.utils.log import log
from ietf.utils.mail import send_mail
# Audio file names look like: ietf98-<room>-20170327-0900
AUDIO_FILE_RE = re.compile(r'ietf(?P<number>[\d]+)-(?P<room>.*)-(?P<time>[\d]{8}-[\d]{4})')
# Video titles look like: IETF98-<GROUP>-20170327-0900
# The meeting number is matched with \d+ (as in AUDIO_FILE_RE) so that
# three-digit meetings such as IETF100 are recognized as well.
VIDEO_TITLE_RE = re.compile(r'IETF(?P<number>\d+)-(?P<name>.*)-(?P<date>\d{8})-(?P<time>\d{4})')
# -------------------------------------------------
# Helper Functions
# Recording Functions
# -------------------------------------------------
def import_youtube_video_urls(meeting, http=None):
    '''Create Document and set external_url for session videos.

    Looks up the YouTube playlist named "IETF<meeting number>", walks its
    videos and, for every video whose title matches VIDEO_TITLE_RE and
    resolves to a session, gets or creates the recording document for the
    video URL.

    meeting: the Meeting whose playlist is imported.
    http:    optional httplib2-compatible transport used for every API call;
             a fresh httplib2.Http() is created per call when omitted.

    Returns None.
    '''
    # Build the transport per call: a default created in the signature would
    # be instantiated once at import time and shared by every caller.
    if http is None:
        http = httplib2.Http()
    youtube = build(settings.YOUTUBE_API_SERVICE_NAME, settings.YOUTUBE_API_VERSION,
                    developerKey=settings.YOUTUBE_API_KEY, http=http)
    # Hand the same transport to the helpers so that one caller-supplied
    # object controls all traffic generated by this import.
    playlistid = get_youtube_playlistid(youtube, 'IETF' + meeting.number, http=http)
    if playlistid is None:
        return None
    for video in get_youtube_videos(youtube, playlistid, http=http):
        match = VIDEO_TITLE_RE.match(video['title'])
        if not match:
            # not a session recording (e.g. a tutorial or wrap-up video)
            continue
        session = _get_session(**match.groupdict())
        if session:
            get_or_create_recording_document(video['url'], session)
def get_youtube_playlistid(youtube, title, http=None):
    '''Returns the youtube playlistId matching title string, a string.

    youtube: API client object as returned by apiclient.discovery.build().
    title:   playlist title to search for, e.g. "IETF98".
    http:    optional transport passed to request.execute(); when None the
             request falls back to the transport the client was built with.

    Returns None when the search yields no playlist.
    '''
    # http defaults to None rather than httplib2.Http(): a default built in
    # the signature is created once at import time and shared by all calls.
    request = youtube.search().list(
        q=title,
        part='id,snippet',
        channelId=settings.YOUTUBE_IETF_CHANNEL_ID,
        type='playlist',
        maxResults=1
    )
    search_response = request.execute(http=http)
    try:
        # only the first (best) hit is considered
        return search_response['items'][0]['id']['playlistId']
    except (KeyError, IndexError):
        return None
def get_youtube_videos(youtube, playlistid, http=None):
    '''Returns list of dictionaries with title, url keys.

    youtube:    API client object as returned by apiclient.discovery.build().
    playlistid: id of the playlist whose items are listed.
    http:       optional transport passed to request.execute(); when None the
                request falls back to the transport the client was built with.
    '''
    # http defaults to None rather than httplib2.Http(): a default built in
    # the signature is created once at import time and shared by all calls.
    videos = []
    playlistitems = youtube.playlistItems()
    request = playlistitems.list(part="snippet", playlistId=playlistid, maxResults=50)
    # handle pagination: list_next() returns None after the last page
    while request is not None:
        playlistitems_doc = request.execute(http=http)
        videos.extend(_get_urls_from_json(playlistitems_doc))
        request = playlistitems.list_next(request, playlistitems_doc)
    return videos
def _get_session(number,name,date,time):
'''Lookup session using data from video title'''
meeting = Meeting.objects.get(number=number)
schedule = meeting.agenda
timeslot_time = datetime.datetime.strptime(date + time,'%Y%m%d%H%M')
try:
assignment = SchedTimeSessAssignment.objects.get(
schedule = schedule,
session__group__acronym = name.lower(),
timeslot__time = timeslot_time,
)
except (SchedTimeSessAssignment.DoesNotExist, SchedTimeSessAssignment.MultipleObjectsReturned):
return None
return assignment.session
def _get_urls_from_json(doc):
    '''Returns list of dictionaries (keys: title, url), one per item in doc.

    doc is a decoded playlistItems response; each url is the watch URL of the
    video with the playlist id appended.
    '''
    results = []
    for entry in doc['items']:
        snippet = entry['snippet']
        video_title = snippet['title']
        # a sequence of pairs (not a dict) keeps 'v' ahead of 'list'
        query = urlencode([
            ('v', snippet['resourceId']['videoId']),
            ('list', snippet['playlistId']),
        ])
        results.append({'title': video_title,
                        'url': settings.YOUTUBE_BASE_URL + '?' + query})
    return results
def import_audio_files(meeting):
'''
Checks for audio files and creates corresponding materials (docs) for the Session
@ -58,20 +135,9 @@ def import_audio_files(meeting):
).exclude(session__agenda_note__icontains='canceled').order_by('timeslot__time')
if not sessionassignments:
continue
doc = get_or_create_recording_document(filename,sessionassignments[0].session)
for sessionassignment in sessionassignments:
session = sessionassignment.session
if doc not in session.materials.all():
# add document to session
presentation = SessionPresentation.objects.create(
session=session,
document=doc,
rev=doc.rev)
session.sessionpresentation_set.add(presentation)
if not doc.docalias_set.filter(name__startswith='recording-{}-{}'.format(meeting.number,session.group.acronym)):
sequence = get_next_sequence(session.group,session.meeting,'recording')
name = 'recording-{}-{}-{}'.format(session.meeting.number,session.group.acronym,sequence)
doc.docalias_set.create(name=name)
url = settings.IETF_AUDIO_URL + 'ietf{}/{}'.format(meeting.number, filename)
doc = get_or_create_recording_document(url,sessionassignments[0].session)
attach_recording(doc, [ x.session for x in sessionassignments ])
else:
# use for reconciliation email
unmatched_files.append(filename)
@ -98,20 +164,30 @@ def get_timeslot_for_filename(filename):
except (ObjectDoesNotExist, KeyError):
return None
def attach_recording(doc, sessions):
    '''Associate recording document with sessions.

    For each session: add doc to the session materials if it is not there
    yet, and give doc an alias of the form
    recording-<meeting>-<group>-<sequence> unless it already has one starting
    with recording-<meeting>-<group>.
    '''
    for sess in sessions:
        already_attached = doc in sess.materials.all()
        if not already_attached:
            pres = SessionPresentation.objects.create(session=sess, document=doc, rev=doc.rev)
            sess.sessionpresentation_set.add(pres)

        number = sess.meeting.number
        acronym = sess.group.acronym
        alias_prefix = 'recording-{}-{}'.format(number, acronym)
        if doc.docalias_set.filter(name__startswith=alias_prefix):
            # an alias for this meeting/group is already in place
            continue
        sequence = get_next_sequence(sess.group, sess.meeting, 'recording')
        doc.docalias_set.create(name='recording-{}-{}-{}'.format(number, acronym, sequence))
def normalize_room_name(name):
    '''Returns the room name in the form used as a portion of a filename.

    The name is lowercased, spaces are removed and slashes become underscores.
    '''
    lowered = name.lower()
    without_spaces = lowered.replace(' ', '')
    return without_spaces.replace('/', '_')
def get_or_create_recording_document(url,session):
    '''Return the recording Document whose external_url is url.

    When no such document exists yet, one is created for session via
    create_recording().

    url:     full URL of the recording (callers build it; this function no
             longer derives it from a filename).
    session: the Session the recording belongs to; only used on creation.
    '''
    try:
        return Document.objects.get(external_url=url)
    except ObjectDoesNotExist:
        return create_recording(session,url)
def create_recording(session,url):
'''
@ -182,6 +258,10 @@ def mycomp(timeslot):
key = None
return key
# -------------------------------------------------
# End Recording Functions
# -------------------------------------------------
def get_progress_stats(sdate,edate):
'''
This function takes a date range and produces a dictionary of statistics / objects for
@ -489,7 +569,7 @@ def gen_agenda(context):
# get the text agenda from datatracker
url = 'https://datatracker.ietf.org/meeting/%s/agenda.txt' % meeting.number
text = urlopen(url).read()
text = urllib2.urlopen(url).read()
path = os.path.join(settings.SECR_PROCEEDINGS_DIR,meeting.number,'agenda.txt')
write_html(path,text)

View file

@ -1,24 +1,28 @@
import debug # pyflakes:ignore
import json
import os
import shutil
from apiclient.discovery import build
from apiclient.http import HttpMock
from mock import patch
from django.conf import settings
from django.urls import reverse
from ietf.doc.models import Document
from ietf.group.models import Group
from ietf.meeting.factories import SessionFactory
from ietf.meeting.models import Session, TimeSlot, SchedTimeSessAssignment
from ietf.meeting.test_data import make_meeting_test_data
from ietf.name.models import SessionStatusName
from ietf.utils.test_data import make_test_data
from ietf.utils.test_utils import TestCase
from ietf.utils.mail import outbox
from ietf.name.models import SessionStatusName
from ietf.meeting.factories import SessionFactory
from ietf.secr.proceedings.proc_utils import (create_proceedings, import_audio_files,
get_timeslot_for_filename, normalize_room_name, send_audio_import_warning,
get_or_create_recording_document, create_recording, get_next_sequence)
get_or_create_recording_document, create_recording, get_next_sequence,
get_youtube_playlistid, get_youtube_videos, import_youtube_video_urls,
_get_session, _get_urls_from_json)
SECR_USER='secretary'
@ -37,6 +41,68 @@ class ProceedingsTestCase(TestCase):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
class VideoRecordingTestCase(TestCase):
    '''Tests for the YouTube session video import helpers in proc_utils.'''

    def _build_youtube_client(self):
        '''Returns an API client built from the canned discovery document'''
        discovery = os.path.join(settings.BASE_DIR, "../test/data/youtube-discovery.json")
        return build(settings.YOUTUBE_API_SERVICE_NAME, settings.YOUTUBE_API_VERSION,
                     developerKey='', http=HttpMock(discovery, {'status': '200'}))

    def _get_video_title_for_session(self, session):
        '''Returns the youtube video title of a session recording given session'''
        slot = session.official_timeslotassignment().timeslot
        title = "{prefix}-{group}-{date}".format(
            prefix=session.meeting.type.slug + session.meeting.number,
            group=session.group.acronym,
            date=slot.time.strftime('%Y%m%d-%H%M'))
        return title.upper()

    @patch('ietf.secr.proceedings.proc_utils.get_youtube_videos')
    @patch('ietf.secr.proceedings.proc_utils.get_youtube_playlistid')
    def test_import_youtube_video_urls(self, mock_playlistid, mock_videos):
        meeting = make_meeting_test_data()
        session = Session.objects.filter(meeting=meeting, group__acronym='mars').first()
        video_url = 'https://youtube.com?v=test'
        # both API helpers are patched out; only the matching/import logic runs
        mock_playlistid.return_value = 'PLC86T-6ZTP5g87jdxNqdWV5475U-yEE8M'
        mock_videos.return_value = [
            {'title': self._get_video_title_for_session(session), 'url': video_url},
        ]
        discovery = os.path.join(settings.BASE_DIR, "../test/data/youtube-discovery.json")
        import_youtube_video_urls(meeting=meeting, http=HttpMock(discovery, {'status': '200'}))
        recording = Document.objects.get(external_url=video_url)
        self.assertIn(recording, session.materials.all())

    def test_get_youtube_playlistid(self):
        youtube = self._build_youtube_client()
        response_path = os.path.join(settings.BASE_DIR, "../test/data/youtube-playlistid.json")
        response_http = HttpMock(response_path, {'status': '200'})
        playlistid = get_youtube_playlistid(youtube, 'IETF98', http=response_http)
        self.assertEqual(playlistid, 'PLC86T-test')

    def test_get_youtube_videos(self):
        youtube = self._build_youtube_client()
        response_path = os.path.join(settings.BASE_DIR, "../test/data/youtube-playlistitems.json")
        response_http = HttpMock(response_path, {'status': '200'})
        videos = get_youtube_videos(youtube, 'PLC86T', http=response_http)
        self.assertEqual(len(videos), 2)

    def test_get_session(self):
        meeting = make_meeting_test_data()
        session = Session.objects.filter(meeting=meeting, group__acronym='mars').first()
        start = session.official_timeslotassignment().timeslot.time
        found = _get_session(meeting.number,
                             session.group.acronym,
                             start.strftime('%Y%m%d'),
                             start.strftime('%H%M'))
        self.assertEqual(found, session)

    def test_get_urls_from_json(self):
        path = os.path.join(settings.BASE_DIR, "../test/data/youtube-playlistitems.json")
        with open(path) as f:
            urls = _get_urls_from_json(json.load(f))
        self.assertEqual(len(urls), 2)
        first = urls[0]
        self.assertEqual(first['title'], 'IETF98 Wrap Up')
        self.assertEqual(first['url'], 'https://www.youtube.com/watch?v=lhYWB5FFkg4&list=PLC86T-6ZTP5jo6kIuqdyeYYhsKv9sUwG1')
class RecordingTestCase(TestCase):
def setUp(self):
self.meeting_recordings_dir = self.tempdir('meeting-recordings')

View file

@ -289,7 +289,8 @@ def recording(request, meeting_num):
session.
'''
meeting = get_object_or_404(Meeting, number=meeting_num)
sessions = meeting.session_set.filter(type__in=('session','plenary','other'),status='sched').order_by('group__acronym')
assignments = meeting.agenda.assignments.exclude(session__type__in=('reg','break')).order_by('session__group__acronym')
sessions = [ x.session for x in assignments ]
if request.method == 'POST':
form = RecordingForm(request.POST,meeting=meeting)

View file

@ -732,6 +732,11 @@ SECR_PPT2PDF_COMMAND = ['/usr/bin/soffice','--headless','--convert-to','pdf','--
REGISTRATION_ATTENDEES_BASE_URL = 'https://ietf.org/registration/attendees/'
NEW_PROCEEDINGS_START = 95
USE_ETAGS=True
YOUTUBE_API_KEY = ''
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'
YOUTUBE_BASE_URL = 'https://www.youtube.com/watch'
YOUTUBE_IETF_CHANNEL_ID = 'UC8dtK9njBLdFnBahHFp0eZQ'
PRODUCTION_TIMEZONE = "America/Los_Angeles"

View file

@ -20,6 +20,7 @@ django-tastypie>=0.13.2
django-widget-tweaks>=1.3
docutils>=0.12
factory-boy>=2.8.1
google-api-python-client
# Faker # from factory-boy
hashids>=1.1.0
html5lib>=0.90,<0.99999999 # ietf.utils.html needs a rewrite for html5lib 1.x -- major code changes in sanitizer

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
{"regionCode": "US", "kind": "youtube#searchListResponse", "etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/eoV8llUEbIu5LXnwqBaLOkOK0Hg\"", "pageInfo": {"resultsPerPage": 1, "totalResults": 1}, "items": [{"snippet": {"thumbnails": {"default": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/default.jpg", "width": 120, "height": 90}, "high": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/hqdefault.jpg", "width": 480, "height": 360}, "medium": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/mqdefault.jpg", "width": 320, "height": 180}}, "title": "IETF98", "channelId": "UC8dtK9njBLdFnBahHFp0eZQ", "publishedAt": "2017-03-30T12:41:04.000Z", "liveBroadcastContent": "none", "channelTitle": "IETF - Internet Engineering Task Force", "description": "Videos from the IETF 98 Meeting held in Chicago, Illinois, United States 26-31 March 2017."}, "kind": "youtube#searchResult", "etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/X3dbZGRvgpvedtOP0KLGhZLg5UI\"", "id": {"kind": "youtube#playlist", "playlistId": "PLC86T-test"}}]}

View file

@ -0,0 +1 @@
{"items": [{"snippet": {"playlistId": "PLC86T-6ZTP5jo6kIuqdyeYYhsKv9sUwG1", "thumbnails": {"default": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/default.jpg", "width": 120, "height": 90}, "high": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/hqdefault.jpg", "width": 480, "height": 360}, "medium": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/mqdefault.jpg", "width": 320, "height": 180}, "maxres": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/maxresdefault.jpg", "width": 1280, "height": 720}, "standard": {"url": "https://i.ytimg.com/vi/lhYWB5FFkg4/sddefault.jpg", "width": 640, "height": 480}}, "title": "IETF98 Wrap Up", "resourceId": {"kind": "youtube#video", "videoId": "lhYWB5FFkg4"}, "channelId": "UC8dtK9njBLdFnBahHFp0eZQ", "publishedAt": "2017-04-06T13:32:39.000Z", "channelTitle": "IETF - Internet Engineering Task Force", "position": 0, "description": "Jari Arkko and Alissa Cooper recap some highlights the IETF 98 meeting held 26-31 March 2017 in Chicago, Illinois, United States"}, "kind": "youtube#playlistItem", "etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/eW_De3gQF2fRzN_rPBbX-kY7oBI\"", "id": "UExDODZULTZaVFA1am82a0l1cWR5ZVlZaHNLdjlzVXdHMS40OTQ5QjlEMDgzN0FBNUIw"}, {"snippet": {"playlistId": "PLC86T-6ZTP5jo6kIuqdyeYYhsKv9sUwG1", "thumbnails": {"default": {"url": "https://i.ytimg.com/vi/lPSTcBITbvs/default.jpg", "width": 120, "height": 90}, "high": {"url": "https://i.ytimg.com/vi/lPSTcBITbvs/hqdefault.jpg", "width": 480, "height": 360}, "medium": {"url": "https://i.ytimg.com/vi/lPSTcBITbvs/mqdefault.jpg", "width": 320, "height": 180}}, "title": "IETF 98 - QUIC Tutorial", "resourceId": {"kind": "youtube#video", "videoId": "lPSTcBITbvs"}, "channelId": "UC8dtK9njBLdFnBahHFp0eZQ", "publishedAt": "2017-03-30T12:41:35.000Z", "channelTitle": "IETF - Internet Engineering Task Force", "position": 1, "description": "A tutorial about the new QUIC protocol"}, "kind": "youtube#playlistItem", "etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/GhKVt6zTuEpFavgtf9GWlWuzX9s\"", "id": 
"UExDODZULTZaVFA1am82a0l1cWR5ZVlZaHNLdjlzVXdHMS41NkI0NEY2RDEwNTU3Q0M2"}], "kind": "youtube#playlistItemListResponse", "etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/jlFue-jZVpFMOuLUXQZH4Y0Lh3Y\"", "pageInfo": {"resultsPerPage": 2, "totalResults": 110}}