Added a couple of meeting material crawls to check that internal material links aren't broken.

- Legacy-Id: 14861
This commit is contained in:
Henrik Levkowetz 2018-03-18 16:10:14 +00:00
parent 7f0de265e9
commit 306431319a

View file

@ -16,6 +16,7 @@ from django.contrib.auth.models import User
from mock import patch from mock import patch
from pyquery import PyQuery from pyquery import PyQuery
from StringIO import StringIO from StringIO import StringIO
from bs4 import BeautifulSoup
from ietf.doc.models import Document from ietf.doc.models import Document
from ietf.group.models import Group, Role from ietf.group.models import Group, Role
@ -1613,6 +1614,23 @@ class MaterialsTests(TestCase):
settings.AGENDA_PATH = self.saved_agenda_path settings.AGENDA_PATH = self.saved_agenda_path
shutil.rmtree(self.materials_dir) shutil.rmtree(self.materials_dir)
def crawl_materials(self, url, top):
    """Crawl the pages reachable from *url* and assert that none is dead.

    Every visited URL is fetched with the test client and must answer with
    HTTP status 200.  Responses with content, other than ``.tgz`` and
    ``.pdf`` downloads, are parsed as HTML; each ``<a href=...>`` whose
    path component starts with *top* and has not been visited yet is
    queued for the same check.

    Parameters:
        url: the URL where the crawl starts.
        top: path prefix; links whose path does not start with it are
            not followed.

    Raises:
        AssertionError: (through ``self.assertEqual``) when a fetched URL
            does not return status 200.  The message names the URL.
    """
    seen = set()
    seen.add(url)
    # An explicit work list instead of recursion, so the crawl depth is
    # not bounded by the interpreter's recursion limit.
    pending = [url]
    while pending:
        current = pending.pop()
        r = self.client.get(current)
        self.assertEqual(
            r.status_code, 200,
            "Dead link: GET %s returned status %s" % (current, r.status_code))
        # Archive and PDF downloads are checked for existence only; they
        # are not HTML and must not be parsed for links.
        if current.endswith(('.tgz', '.pdf')) or not r.content:
            continue
        soup = BeautifulSoup(unicontent(r), 'html.parser')
        for a in soup('a'):
            href = a.get('href')
            if not href:
                # Anchors without an href (e.g. named anchors) yield None,
                # which urlparse cannot handle.
                continue
            path = urlparse.urlparse(href).path
            if path and path not in seen and path.startswith(top):
                # Mark as seen when queued so a page is fetched only once
                # even if several pages link to it.
                seen.add(path)
                pending.append(path)
def test_upload_bluesheets(self): def test_upload_bluesheets(self):
session = SessionFactory(meeting__type_id='ietf') session = SessionFactory(meeting__type_id='ietf')
url = urlreverse('ietf.meeting.views.upload_session_bluesheets',kwargs={'num':session.meeting.number,'session_id':session.id}) url = urlreverse('ietf.meeting.views.upload_session_bluesheets',kwargs={'num':session.meeting.number,'session_id':session.id})
@ -1757,6 +1775,11 @@ class MaterialsTests(TestCase):
r = self.client.post(url,dict(file=test_file)) r = self.client.post(url,dict(file=test_file))
self.assertContains(r, 'Could not identify the file encoding') self.assertContains(r, 'Could not identify the file encoding')
# Verify that we don't have dead links
url = url=urlreverse('ietf.meeting.views.session_details', kwargs={'num':session.meeting.number, 'acronym': session.group.acronym})
top = '/meeting/%s/' % session.meeting.number
self.crawl_materials(url=url, top=top)
def test_upload_minutes_agenda_unscheduled(self): def test_upload_minutes_agenda_unscheduled(self):
for doctype in ('minutes','agenda'): for doctype in ('minutes','agenda'):
session = SessionFactory(meeting__type_id='ietf', add_to_schedule=False) session = SessionFactory(meeting__type_id='ietf', add_to_schedule=False)
@ -1799,6 +1822,11 @@ class MaterialsTests(TestCase):
doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document
self.assertEqual(doc.rev,'00') self.assertEqual(doc.rev,'00')
# Verify that we don't have dead links
url = url=urlreverse('ietf.meeting.views.session_details', kwargs={'num':session.meeting.number, 'acronym': session.group.acronym})
top = '/meeting/%s/' % session.meeting.number
self.crawl_materials(url=url, top=top)
def test_upload_slides(self): def test_upload_slides(self):
session1 = SessionFactory(meeting__type_id='ietf') session1 = SessionFactory(meeting__type_id='ietf')