From 306431319a0936aac6a4dfb1c1834cc057c770d3 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Sun, 18 Mar 2018 16:10:14 +0000 Subject: [PATCH] Added a couple of meeting material crawls to check that internal material links aren't broken. - Legacy-Id: 14861 --- ietf/meeting/tests_views.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index 8a63222bb..1fdf62b2c 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -16,6 +16,7 @@ from django.contrib.auth.models import User from mock import patch from pyquery import PyQuery from StringIO import StringIO +from bs4 import BeautifulSoup from ietf.doc.models import Document from ietf.group.models import Group, Role @@ -1613,6 +1614,23 @@ class MaterialsTests(TestCase): settings.AGENDA_PATH = self.saved_agenda_path shutil.rmtree(self.materials_dir) + def crawl_materials(self, url, top): + seen = set() + def follow(url): + seen.add(url) + r = self.client.get(url) + self.assertEqual(r.status_code, 200) + if not ('.' 
in url and url.rsplit('.', 1)[1] in ['tgz', 'pdf', ]): + if r.content: + page = unicontent(r) + soup = BeautifulSoup(page, 'html.parser') + for a in soup('a'): + href = a.get('href') or '' # anchors without href give None + path = urlparse.urlparse(href).path + if (path and path not in seen and path.startswith(top)): + follow(path) + follow(url) + def test_upload_bluesheets(self): session = SessionFactory(meeting__type_id='ietf') url = urlreverse('ietf.meeting.views.upload_session_bluesheets',kwargs={'num':session.meeting.number,'session_id':session.id}) @@ -1757,6 +1775,11 @@ class MaterialsTests(TestCase): r = self.client.post(url,dict(file=test_file)) self.assertContains(r, 'Could not identify the file encoding') + # Verify that we don't have dead links + url = urlreverse('ietf.meeting.views.session_details', kwargs={'num':session.meeting.number, 'acronym': session.group.acronym}) + top = '/meeting/%s/' % session.meeting.number + self.crawl_materials(url=url, top=top) + def test_upload_minutes_agenda_unscheduled(self): for doctype in ('minutes','agenda'): session = SessionFactory(meeting__type_id='ietf', add_to_schedule=False) @@ -1799,6 +1822,11 @@ class MaterialsTests(TestCase): doc = session.sessionpresentation_set.filter(document__type_id=doctype).first().document self.assertEqual(doc.rev,'00') + # Verify that we don't have dead links + url = urlreverse('ietf.meeting.views.session_details', kwargs={'num':session.meeting.number, 'acronym': session.group.acronym}) + top = '/meeting/%s/' % session.meeting.number + self.crawl_materials(url=url, top=top) + def test_upload_slides(self): session1 = SessionFactory(meeting__type_id='ietf')