From 087bf1e77c7e677e3db402737d686ab5b1c6ec75 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Sun, 3 Nov 2024 14:34:16 +0000 Subject: [PATCH] fix: prefer PDF in (+refactor) materials_document view (#8136) * fix: prefer PDF in materials_document(); refactor * style: Black * refactor: split urls entry into simpler pair It seems Django cannot reverse a URL pattern that uses "|" to combine options when one inclues a named variable and the other does not. * test: test extension choice * fix: fix test failures * refactor: get rid of io.open() * refactor: reunite url patterns Adds option for a trailing "/" when using an extension, which was not previously supported but should be somewhere between harmless and a feature. --- ietf/meeting/tests_views.py | 57 +++++++++++++++++++++++++++++++++++-- ietf/meeting/urls.py | 2 +- ietf/meeting/views.py | 54 +++++++++++++++++++++-------------- 3 files changed, 88 insertions(+), 25 deletions(-) diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index 0d291f12b..d227708aa 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -125,8 +125,12 @@ class BaseMeetingTestCase(TestCase): settings.MEETINGHOST_LOGO_PATH = self.saved_meetinghost_logo_path super().tearDown() - def write_materials_file(self, meeting, doc, content, charset="utf-8"): - path = os.path.join(self.materials_dir, "%s/%s/%s" % (meeting.number, doc.type_id, doc.uploaded_filename)) + def write_materials_file(self, meeting, doc, content, charset="utf-8", with_ext=None): + if with_ext is None: + filename = doc.uploaded_filename + else: + filename = Path(doc.uploaded_filename).with_suffix(with_ext) + path = os.path.join(self.materials_dir, "%s/%s/%s" % (meeting.number, doc.type_id, filename)) dirname = os.path.dirname(path) if not os.path.exists(dirname): @@ -753,7 +757,56 @@ class MeetingTests(BaseMeetingTestCase): ) self.assertEqual(len(q(f'a[href^="{edit_url}#session"]')), 1, f'Link to session_details page for {acro}') + def test_materials_document_extension_choice(self): + def _url(**kwargs): + return urlreverse("ietf.meeting.views.materials_document", kwargs=kwargs) + presentation = SessionPresentationFactory( + document__rev="00", + document__name="slides-whatever", + document__uploaded_filename="slides-whatever-00.txt", + document__type_id="slides", + document__states=(("reuse_policy", "single"),) + ) + session = presentation.session + meeting = session.meeting + # This is not a realistic set of files to exist, but is useful for testing. Normally, + # we'd have _either_ txt, pdf, or pptx + pdf. + self.write_materials_file(meeting, presentation.document, "Hi I'm a txt", with_ext=".txt") + self.write_materials_file(meeting, presentation.document, "Hi I'm a pptx", with_ext=".pptx") + + # with no rev, prefers the uploaded_filename + r = self.client.get(_url(document="slides-whatever", num=meeting.number)) # no rev + self.assertEqual(r.status_code, 200) + self.assertEqual(r.content.decode(), "Hi I'm a txt") + + # with a rev, prefers pptx because it comes first alphabetically + r = self.client.get(_url(document="slides-whatever-00", num=meeting.number)) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.content.decode(), "Hi I'm a pptx") + + # now create a pdf + self.write_materials_file(meeting, presentation.document, "Hi I'm a pdf", with_ext=".pdf") + + # with no rev, still prefers uploaded_filename + r = self.client.get(_url(document="slides-whatever", num=meeting.number)) # no rev + self.assertEqual(r.status_code, 200) + self.assertEqual(r.content.decode(), "Hi I'm a txt") + + # pdf should be preferred with a rev + r = self.client.get(_url(document="slides-whatever-00", num=meeting.number)) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.content.decode(), "Hi I'm a pdf") + + # and explicit extensions should, of course, be respected + for ext in ["pdf", "pptx", "txt"]: + r = self.client.get(_url(document="slides-whatever-00", num=meeting.number, ext=f".{ext}")) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.content.decode(), f"Hi I'm a {ext}") + + # and 404 should come up if the ext is not found + r = self.client.get(_url(document="slides-whatever-00", num=meeting.number, ext=".docx")) + self.assertEqual(r.status_code, 404) def test_materials_editable_groups(self): meeting = make_meeting_test_data() diff --git a/ietf/meeting/urls.py b/ietf/meeting/urls.py index f2e65578e..42a5de623 100644 --- a/ietf/meeting/urls.py +++ b/ietf/meeting/urls.py @@ -83,7 +83,7 @@ type_ietf_only_patterns_id_optional = [ url(r'^week-view(?:.html)?/?$', AgendaRedirectView.as_view(pattern_name='agenda', permanent=True)), url(r'^materials(?:.html)?/?$', views.materials), url(r'^request_minutes/?$', views.request_minutes), - url(r'^materials/%(document)s((?P\.[a-z0-9]+)|/)?$' % settings.URL_REGEXPS, views.materials_document), + url(r'^materials/%(document)s(?P\.[a-z0-9]+)?/?$' % settings.URL_REGEXPS, views.materials_document), url(r'^session/?$', views.materials_editable_groups), url(r'^proceedings(?:.html)?/?$', views.proceedings), url(r'^proceedings(?:.html)?/finalize/?$', views.finalize_proceedings), diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index bce6ccd1b..64fe73f48 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -4,7 +4,6 @@ import csv import datetime -import glob import io import itertools import json @@ -20,6 +19,7 @@ from calendar import timegm from collections import OrderedDict, Counter, deque, defaultdict, namedtuple from functools import partialmethod import jsonschema +from pathlib import Path from urllib.parse import parse_qs, unquote, urlencode, urlsplit, urlunsplit from tempfile import mkstemp from wsgiref.handlers import format_date_time @@ -250,7 +250,14 @@ def _get_materials_doc(meeting, name): @cache_page(1 * 60) def materials_document(request, document, num=None, ext=None): - meeting=get_meeting(num,type_in=['ietf','interim']) + """Materials document view + + :param request: Django request + :param document: Name of document without an extension + :param num: meeting number + :param ext: extension including preceding '.' + """ + meeting = get_meeting(num, type_in=["ietf", "interim"]) num = meeting.number try: doc, rev = _get_materials_doc(meeting=meeting, name=document) @@ -258,32 +265,34 @@ def materials_document(request, document, num=None, ext=None): raise Http404("No such document for meeting %s" % num) if not rev: - filename = doc.get_file_name() + filename = Path(doc.get_file_name()) else: - filename = os.path.join(doc.get_file_path(), document) + filename = Path(doc.get_file_path()) / document if ext: - if not filename.endswith(ext): - name, _ = os.path.splitext(filename) - filename = name + ext - else: - filenames = glob.glob(filename+'.*') - if filenames: - filename = filenames[0] - _, basename = os.path.split(filename) - if not os.path.exists(filename): - raise Http404("File not found: %s" % filename) + filename = filename.with_suffix(ext) + elif filename.suffix == "": + # If we don't already have an extension, try to add one + ext_choices = { + # Construct a map from suffix to full filename + fn.suffix: fn + for fn in sorted(filename.parent.glob(filename.stem + ".*")) + } + if len(ext_choices) > 0: + if ".pdf" in ext_choices: + filename = ext_choices[".pdf"] + else: + filename = list(ext_choices.values())[0] + if not filename.exists(): + raise Http404(f"File not found: {filename}") old_proceedings_format = meeting.number.isdigit() and int(meeting.number) <= 96 if settings.MEETING_MATERIALS_SERVE_LOCALLY or old_proceedings_format: - with io.open(filename, 'rb') as file: - bytes = file.read() - + bytes = filename.read_bytes() mtype, chset = get_mime_type(bytes) content_type = "%s; charset=%s" % (mtype, chset) - file_ext = os.path.splitext(filename) - if len(file_ext) == 2 and file_ext[1] == '.md' and mtype == 'text/plain': - sorted_accept = sort_accept_tuple(request.META.get('HTTP_ACCEPT')) + if filename.suffix == ".md" and mtype == "text/plain": + sorted_accept = sort_accept_tuple(request.META.get("HTTP_ACCEPT")) for atype in sorted_accept: if atype[0] == "text/markdown": content_type = content_type.replace("plain", "markdown", 1) @@ -293,7 +302,7 @@ def materials_document(request, document, num=None, ext=None): "minimal.html", { "content": markdown.markdown(bytes.decode(encoding=chset)), - "title": basename, + "title": filename.name, }, ) content_type = content_type.replace("plain", "html", 1) @@ -302,11 +311,12 @@ def materials_document(request, document, num=None, ext=None): break response = HttpResponse(bytes, content_type=content_type) - response['Content-Disposition'] = 'inline; filename="%s"' % basename + response["Content-Disposition"] = f'inline; filename="{filename.name}"' return response else: return HttpResponseRedirect(redirect_to=doc.get_href(meeting=meeting)) + @login_required def materials_editable_groups(request, num=None): meeting = get_meeting(num)