fix: prefer PDF in (+refactor) materials_document view (#8136)

* fix: prefer PDF in materials_document(); refactor

* style: Black

* refactor: split urls entry into simpler pair

It seems Django cannot reverse a URL pattern that uses "|" to
combine options when one inclues a named variable and the other
does not.

* test: test extension choice

* fix: fix test failures

* refactor: get rid of io.open()

* refactor: reunite url patterns

Adds option for a trailing "/" when using an extension,
which was not previously supported but should be somewhere
between harmless and a feature.
This commit is contained in:
Jennifer Richards 2024-11-03 14:34:16 +00:00 committed by GitHub
parent 1b4f481fbe
commit 087bf1e77c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 88 additions and 25 deletions

View file

@ -125,8 +125,12 @@ class BaseMeetingTestCase(TestCase):
settings.MEETINGHOST_LOGO_PATH = self.saved_meetinghost_logo_path settings.MEETINGHOST_LOGO_PATH = self.saved_meetinghost_logo_path
super().tearDown() super().tearDown()
def write_materials_file(self, meeting, doc, content, charset="utf-8"): def write_materials_file(self, meeting, doc, content, charset="utf-8", with_ext=None):
path = os.path.join(self.materials_dir, "%s/%s/%s" % (meeting.number, doc.type_id, doc.uploaded_filename)) if with_ext is None:
filename = doc.uploaded_filename
else:
filename = Path(doc.uploaded_filename).with_suffix(with_ext)
path = os.path.join(self.materials_dir, "%s/%s/%s" % (meeting.number, doc.type_id, filename))
dirname = os.path.dirname(path) dirname = os.path.dirname(path)
if not os.path.exists(dirname): if not os.path.exists(dirname):
@ -753,7 +757,56 @@ class MeetingTests(BaseMeetingTestCase):
) )
self.assertEqual(len(q(f'a[href^="{edit_url}#session"]')), 1, f'Link to session_details page for {acro}') self.assertEqual(len(q(f'a[href^="{edit_url}#session"]')), 1, f'Link to session_details page for {acro}')
def test_materials_document_extension_choice(self):
def _url(**kwargs):
return urlreverse("ietf.meeting.views.materials_document", kwargs=kwargs)
presentation = SessionPresentationFactory(
document__rev="00",
document__name="slides-whatever",
document__uploaded_filename="slides-whatever-00.txt",
document__type_id="slides",
document__states=(("reuse_policy", "single"),)
)
session = presentation.session
meeting = session.meeting
# This is not a realistic set of files to exist, but is useful for testing. Normally,
# we'd have _either_ txt, pdf, or pptx + pdf.
self.write_materials_file(meeting, presentation.document, "Hi I'm a txt", with_ext=".txt")
self.write_materials_file(meeting, presentation.document, "Hi I'm a pptx", with_ext=".pptx")
# with no rev, prefers the uploaded_filename
r = self.client.get(_url(document="slides-whatever", num=meeting.number)) # no rev
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content.decode(), "Hi I'm a txt")
# with a rev, prefers pptx because it comes first alphabetically
r = self.client.get(_url(document="slides-whatever-00", num=meeting.number))
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content.decode(), "Hi I'm a pptx")
# now create a pdf
self.write_materials_file(meeting, presentation.document, "Hi I'm a pdf", with_ext=".pdf")
# with no rev, still prefers uploaded_filename
r = self.client.get(_url(document="slides-whatever", num=meeting.number)) # no rev
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content.decode(), "Hi I'm a txt")
# pdf should be preferred with a rev
r = self.client.get(_url(document="slides-whatever-00", num=meeting.number))
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content.decode(), "Hi I'm a pdf")
# and explicit extensions should, of course, be respected
for ext in ["pdf", "pptx", "txt"]:
r = self.client.get(_url(document="slides-whatever-00", num=meeting.number, ext=f".{ext}"))
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content.decode(), f"Hi I'm a {ext}")
# and 404 should come up if the ext is not found
r = self.client.get(_url(document="slides-whatever-00", num=meeting.number, ext=".docx"))
self.assertEqual(r.status_code, 404)
def test_materials_editable_groups(self): def test_materials_editable_groups(self):
meeting = make_meeting_test_data() meeting = make_meeting_test_data()

View file

@ -83,7 +83,7 @@ type_ietf_only_patterns_id_optional = [
url(r'^week-view(?:.html)?/?$', AgendaRedirectView.as_view(pattern_name='agenda', permanent=True)), url(r'^week-view(?:.html)?/?$', AgendaRedirectView.as_view(pattern_name='agenda', permanent=True)),
url(r'^materials(?:.html)?/?$', views.materials), url(r'^materials(?:.html)?/?$', views.materials),
url(r'^request_minutes/?$', views.request_minutes), url(r'^request_minutes/?$', views.request_minutes),
url(r'^materials/%(document)s((?P<ext>\.[a-z0-9]+)|/)?$' % settings.URL_REGEXPS, views.materials_document), url(r'^materials/%(document)s(?P<ext>\.[a-z0-9]+)?/?$' % settings.URL_REGEXPS, views.materials_document),
url(r'^session/?$', views.materials_editable_groups), url(r'^session/?$', views.materials_editable_groups),
url(r'^proceedings(?:.html)?/?$', views.proceedings), url(r'^proceedings(?:.html)?/?$', views.proceedings),
url(r'^proceedings(?:.html)?/finalize/?$', views.finalize_proceedings), url(r'^proceedings(?:.html)?/finalize/?$', views.finalize_proceedings),

View file

@ -4,7 +4,6 @@
import csv import csv
import datetime import datetime
import glob
import io import io
import itertools import itertools
import json import json
@ -20,6 +19,7 @@ from calendar import timegm
from collections import OrderedDict, Counter, deque, defaultdict, namedtuple from collections import OrderedDict, Counter, deque, defaultdict, namedtuple
from functools import partialmethod from functools import partialmethod
import jsonschema import jsonschema
from pathlib import Path
from urllib.parse import parse_qs, unquote, urlencode, urlsplit, urlunsplit from urllib.parse import parse_qs, unquote, urlencode, urlsplit, urlunsplit
from tempfile import mkstemp from tempfile import mkstemp
from wsgiref.handlers import format_date_time from wsgiref.handlers import format_date_time
@ -250,7 +250,14 @@ def _get_materials_doc(meeting, name):
@cache_page(1 * 60) @cache_page(1 * 60)
def materials_document(request, document, num=None, ext=None): def materials_document(request, document, num=None, ext=None):
meeting=get_meeting(num,type_in=['ietf','interim']) """Materials document view
:param request: Django request
:param document: Name of document without an extension
:param num: meeting number
:param ext: extension including preceding '.'
"""
meeting = get_meeting(num, type_in=["ietf", "interim"])
num = meeting.number num = meeting.number
try: try:
doc, rev = _get_materials_doc(meeting=meeting, name=document) doc, rev = _get_materials_doc(meeting=meeting, name=document)
@ -258,32 +265,34 @@ def materials_document(request, document, num=None, ext=None):
raise Http404("No such document for meeting %s" % num) raise Http404("No such document for meeting %s" % num)
if not rev: if not rev:
filename = doc.get_file_name() filename = Path(doc.get_file_name())
else: else:
filename = os.path.join(doc.get_file_path(), document) filename = Path(doc.get_file_path()) / document
if ext: if ext:
if not filename.endswith(ext): filename = filename.with_suffix(ext)
name, _ = os.path.splitext(filename) elif filename.suffix == "":
filename = name + ext # If we don't already have an extension, try to add one
else: ext_choices = {
filenames = glob.glob(filename+'.*') # Construct a map from suffix to full filename
if filenames: fn.suffix: fn
filename = filenames[0] for fn in sorted(filename.parent.glob(filename.stem + ".*"))
_, basename = os.path.split(filename) }
if not os.path.exists(filename): if len(ext_choices) > 0:
raise Http404("File not found: %s" % filename) if ".pdf" in ext_choices:
filename = ext_choices[".pdf"]
else:
filename = list(ext_choices.values())[0]
if not filename.exists():
raise Http404(f"File not found: {filename}")
old_proceedings_format = meeting.number.isdigit() and int(meeting.number) <= 96 old_proceedings_format = meeting.number.isdigit() and int(meeting.number) <= 96
if settings.MEETING_MATERIALS_SERVE_LOCALLY or old_proceedings_format: if settings.MEETING_MATERIALS_SERVE_LOCALLY or old_proceedings_format:
with io.open(filename, 'rb') as file: bytes = filename.read_bytes()
bytes = file.read()
mtype, chset = get_mime_type(bytes) mtype, chset = get_mime_type(bytes)
content_type = "%s; charset=%s" % (mtype, chset) content_type = "%s; charset=%s" % (mtype, chset)
file_ext = os.path.splitext(filename) if filename.suffix == ".md" and mtype == "text/plain":
if len(file_ext) == 2 and file_ext[1] == '.md' and mtype == 'text/plain': sorted_accept = sort_accept_tuple(request.META.get("HTTP_ACCEPT"))
sorted_accept = sort_accept_tuple(request.META.get('HTTP_ACCEPT'))
for atype in sorted_accept: for atype in sorted_accept:
if atype[0] == "text/markdown": if atype[0] == "text/markdown":
content_type = content_type.replace("plain", "markdown", 1) content_type = content_type.replace("plain", "markdown", 1)
@ -293,7 +302,7 @@ def materials_document(request, document, num=None, ext=None):
"minimal.html", "minimal.html",
{ {
"content": markdown.markdown(bytes.decode(encoding=chset)), "content": markdown.markdown(bytes.decode(encoding=chset)),
"title": basename, "title": filename.name,
}, },
) )
content_type = content_type.replace("plain", "html", 1) content_type = content_type.replace("plain", "html", 1)
@ -302,11 +311,12 @@ def materials_document(request, document, num=None, ext=None):
break break
response = HttpResponse(bytes, content_type=content_type) response = HttpResponse(bytes, content_type=content_type)
response['Content-Disposition'] = 'inline; filename="%s"' % basename response["Content-Disposition"] = f'inline; filename="{filename.name}"'
return response return response
else: else:
return HttpResponseRedirect(redirect_to=doc.get_href(meeting=meeting)) return HttpResponseRedirect(redirect_to=doc.get_href(meeting=meeting))
@login_required @login_required
def materials_editable_groups(request, num=None): def materials_editable_groups(request, num=None):
meeting = get_meeting(num) meeting = get_meeting(num)