From b65a37b6e8f6cfe105cd89c61c7046fff9d21523 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 14 Nov 2024 18:05:38 -0400 Subject: [PATCH] feat: POST for document search requests (#8206) * refactor: doc search via POST (WIP) Changes the search view to use a POST instead of a GET. Refactors cache key computation to use cleaned data. Still todo: * refactor frontpage view to match * refactor menubar search (?) * refactor stats view that uses SearchForm * revive or drop the "backwards compatibility" branch * feat: convert GET search to POST Still todo: * refactor frontpage view to match * refactor menubar search (?) * refactor stats view that uses SearchForm * fix: revert frontpage changes, search works Still todo: * refactor stats view that uses SearchForm * fix: define vars in all branches * refactor: update stats use of SearchForm * chore: improve message * fix: remove lint * chore: comments re: QuerySetAny * test: test query string search params * style: Black * test: refactor test_search() * test: refactor test_search_became_rfc() * test: use scroll_and_click helper --- ietf/api/serializer.py | 1 + ietf/doc/tests.py | 155 +++++++++++++++------ ietf/doc/tests_js.py | 4 +- ietf/doc/utils.py | 11 +- ietf/doc/views_search.py | 107 +++++++++----- ietf/liaisons/forms.py | 1 + ietf/liaisons/widgets.py | 1 + ietf/templates/doc/search/search_form.html | 2 + 8 files changed, 196 insertions(+), 86 deletions(-) diff --git a/ietf/api/serializer.py b/ietf/api/serializer.py index 27f194c5b..ca34ea649 100644 --- a/ietf/api/serializer.py +++ b/ietf/api/serializer.py @@ -146,6 +146,7 @@ class AdminJsonSerializer(Serializer): field_value = None else: field_value = field + # Need QuerySetAny instead of QuerySet until django-stubs 5.0.1 if isinstance(field_value, QuerySetAny) or isinstance(field_value, list): self._current[name] = dict([ (rel.pk, self.expand_related(rel, name)) for rel in field_value ]) else: diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index a1f9f8da2..fa655cb88 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -71,96 +71,163 @@ from ietf.doc.utils_search import AD_WORKLOAD class SearchTests(TestCase): - def test_search(self): + def test_search_handles_querystring_parameters(self): + """Search parameters via querystring should not actually search""" + url = urlreverse("ietf.doc.views_search.search") + r = self.client.get(url + "?name=some-document-name&oldDrafts=on") + # Check that we got a valid response and that the warning about query string parameters is shown. + self.assertContains( + r, + "Searching via the URL query string is no longer supported.", + status_code=200, + ) + # Check that the form was filled in correctly (not an exhaustive check, but different from the + # form defaults) + pq = PyQuery(r.content) + self.assertEqual( + pq("form#search_form input#id_name").attr("value"), + "some-document-name", + "The name field should be set in the SearchForm", + ) + self.assertEqual( + pq("form#search_form input#id_olddrafts").attr("checked"), + "checked", + "The old drafts checkbox should be selected in the SearchForm", + ) + self.assertIsNone( + pq("form#search_form input#id_rfcs").attr("checked"), + "The RFCs checkbox should not be selected in the SearchForm", + ) + self.assertIsNone( + pq("form#search_form input#id_activedrafts").attr("checked"), + "The active drafts checkbox should not be selected in the SearchForm", + ) - draft = WgDraftFactory(name='draft-ietf-mars-test',group=GroupFactory(acronym='mars',parent=Group.objects.get(acronym='farfut')),authors=[PersonFactory()],ad=PersonFactory()) + def test_search(self): + draft = WgDraftFactory( + name="draft-ietf-mars-test", + group=GroupFactory(acronym="mars", parent=Group.objects.get(acronym="farfut")), + authors=[PersonFactory()], + ad=PersonFactory(), + ) rfc = WgRfcFactory() draft.set_state(State.objects.get(used=True, type="draft-iesg", slug="pub-req")) - old_draft = IndividualDraftFactory(name='draft-foo-mars-test',authors=[PersonFactory()],title="Optimizing Martian Network Topologies") + old_draft = IndividualDraftFactory( + name="draft-foo-mars-test", + authors=[PersonFactory()], + title="Optimizing Martian Network Topologies", + ) old_draft.set_state(State.objects.get(used=True, type="draft", slug="expired")) - - base_url = urlreverse('ietf.doc.views_search.search') - + + url = urlreverse("ietf.doc.views_search.search") + # only show form, no search yet - r = self.client.get(base_url) + r = self.client.get(url) self.assertEqual(r.status_code, 200) - + # no match - r = self.client.get(base_url + "?activedrafts=on&name=thisisnotadocumentname") + r = self.client.post(url, {"activedrafts": "on", "name": "thisisnotadocumentname"}) self.assertEqual(r.status_code, 200) self.assertContains(r, "No documents match") - - r = self.client.get(base_url + "?rfcs=on&name=xyzzy") + + r = self.client.post(url, {"rfcs": "on", "name": "xyzzy"}) self.assertEqual(r.status_code, 200) self.assertContains(r, "No documents match") - - r = self.client.get(base_url + "?olddrafts=on&name=bar") + + r = self.client.post(url, {"olddrafts": "on", "name": "bar"}) self.assertEqual(r.status_code, 200) self.assertContains(r, "No documents match") - - r = self.client.get(base_url + "?olddrafts=on&name=foo") + + r = self.client.post(url, {"olddrafts": "on", "name": "foo"}) self.assertEqual(r.status_code, 200) self.assertContains(r, "draft-foo-mars-test") - - r = self.client.get(base_url + "?olddrafts=on&name=FoO") # mixed case + + r = self.client.post(url, {"olddrafts": "on", "name": "FoO"}) # mixed case self.assertEqual(r.status_code, 200) self.assertContains(r, "draft-foo-mars-test") - + # find by RFC - r = self.client.get(base_url + "?rfcs=on&name=%s" % rfc.name) + r = self.client.post(url, {"rfcs": "on", "name": rfc.name}) self.assertEqual(r.status_code, 200) self.assertContains(r, rfc.title) - + # find by active/inactive - + draft.set_state(State.objects.get(type="draft", slug="active")) - r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.name) + r = self.client.post(url, {"activedrafts": "on", "name": draft.name}) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + draft.set_state(State.objects.get(type="draft", slug="expired")) - r = self.client.get(base_url + "?olddrafts=on&name=%s" % draft.name) + r = self.client.post(url, {"olddrafts": "on", "name": draft.name}) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + draft.set_state(State.objects.get(type="draft", slug="active")) - + # find by title - r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.title.split()[0]) + r = self.client.post(url, {"activedrafts": "on", "name": draft.title.split()[0]}) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by author - r = self.client.get(base_url + "?activedrafts=on&by=author&author=%s" % draft.documentauthor_set.first().person.name_parts()[1]) + r = self.client.post( + url, + { + "activedrafts": "on", + "by": "author", + "author": draft.documentauthor_set.first().person.name_parts()[1], + }, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by group - r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym) + r = self.client.post( + url, + {"activedrafts": "on", "by": "group", "group": draft.group.acronym}, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - - r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym.swapcase()) + + r = self.client.post( + url, + {"activedrafts": "on", "by": "group", "group": draft.group.acronym.swapcase()}, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by area - r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id) + r = self.client.post( + url, + {"activedrafts": "on", "by": "area", "area": draft.group.parent_id}, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by area - r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id) + r = self.client.post( + url, + {"activedrafts": "on", "by": "area", "area": draft.group.parent_id}, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by AD - r = self.client.get(base_url + "?activedrafts=on&by=ad&ad=%s" % draft.ad_id) + r = self.client.post(url, {"activedrafts": "on", "by": "ad", "ad": draft.ad_id}) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) - + # find by IESG state - r = self.client.get(base_url + "?activedrafts=on&by=state&state=%s&substate=" % draft.get_state("draft-iesg").pk) + r = self.client.post( + url, + { + "activedrafts": "on", + "by": "state", + "state": draft.get_state("draft-iesg").pk, + "substate": "", + }, + ) self.assertEqual(r.status_code, 200) self.assertContains(r, draft.title) @@ -169,15 +236,15 @@ class SearchTests(TestCase): rfc = WgRfcFactory() draft.set_state(State.objects.get(type="draft", slug="rfc")) draft.relateddocument_set.create(relationship_id="became_rfc", target=rfc) - base_url = urlreverse('ietf.doc.views_search.search') + url = urlreverse("ietf.doc.views_search.search") # find by RFC - r = self.client.get(base_url + f"?rfcs=on&name={rfc.name}") + r = self.client.post(url, {"rfcs": "on", "name": rfc.name}) self.assertEqual(r.status_code, 200) self.assertContains(r, rfc.title) # find by draft - r = self.client.get(base_url + f"?activedrafts=on&rfcs=on&name={draft.name}") + r = self.client.post(url, {"activedrafts": "on", "rfcs": "on", "name": draft.name}) self.assertEqual(r.status_code, 200) self.assertContains(r, rfc.title) diff --git a/ietf/doc/tests_js.py b/ietf/doc/tests_js.py index acd74c4a0..9a5aad13b 100644 --- a/ietf/doc/tests_js.py +++ b/ietf/doc/tests_js.py @@ -92,10 +92,8 @@ class EditAuthorsTests(IetfSeleniumTestCase): self.assertEqual(len(author_forms), 1) # get the "add author" button so we can add blank author forms - add_author_button = self.driver.find_element(By.ID, 'add-author-button') for index, auth in enumerate(authors): - self.scroll_to_element(add_author_button) # Can only click if it's in view! - add_author_button.click() # Create a new form. Automatically scrolls to it. + self.scroll_and_click((By.ID, 'add-author-button')) # Create new form. Automatically scrolls to it. author_forms = authors_list.find_elements(By.CLASS_NAME, 'author-panel') authors_added = index + 1 self.assertEqual(len(author_forms), authors_added + 1) # Started with 1 author, hence +1 diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 97243a20d..9b2570d8b 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -3,9 +3,7 @@ import datetime -import hashlib import io -import json import math import os import re @@ -348,6 +346,7 @@ def augment_events_with_revision(doc, events): """Take a set of events for doc and add a .rev attribute with the revision they refer to by checking NewRevisionDocEvents.""" + # Need QuerySetAny instead of QuerySet until django-stubs 5.0.1 if isinstance(events, QuerySetAny): qs = events.filter(newrevisiondocevent__isnull=False) else: @@ -1047,12 +1046,8 @@ def make_rev_history(doc): return sorted(history, key=lambda x: x['published']) -def get_search_cache_key(params): - from ietf.doc.views_search import SearchForm - fields = set(SearchForm.base_fields) - set(['sort',]) - kwargs = dict([ (k,v) for (k,v) in list(params.items()) if k in fields ]) - key = "doc:document:search:" + hashlib.sha512(json.dumps(kwargs, sort_keys=True).encode('utf-8')).hexdigest() - return key +def get_search_cache_key(key_fragment): + return f"doc:document:search:{key_fragment}" def build_file_urls(doc: Union[Document, DocHistory]): diff --git a/ietf/doc/views_search.py b/ietf/doc/views_search.py index 4fa3b2560..7b71dd77b 100644 --- a/ietf/doc/views_search.py +++ b/ietf/doc/views_search.py @@ -37,6 +37,8 @@ import re import datetime import copy +import hashlib +import json import operator from collections import defaultdict @@ -44,16 +46,17 @@ from functools import reduce from django import forms from django.conf import settings +from django.contrib import messages from django.core.cache import cache, caches from django.urls import reverse as urlreverse from django.db.models import Q -from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect, QueryDict +from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect from django.shortcuts import render from django.utils import timezone from django.utils.html import strip_tags from django.utils.cache import _generate_cache_key # type: ignore from django.utils.text import slugify - +from django_stubs_ext import QuerySetAny import debug # pyflakes:ignore @@ -145,6 +148,29 @@ class SearchForm(forms.Form): q['irtfstate'] = None return q + def cache_key_fragment(self): + """Hash a bound form to get a value for use in a cache key + + Raises a ValueError if the form is not valid. + """ + def _serialize_value(val): + # Need QuerySetAny instead of QuerySet until django-stubs 5.0.1 + if isinstance(val, QuerySetAny): + return [item.pk for item in val] + else: + return getattr(val, "pk", val) # use pk if present, else value + + if not self.is_valid(): + raise ValueError(f"SearchForm invalid: {self.errors}") + contents = { + field_name: _serialize_value(field_value) + for field_name, field_value in self.cleaned_data.items() + if field_name != "sort" and field_value is not None + } + contents_json = json.dumps(contents, sort_keys=True) + return hashlib.sha512(contents_json.encode("utf-8")).hexdigest() + + def retrieve_search_results(form, all_types=False): """Takes a validated SearchForm and return the results.""" @@ -256,45 +282,64 @@ def retrieve_search_results(form, all_types=False): return docs + def search(request): - if request.GET: - # backwards compatibility - get_params = request.GET.copy() - if 'activeDrafts' in request.GET: - get_params['activedrafts'] = request.GET['activeDrafts'] - if 'oldDrafts' in request.GET: - get_params['olddrafts'] = request.GET['oldDrafts'] - if 'subState' in request.GET: - get_params['substate'] = request.GET['subState'] + """Search for a draft""" + # defaults for results / meta + results = [] + meta = {"by": None, "searching": False} - form = SearchForm(get_params) - if not form.is_valid(): - return HttpResponseBadRequest("form not valid: %s" % form.errors) - - cache_key = get_search_cache_key(get_params) - cached_val = cache.get(cache_key) - if cached_val: - [results, meta] = cached_val - else: - results = retrieve_search_results(form) - results, meta = prepare_document_table(request, results, get_params) - cache.set(cache_key, [results, meta]) # for settings.CACHE_MIDDLEWARE_SECONDS - log(f"Search results computed for {get_params}") - meta['searching'] = True + if request.method == "POST": + form = SearchForm(data=request.POST) + if form.is_valid(): + cache_key = get_search_cache_key(form.cache_key_fragment()) + cached_val = cache.get(cache_key) + if cached_val: + [results, meta] = cached_val + else: + results = retrieve_search_results(form) + results, meta = prepare_document_table( + request, results, form.cleaned_data + ) + cache.set( + cache_key, [results, meta] + ) # for settings.CACHE_MIDDLEWARE_SECONDS + log(f"Search results computed for {form.cleaned_data}") + meta["searching"] = True else: - form = SearchForm() - results = [] - meta = { 'by': None, 'searching': False } - get_params = QueryDict('') + if request.GET: + # backwards compatibility - fill in the form + get_params = request.GET.copy() + if "activeDrafts" in request.GET: + get_params["activedrafts"] = request.GET["activeDrafts"] + if "oldDrafts" in request.GET: + get_params["olddrafts"] = request.GET["oldDrafts"] + if "subState" in request.GET: + get_params["substate"] = request.GET["subState"] + form = SearchForm(data=get_params) + messages.error( + request, + ( + "Searching via the URL query string is no longer supported. " + "The form below has been filled in with the parameters from your request. " + 'To execute your search, please click "Search".' + ), + ) + else: + form = SearchForm() - return render(request, 'doc/search/search.html', { - 'form':form, 'docs':results, 'meta':meta, 'queryargs':get_params.urlencode() }, + return render( + request, + "doc/search/search.html", + context={"form": form, "docs": results, "meta": meta}, ) + def frontpage(request): form = SearchForm() return render(request, 'doc/frontpage.html', {'form':form}) + def search_for_name(request, name): def find_unique(n): exact = Document.objects.filter(name__iexact=n).first() diff --git a/ietf/liaisons/forms.py b/ietf/liaisons/forms.py index 1d91041b2..a75028bf7 100644 --- a/ietf/liaisons/forms.py +++ b/ietf/liaisons/forms.py @@ -203,6 +203,7 @@ class SearchLiaisonForm(forms.Form): class CustomModelMultipleChoiceField(ModelMultipleChoiceField): '''If value is a QuerySet, return it as is (for use in widget.render)''' def prepare_value(self, value): + # Need QuerySetAny instead of QuerySet until django-stubs 5.0.1 if isinstance(value, QuerySetAny): return value if (hasattr(value, '__iter__') and diff --git a/ietf/liaisons/widgets.py b/ietf/liaisons/widgets.py index 74368e83f..3d4f2d13a 100644 --- a/ietf/liaisons/widgets.py +++ b/ietf/liaisons/widgets.py @@ -35,6 +35,7 @@ class ShowAttachmentsWidget(Widget): html = '
' % name html += 'No files attached' html += '
' + # Need QuerySetAny instead of QuerySet until django-stubs 5.0.1 if value and isinstance(value, QuerySetAny): for attachment in value: html += '%s ' % (conditional_escape(attachment.document.get_href()), conditional_escape(attachment.document.title)) diff --git a/ietf/templates/doc/search/search_form.html b/ietf/templates/doc/search/search_form.html index d4f463ec6..6c91894c8 100644 --- a/ietf/templates/doc/search/search_form.html +++ b/ietf/templates/doc/search/search_form.html @@ -4,8 +4,10 @@ {% load widget_tweaks %} {% load ietf_filters %}
+ {% csrf_token %}
{{ form.name|add_class:"form-control"|attr:"placeholder:Document name/title/RFC number"|attr:"aria-label:Document name/title/RFC number" }}