feat: POST for document search requests (#8206)

* refactor: doc search via POST (WIP)

Changes the search view to use a POST instead of a GET. Refactors cache key computation to use cleaned data.

Still todo:
 * refactor frontpage view to match
 * refactor menubar search (?)
 * refactor stats view that uses SearchForm
 * revive or drop the "backwards compatibility" branch

* feat: convert GET search to POST

Still todo:
 * refactor frontpage view to match
 * refactor menubar search (?)
 * refactor stats view that uses SearchForm

* fix: revert frontpage changes, search works

Still todo:
 * refactor stats view that uses SearchForm

* fix: define vars in all branches

* refactor: update stats use of SearchForm

* chore: improve message

* fix: remove lint

* chore: comments re: QuerySetAny

* test: test query string search params

* style: Black

* test: refactor test_search()

* test: refactor test_search_became_rfc()

* test: use scroll_and_click helper
This commit is contained in:
Jennifer Richards 2024-11-14 18:05:38 -04:00 committed by GitHub
parent 901fdd8d44
commit b65a37b6e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 196 additions and 86 deletions

View file

@ -146,6 +146,7 @@ class AdminJsonSerializer(Serializer):
field_value = None field_value = None
else: else:
field_value = field field_value = field
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(field_value, QuerySetAny) or isinstance(field_value, list): if isinstance(field_value, QuerySetAny) or isinstance(field_value, list):
self._current[name] = dict([ (rel.pk, self.expand_related(rel, name)) for rel in field_value ]) self._current[name] = dict([ (rel.pk, self.expand_related(rel, name)) for rel in field_value ])
else: else:

View file

@ -71,96 +71,163 @@ from ietf.doc.utils_search import AD_WORKLOAD
class SearchTests(TestCase): class SearchTests(TestCase):
def test_search(self): def test_search_handles_querystring_parameters(self):
"""Search parameters via querystring should not actually search"""
url = urlreverse("ietf.doc.views_search.search")
r = self.client.get(url + "?name=some-document-name&oldDrafts=on")
# Check that we got a valid response and that the warning about query string parameters is shown.
self.assertContains(
r,
"Searching via the URL query string is no longer supported.",
status_code=200,
)
# Check that the form was filled in correctly (not an exhaustive check, but different from the
# form defaults)
pq = PyQuery(r.content)
self.assertEqual(
pq("form#search_form input#id_name").attr("value"),
"some-document-name",
"The name field should be set in the SearchForm",
)
self.assertEqual(
pq("form#search_form input#id_olddrafts").attr("checked"),
"checked",
"The old drafts checkbox should be selected in the SearchForm",
)
self.assertIsNone(
pq("form#search_form input#id_rfcs").attr("checked"),
"The RFCs checkbox should not be selected in the SearchForm",
)
self.assertIsNone(
pq("form#search_form input#id_activedrafts").attr("checked"),
"The active drafts checkbox should not be selected in the SearchForm",
)
draft = WgDraftFactory(name='draft-ietf-mars-test',group=GroupFactory(acronym='mars',parent=Group.objects.get(acronym='farfut')),authors=[PersonFactory()],ad=PersonFactory()) def test_search(self):
draft = WgDraftFactory(
name="draft-ietf-mars-test",
group=GroupFactory(acronym="mars", parent=Group.objects.get(acronym="farfut")),
authors=[PersonFactory()],
ad=PersonFactory(),
)
rfc = WgRfcFactory() rfc = WgRfcFactory()
draft.set_state(State.objects.get(used=True, type="draft-iesg", slug="pub-req")) draft.set_state(State.objects.get(used=True, type="draft-iesg", slug="pub-req"))
old_draft = IndividualDraftFactory(name='draft-foo-mars-test',authors=[PersonFactory()],title="Optimizing Martian Network Topologies") old_draft = IndividualDraftFactory(
name="draft-foo-mars-test",
authors=[PersonFactory()],
title="Optimizing Martian Network Topologies",
)
old_draft.set_state(State.objects.get(used=True, type="draft", slug="expired")) old_draft.set_state(State.objects.get(used=True, type="draft", slug="expired"))
base_url = urlreverse('ietf.doc.views_search.search') url = urlreverse("ietf.doc.views_search.search")
# only show form, no search yet # only show form, no search yet
r = self.client.get(base_url) r = self.client.get(url)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
# no match # no match
r = self.client.get(base_url + "?activedrafts=on&name=thisisnotadocumentname") r = self.client.post(url, {"activedrafts": "on", "name": "thisisnotadocumentname"})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match") self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?rfcs=on&name=xyzzy") r = self.client.post(url, {"rfcs": "on", "name": "xyzzy"})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match") self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?olddrafts=on&name=bar") r = self.client.post(url, {"olddrafts": "on", "name": "bar"})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match") self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?olddrafts=on&name=foo") r = self.client.post(url, {"olddrafts": "on", "name": "foo"})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, "draft-foo-mars-test") self.assertContains(r, "draft-foo-mars-test")
r = self.client.get(base_url + "?olddrafts=on&name=FoO") # mixed case r = self.client.post(url, {"olddrafts": "on", "name": "FoO"}) # mixed case
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, "draft-foo-mars-test") self.assertContains(r, "draft-foo-mars-test")
# find by RFC # find by RFC
r = self.client.get(base_url + "?rfcs=on&name=%s" % rfc.name) r = self.client.post(url, {"rfcs": "on", "name": rfc.name})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title) self.assertContains(r, rfc.title)
# find by active/inactive # find by active/inactive
draft.set_state(State.objects.get(type="draft", slug="active")) draft.set_state(State.objects.get(type="draft", slug="active"))
r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.name) r = self.client.post(url, {"activedrafts": "on", "name": draft.name})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
draft.set_state(State.objects.get(type="draft", slug="expired")) draft.set_state(State.objects.get(type="draft", slug="expired"))
r = self.client.get(base_url + "?olddrafts=on&name=%s" % draft.name) r = self.client.post(url, {"olddrafts": "on", "name": draft.name})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
draft.set_state(State.objects.get(type="draft", slug="active")) draft.set_state(State.objects.get(type="draft", slug="active"))
# find by title # find by title
r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.title.split()[0]) r = self.client.post(url, {"activedrafts": "on", "name": draft.title.split()[0]})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by author # find by author
r = self.client.get(base_url + "?activedrafts=on&by=author&author=%s" % draft.documentauthor_set.first().person.name_parts()[1]) r = self.client.post(
url,
{
"activedrafts": "on",
"by": "author",
"author": draft.documentauthor_set.first().person.name_parts()[1],
},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by group # find by group
r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym) r = self.client.post(
url,
{"activedrafts": "on", "by": "group", "group": draft.group.acronym},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym.swapcase()) r = self.client.post(
url,
{"activedrafts": "on", "by": "group", "group": draft.group.acronym.swapcase()},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by area # find by area
r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id) r = self.client.post(
url,
{"activedrafts": "on", "by": "area", "area": draft.group.parent_id},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by area # find by area
r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id) r = self.client.post(
url,
{"activedrafts": "on", "by": "area", "area": draft.group.parent_id},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by AD # find by AD
r = self.client.get(base_url + "?activedrafts=on&by=ad&ad=%s" % draft.ad_id) r = self.client.post(url, {"activedrafts": "on", "by": "ad", "ad": draft.ad_id})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
# find by IESG state # find by IESG state
r = self.client.get(base_url + "?activedrafts=on&by=state&state=%s&substate=" % draft.get_state("draft-iesg").pk) r = self.client.post(
url,
{
"activedrafts": "on",
"by": "state",
"state": draft.get_state("draft-iesg").pk,
"substate": "",
},
)
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title) self.assertContains(r, draft.title)
@ -169,15 +236,15 @@ class SearchTests(TestCase):
rfc = WgRfcFactory() rfc = WgRfcFactory()
draft.set_state(State.objects.get(type="draft", slug="rfc")) draft.set_state(State.objects.get(type="draft", slug="rfc"))
draft.relateddocument_set.create(relationship_id="became_rfc", target=rfc) draft.relateddocument_set.create(relationship_id="became_rfc", target=rfc)
base_url = urlreverse('ietf.doc.views_search.search') url = urlreverse("ietf.doc.views_search.search")
# find by RFC # find by RFC
r = self.client.get(base_url + f"?rfcs=on&name={rfc.name}") r = self.client.post(url, {"rfcs": "on", "name": rfc.name})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title) self.assertContains(r, rfc.title)
# find by draft # find by draft
r = self.client.get(base_url + f"?activedrafts=on&rfcs=on&name={draft.name}") r = self.client.post(url, {"activedrafts": "on", "rfcs": "on", "name": draft.name})
self.assertEqual(r.status_code, 200) self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title) self.assertContains(r, rfc.title)

View file

@ -92,10 +92,8 @@ class EditAuthorsTests(IetfSeleniumTestCase):
self.assertEqual(len(author_forms), 1) self.assertEqual(len(author_forms), 1)
# get the "add author" button so we can add blank author forms # get the "add author" button so we can add blank author forms
add_author_button = self.driver.find_element(By.ID, 'add-author-button')
for index, auth in enumerate(authors): for index, auth in enumerate(authors):
self.scroll_to_element(add_author_button) # Can only click if it's in view! self.scroll_and_click((By.ID, 'add-author-button')) # Create new form. Automatically scrolls to it.
add_author_button.click() # Create a new form. Automatically scrolls to it.
author_forms = authors_list.find_elements(By.CLASS_NAME, 'author-panel') author_forms = authors_list.find_elements(By.CLASS_NAME, 'author-panel')
authors_added = index + 1 authors_added = index + 1
self.assertEqual(len(author_forms), authors_added + 1) # Started with 1 author, hence +1 self.assertEqual(len(author_forms), authors_added + 1) # Started with 1 author, hence +1

View file

@ -3,9 +3,7 @@
import datetime import datetime
import hashlib
import io import io
import json
import math import math
import os import os
import re import re
@ -348,6 +346,7 @@ def augment_events_with_revision(doc, events):
"""Take a set of events for doc and add a .rev attribute with the """Take a set of events for doc and add a .rev attribute with the
revision they refer to by checking NewRevisionDocEvents.""" revision they refer to by checking NewRevisionDocEvents."""
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(events, QuerySetAny): if isinstance(events, QuerySetAny):
qs = events.filter(newrevisiondocevent__isnull=False) qs = events.filter(newrevisiondocevent__isnull=False)
else: else:
@ -1047,12 +1046,8 @@ def make_rev_history(doc):
return sorted(history, key=lambda x: x['published']) return sorted(history, key=lambda x: x['published'])
def get_search_cache_key(params): def get_search_cache_key(key_fragment):
from ietf.doc.views_search import SearchForm return f"doc:document:search:{key_fragment}"
fields = set(SearchForm.base_fields) - set(['sort',])
kwargs = dict([ (k,v) for (k,v) in list(params.items()) if k in fields ])
key = "doc:document:search:" + hashlib.sha512(json.dumps(kwargs, sort_keys=True).encode('utf-8')).hexdigest()
return key
def build_file_urls(doc: Union[Document, DocHistory]): def build_file_urls(doc: Union[Document, DocHistory]):

View file

@ -37,6 +37,8 @@
import re import re
import datetime import datetime
import copy import copy
import hashlib
import json
import operator import operator
from collections import defaultdict from collections import defaultdict
@ -44,16 +46,17 @@ from functools import reduce
from django import forms from django import forms
from django.conf import settings from django.conf import settings
from django.contrib import messages
from django.core.cache import cache, caches from django.core.cache import cache, caches
from django.urls import reverse as urlreverse from django.urls import reverse as urlreverse
from django.db.models import Q from django.db.models import Q
from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect, QueryDict from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect
from django.shortcuts import render from django.shortcuts import render
from django.utils import timezone from django.utils import timezone
from django.utils.html import strip_tags from django.utils.html import strip_tags
from django.utils.cache import _generate_cache_key # type: ignore from django.utils.cache import _generate_cache_key # type: ignore
from django.utils.text import slugify from django.utils.text import slugify
from django_stubs_ext import QuerySetAny
import debug # pyflakes:ignore import debug # pyflakes:ignore
@ -145,6 +148,29 @@ class SearchForm(forms.Form):
q['irtfstate'] = None q['irtfstate'] = None
return q return q
def cache_key_fragment(self):
"""Hash a bound form to get a value for use in a cache key
Raises a ValueError if the form is not valid.
"""
def _serialize_value(val):
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(val, QuerySetAny):
return [item.pk for item in val]
else:
return getattr(val, "pk", val) # use pk if present, else value
if not self.is_valid():
raise ValueError(f"SearchForm invalid: {self.errors}")
contents = {
field_name: _serialize_value(field_value)
for field_name, field_value in self.cleaned_data.items()
if field_name != "sort" and field_value is not None
}
contents_json = json.dumps(contents, sort_keys=True)
return hashlib.sha512(contents_json.encode("utf-8")).hexdigest()
def retrieve_search_results(form, all_types=False): def retrieve_search_results(form, all_types=False):
"""Takes a validated SearchForm and return the results.""" """Takes a validated SearchForm and return the results."""
@ -256,45 +282,64 @@ def retrieve_search_results(form, all_types=False):
return docs return docs
def search(request): def search(request):
if request.GET: """Search for a draft"""
# backwards compatibility # defaults for results / meta
get_params = request.GET.copy() results = []
if 'activeDrafts' in request.GET: meta = {"by": None, "searching": False}
get_params['activedrafts'] = request.GET['activeDrafts']
if 'oldDrafts' in request.GET:
get_params['olddrafts'] = request.GET['oldDrafts']
if 'subState' in request.GET:
get_params['substate'] = request.GET['subState']
form = SearchForm(get_params) if request.method == "POST":
if not form.is_valid(): form = SearchForm(data=request.POST)
return HttpResponseBadRequest("form not valid: %s" % form.errors) if form.is_valid():
cache_key = get_search_cache_key(form.cache_key_fragment())
cache_key = get_search_cache_key(get_params)
cached_val = cache.get(cache_key) cached_val = cache.get(cache_key)
if cached_val: if cached_val:
[results, meta] = cached_val [results, meta] = cached_val
else: else:
results = retrieve_search_results(form) results = retrieve_search_results(form)
results, meta = prepare_document_table(request, results, get_params) results, meta = prepare_document_table(
cache.set(cache_key, [results, meta]) # for settings.CACHE_MIDDLEWARE_SECONDS request, results, form.cleaned_data
log(f"Search results computed for {get_params}") )
meta['searching'] = True cache.set(
cache_key, [results, meta]
) # for settings.CACHE_MIDDLEWARE_SECONDS
log(f"Search results computed for {form.cleaned_data}")
meta["searching"] = True
else:
if request.GET:
# backwards compatibility - fill in the form
get_params = request.GET.copy()
if "activeDrafts" in request.GET:
get_params["activedrafts"] = request.GET["activeDrafts"]
if "oldDrafts" in request.GET:
get_params["olddrafts"] = request.GET["oldDrafts"]
if "subState" in request.GET:
get_params["substate"] = request.GET["subState"]
form = SearchForm(data=get_params)
messages.error(
request,
(
"Searching via the URL query string is no longer supported. "
"The form below has been filled in with the parameters from your request. "
'To execute your search, please click "Search".'
),
)
else: else:
form = SearchForm() form = SearchForm()
results = []
meta = { 'by': None, 'searching': False }
get_params = QueryDict('')
return render(request, 'doc/search/search.html', { return render(
'form':form, 'docs':results, 'meta':meta, 'queryargs':get_params.urlencode() }, request,
"doc/search/search.html",
context={"form": form, "docs": results, "meta": meta},
) )
def frontpage(request): def frontpage(request):
form = SearchForm() form = SearchForm()
return render(request, 'doc/frontpage.html', {'form':form}) return render(request, 'doc/frontpage.html', {'form':form})
def search_for_name(request, name): def search_for_name(request, name):
def find_unique(n): def find_unique(n):
exact = Document.objects.filter(name__iexact=n).first() exact = Document.objects.filter(name__iexact=n).first()

View file

@ -203,6 +203,7 @@ class SearchLiaisonForm(forms.Form):
class CustomModelMultipleChoiceField(ModelMultipleChoiceField): class CustomModelMultipleChoiceField(ModelMultipleChoiceField):
'''If value is a QuerySet, return it as is (for use in widget.render)''' '''If value is a QuerySet, return it as is (for use in widget.render)'''
def prepare_value(self, value): def prepare_value(self, value):
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(value, QuerySetAny): if isinstance(value, QuerySetAny):
return value return value
if (hasattr(value, '__iter__') and if (hasattr(value, '__iter__') and

View file

@ -35,6 +35,7 @@ class ShowAttachmentsWidget(Widget):
html = '<div id="id_%s">' % name html = '<div id="id_%s">' % name
html += '<span class="d-none showAttachmentsEmpty form-control widget">No files attached</span>' html += '<span class="d-none showAttachmentsEmpty form-control widget">No files attached</span>'
html += '<div class="attachedFiles form-control widget">' html += '<div class="attachedFiles form-control widget">'
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if value and isinstance(value, QuerySetAny): if value and isinstance(value, QuerySetAny):
for attachment in value: for attachment in value:
html += '<a class="initialAttach" href="%s">%s</a>&nbsp' % (conditional_escape(attachment.document.get_href()), conditional_escape(attachment.document.title)) html += '<a class="initialAttach" href="%s">%s</a>&nbsp' % (conditional_escape(attachment.document.get_href()), conditional_escape(attachment.document.title))

View file

@ -4,8 +4,10 @@
{% load widget_tweaks %} {% load widget_tweaks %}
{% load ietf_filters %} {% load ietf_filters %}
<form id="search_form" <form id="search_form"
method="post"
class="form-horizontal" class="form-horizontal"
action="{% url 'ietf.doc.views_search.search' %}"> action="{% url 'ietf.doc.views_search.search' %}">
{% csrf_token %}
<!-- [html-validate-disable-block input-missing-label -- labelled via aria-label] --> <!-- [html-validate-disable-block input-missing-label -- labelled via aria-label] -->
<div class="input-group search_field"> <div class="input-group search_field">
{{ form.name|add_class:"form-control"|attr:"placeholder:Document name/title/RFC number"|attr:"aria-label:Document name/title/RFC number" }} {{ form.name|add_class:"form-control"|attr:"placeholder:Document name/title/RFC number"|attr:"aria-label:Document name/title/RFC number" }}