feat: POST for document search requests (#8206)

* refactor: doc search via POST (WIP)

Changes the search view to use a POST instead of a GET. Refactors cache key computation to use cleaned data.

Still todo:
 * refactor frontpage view to match
 * refactor menubar search (?)
 * refactor stats view that uses SearchForm
 * revive or drop the "backwards compatibility" branch

* feat: convert GET search to POST

Still todo:
 * refactor frontpage view to match
 * refactor menubar search (?)
 * refactor stats view that uses SearchForm

* fix: revert frontpage changes, search works

Still todo:
 * refactor stats view that uses SearchForm

* fix: define vars in all branches

* refactor: update stats use of SearchForm

* chore: improve message

* fix: remove lint

* chore: comments re: QuerySetAny

* test: test query string search params

* style: Black

* test: refactor test_search()

* test: refactor test_search_became_rfc()

* test: use scroll_and_click helper
This commit is contained in:
Jennifer Richards 2024-11-14 18:05:38 -04:00 committed by GitHub
parent 901fdd8d44
commit b65a37b6e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 196 additions and 86 deletions

View file

@ -146,6 +146,7 @@ class AdminJsonSerializer(Serializer):
field_value = None
else:
field_value = field
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(field_value, QuerySetAny) or isinstance(field_value, list):
self._current[name] = dict([ (rel.pk, self.expand_related(rel, name)) for rel in field_value ])
else:

View file

@ -71,96 +71,163 @@ from ietf.doc.utils_search import AD_WORKLOAD
class SearchTests(TestCase):
def test_search(self):
def test_search_handles_querystring_parameters(self):
"""Search parameters via querystring should not actually search"""
url = urlreverse("ietf.doc.views_search.search")
r = self.client.get(url + "?name=some-document-name&oldDrafts=on")
# Check that we got a valid response and that the warning about query string parameters is shown.
self.assertContains(
r,
"Searching via the URL query string is no longer supported.",
status_code=200,
)
# Check that the form was filled in correctly (not an exhaustive check, but different from the
# form defaults)
pq = PyQuery(r.content)
self.assertEqual(
pq("form#search_form input#id_name").attr("value"),
"some-document-name",
"The name field should be set in the SearchForm",
)
self.assertEqual(
pq("form#search_form input#id_olddrafts").attr("checked"),
"checked",
"The old drafts checkbox should be selected in the SearchForm",
)
self.assertIsNone(
pq("form#search_form input#id_rfcs").attr("checked"),
"The RFCs checkbox should not be selected in the SearchForm",
)
self.assertIsNone(
pq("form#search_form input#id_activedrafts").attr("checked"),
"The active drafts checkbox should not be selected in the SearchForm",
)
draft = WgDraftFactory(name='draft-ietf-mars-test',group=GroupFactory(acronym='mars',parent=Group.objects.get(acronym='farfut')),authors=[PersonFactory()],ad=PersonFactory())
def test_search(self):
draft = WgDraftFactory(
name="draft-ietf-mars-test",
group=GroupFactory(acronym="mars", parent=Group.objects.get(acronym="farfut")),
authors=[PersonFactory()],
ad=PersonFactory(),
)
rfc = WgRfcFactory()
draft.set_state(State.objects.get(used=True, type="draft-iesg", slug="pub-req"))
old_draft = IndividualDraftFactory(name='draft-foo-mars-test',authors=[PersonFactory()],title="Optimizing Martian Network Topologies")
old_draft = IndividualDraftFactory(
name="draft-foo-mars-test",
authors=[PersonFactory()],
title="Optimizing Martian Network Topologies",
)
old_draft.set_state(State.objects.get(used=True, type="draft", slug="expired"))
base_url = urlreverse('ietf.doc.views_search.search')
url = urlreverse("ietf.doc.views_search.search")
# only show form, no search yet
r = self.client.get(base_url)
r = self.client.get(url)
self.assertEqual(r.status_code, 200)
# no match
r = self.client.get(base_url + "?activedrafts=on&name=thisisnotadocumentname")
r = self.client.post(url, {"activedrafts": "on", "name": "thisisnotadocumentname"})
self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?rfcs=on&name=xyzzy")
r = self.client.post(url, {"rfcs": "on", "name": "xyzzy"})
self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?olddrafts=on&name=bar")
r = self.client.post(url, {"olddrafts": "on", "name": "bar"})
self.assertEqual(r.status_code, 200)
self.assertContains(r, "No documents match")
r = self.client.get(base_url + "?olddrafts=on&name=foo")
r = self.client.post(url, {"olddrafts": "on", "name": "foo"})
self.assertEqual(r.status_code, 200)
self.assertContains(r, "draft-foo-mars-test")
r = self.client.get(base_url + "?olddrafts=on&name=FoO") # mixed case
r = self.client.post(url, {"olddrafts": "on", "name": "FoO"}) # mixed case
self.assertEqual(r.status_code, 200)
self.assertContains(r, "draft-foo-mars-test")
# find by RFC
r = self.client.get(base_url + "?rfcs=on&name=%s" % rfc.name)
r = self.client.post(url, {"rfcs": "on", "name": rfc.name})
self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title)
# find by active/inactive
draft.set_state(State.objects.get(type="draft", slug="active"))
r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.name)
r = self.client.post(url, {"activedrafts": "on", "name": draft.name})
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
draft.set_state(State.objects.get(type="draft", slug="expired"))
r = self.client.get(base_url + "?olddrafts=on&name=%s" % draft.name)
r = self.client.post(url, {"olddrafts": "on", "name": draft.name})
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
draft.set_state(State.objects.get(type="draft", slug="active"))
# find by title
r = self.client.get(base_url + "?activedrafts=on&name=%s" % draft.title.split()[0])
r = self.client.post(url, {"activedrafts": "on", "name": draft.title.split()[0]})
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by author
r = self.client.get(base_url + "?activedrafts=on&by=author&author=%s" % draft.documentauthor_set.first().person.name_parts()[1])
r = self.client.post(
url,
{
"activedrafts": "on",
"by": "author",
"author": draft.documentauthor_set.first().person.name_parts()[1],
},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by group
r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym)
r = self.client.post(
url,
{"activedrafts": "on", "by": "group", "group": draft.group.acronym},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
r = self.client.get(base_url + "?activedrafts=on&by=group&group=%s" % draft.group.acronym.swapcase())
r = self.client.post(
url,
{"activedrafts": "on", "by": "group", "group": draft.group.acronym.swapcase()},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by area
r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id)
r = self.client.post(
url,
{"activedrafts": "on", "by": "area", "area": draft.group.parent_id},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by area
r = self.client.get(base_url + "?activedrafts=on&by=area&area=%s" % draft.group.parent_id)
r = self.client.post(
url,
{"activedrafts": "on", "by": "area", "area": draft.group.parent_id},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by AD
r = self.client.get(base_url + "?activedrafts=on&by=ad&ad=%s" % draft.ad_id)
r = self.client.post(url, {"activedrafts": "on", "by": "ad", "ad": draft.ad_id})
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
# find by IESG state
r = self.client.get(base_url + "?activedrafts=on&by=state&state=%s&substate=" % draft.get_state("draft-iesg").pk)
r = self.client.post(
url,
{
"activedrafts": "on",
"by": "state",
"state": draft.get_state("draft-iesg").pk,
"substate": "",
},
)
self.assertEqual(r.status_code, 200)
self.assertContains(r, draft.title)
@ -169,15 +236,15 @@ class SearchTests(TestCase):
rfc = WgRfcFactory()
draft.set_state(State.objects.get(type="draft", slug="rfc"))
draft.relateddocument_set.create(relationship_id="became_rfc", target=rfc)
base_url = urlreverse('ietf.doc.views_search.search')
url = urlreverse("ietf.doc.views_search.search")
# find by RFC
r = self.client.get(base_url + f"?rfcs=on&name={rfc.name}")
r = self.client.post(url, {"rfcs": "on", "name": rfc.name})
self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title)
# find by draft
r = self.client.get(base_url + f"?activedrafts=on&rfcs=on&name={draft.name}")
r = self.client.post(url, {"activedrafts": "on", "rfcs": "on", "name": draft.name})
self.assertEqual(r.status_code, 200)
self.assertContains(r, rfc.title)

View file

@ -92,10 +92,8 @@ class EditAuthorsTests(IetfSeleniumTestCase):
self.assertEqual(len(author_forms), 1)
# get the "add author" button so we can add blank author forms
add_author_button = self.driver.find_element(By.ID, 'add-author-button')
for index, auth in enumerate(authors):
self.scroll_to_element(add_author_button) # Can only click if it's in view!
add_author_button.click() # Create a new form. Automatically scrolls to it.
self.scroll_and_click((By.ID, 'add-author-button')) # Create new form. Automatically scrolls to it.
author_forms = authors_list.find_elements(By.CLASS_NAME, 'author-panel')
authors_added = index + 1
self.assertEqual(len(author_forms), authors_added + 1) # Started with 1 author, hence +1

View file

@ -3,9 +3,7 @@
import datetime
import hashlib
import io
import json
import math
import os
import re
@ -348,6 +346,7 @@ def augment_events_with_revision(doc, events):
"""Take a set of events for doc and add a .rev attribute with the
revision they refer to by checking NewRevisionDocEvents."""
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(events, QuerySetAny):
qs = events.filter(newrevisiondocevent__isnull=False)
else:
@ -1047,12 +1046,8 @@ def make_rev_history(doc):
return sorted(history, key=lambda x: x['published'])
def get_search_cache_key(params):
from ietf.doc.views_search import SearchForm
fields = set(SearchForm.base_fields) - set(['sort',])
kwargs = dict([ (k,v) for (k,v) in list(params.items()) if k in fields ])
key = "doc:document:search:" + hashlib.sha512(json.dumps(kwargs, sort_keys=True).encode('utf-8')).hexdigest()
return key
def get_search_cache_key(key_fragment):
return f"doc:document:search:{key_fragment}"
def build_file_urls(doc: Union[Document, DocHistory]):

View file

@ -37,6 +37,8 @@
import re
import datetime
import copy
import hashlib
import json
import operator
from collections import defaultdict
@ -44,16 +46,17 @@ from functools import reduce
from django import forms
from django.conf import settings
from django.contrib import messages
from django.core.cache import cache, caches
from django.urls import reverse as urlreverse
from django.db.models import Q
from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect, QueryDict
from django.http import Http404, HttpResponseBadRequest, HttpResponse, HttpResponseRedirect
from django.shortcuts import render
from django.utils import timezone
from django.utils.html import strip_tags
from django.utils.cache import _generate_cache_key # type: ignore
from django.utils.text import slugify
from django_stubs_ext import QuerySetAny
import debug # pyflakes:ignore
@ -145,6 +148,29 @@ class SearchForm(forms.Form):
q['irtfstate'] = None
return q
def cache_key_fragment(self):
"""Hash a bound form to get a value for use in a cache key
Raises a ValueError if the form is not valid.
"""
def _serialize_value(val):
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(val, QuerySetAny):
return [item.pk for item in val]
else:
return getattr(val, "pk", val) # use pk if present, else value
if not self.is_valid():
raise ValueError(f"SearchForm invalid: {self.errors}")
contents = {
field_name: _serialize_value(field_value)
for field_name, field_value in self.cleaned_data.items()
if field_name != "sort" and field_value is not None
}
contents_json = json.dumps(contents, sort_keys=True)
return hashlib.sha512(contents_json.encode("utf-8")).hexdigest()
def retrieve_search_results(form, all_types=False):
"""Takes a validated SearchForm and return the results."""
@ -256,45 +282,64 @@ def retrieve_search_results(form, all_types=False):
return docs
def search(request):
if request.GET:
# backwards compatibility
get_params = request.GET.copy()
if 'activeDrafts' in request.GET:
get_params['activedrafts'] = request.GET['activeDrafts']
if 'oldDrafts' in request.GET:
get_params['olddrafts'] = request.GET['oldDrafts']
if 'subState' in request.GET:
get_params['substate'] = request.GET['subState']
"""Search for a draft"""
# defaults for results / meta
results = []
meta = {"by": None, "searching": False}
form = SearchForm(get_params)
if not form.is_valid():
return HttpResponseBadRequest("form not valid: %s" % form.errors)
cache_key = get_search_cache_key(get_params)
cached_val = cache.get(cache_key)
if cached_val:
[results, meta] = cached_val
else:
results = retrieve_search_results(form)
results, meta = prepare_document_table(request, results, get_params)
cache.set(cache_key, [results, meta]) # for settings.CACHE_MIDDLEWARE_SECONDS
log(f"Search results computed for {get_params}")
meta['searching'] = True
if request.method == "POST":
form = SearchForm(data=request.POST)
if form.is_valid():
cache_key = get_search_cache_key(form.cache_key_fragment())
cached_val = cache.get(cache_key)
if cached_val:
[results, meta] = cached_val
else:
results = retrieve_search_results(form)
results, meta = prepare_document_table(
request, results, form.cleaned_data
)
cache.set(
cache_key, [results, meta]
) # for settings.CACHE_MIDDLEWARE_SECONDS
log(f"Search results computed for {form.cleaned_data}")
meta["searching"] = True
else:
form = SearchForm()
results = []
meta = { 'by': None, 'searching': False }
get_params = QueryDict('')
if request.GET:
# backwards compatibility - fill in the form
get_params = request.GET.copy()
if "activeDrafts" in request.GET:
get_params["activedrafts"] = request.GET["activeDrafts"]
if "oldDrafts" in request.GET:
get_params["olddrafts"] = request.GET["oldDrafts"]
if "subState" in request.GET:
get_params["substate"] = request.GET["subState"]
form = SearchForm(data=get_params)
messages.error(
request,
(
"Searching via the URL query string is no longer supported. "
"The form below has been filled in with the parameters from your request. "
'To execute your search, please click "Search".'
),
)
else:
form = SearchForm()
return render(request, 'doc/search/search.html', {
'form':form, 'docs':results, 'meta':meta, 'queryargs':get_params.urlencode() },
return render(
request,
"doc/search/search.html",
context={"form": form, "docs": results, "meta": meta},
)
def frontpage(request):
form = SearchForm()
return render(request, 'doc/frontpage.html', {'form':form})
def search_for_name(request, name):
def find_unique(n):
exact = Document.objects.filter(name__iexact=n).first()

View file

@ -203,6 +203,7 @@ class SearchLiaisonForm(forms.Form):
class CustomModelMultipleChoiceField(ModelMultipleChoiceField):
'''If value is a QuerySet, return it as is (for use in widget.render)'''
def prepare_value(self, value):
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if isinstance(value, QuerySetAny):
return value
if (hasattr(value, '__iter__') and

View file

@ -35,6 +35,7 @@ class ShowAttachmentsWidget(Widget):
html = '<div id="id_%s">' % name
html += '<span class="d-none showAttachmentsEmpty form-control widget">No files attached</span>'
html += '<div class="attachedFiles form-control widget">'
# Need QuerySetAny instead of QuerySet until django-stubs 5.0.1
if value and isinstance(value, QuerySetAny):
for attachment in value:
html += '<a class="initialAttach" href="%s">%s</a>&nbsp' % (conditional_escape(attachment.document.get_href()), conditional_escape(attachment.document.title))

View file

@ -4,8 +4,10 @@
{% load widget_tweaks %}
{% load ietf_filters %}
<form id="search_form"
method="post"
class="form-horizontal"
action="{% url 'ietf.doc.views_search.search' %}">
{% csrf_token %}
<!-- [html-validate-disable-block input-missing-label -- labelled via aria-label] -->
<div class="input-group search_field">
{{ form.name|add_class:"form-control"|attr:"placeholder:Document name/title/RFC number"|attr:"aria-label:Document name/title/RFC number" }}