feat: async investigate_fragment task; celery results backend (#8428)

* feat: investigate docs asynchronously

* refactor: move script to its own js file

* fix: adjust polling interval/duration

* test: test new task

* fix: extra tag/fix whitespace

* style: restore whitespace (I hope)

* style: black/standard styling

* test: fix test of investigate view

* test: improve/delint tests
This commit is contained in:
Jennifer Richards 2025-01-17 11:16:15 -04:00 committed by GitHub
parent df27ba9934
commit c848a5a00b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 386 additions and 124 deletions

View file

@ -276,6 +276,7 @@ class InvestigateForm(forms.Form):
),
min_length=8,
)
task_id = forms.CharField(required=False, widget=forms.HiddenInput)
def clean_name_fragment(self):
disallowed_characters = ["%", "/", "\\", "*"]

View file

@ -31,6 +31,7 @@ from .utils import (
generate_idnits2_rfcs_obsoleted,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
investigate_fragment,
)
@ -119,3 +120,11 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
update_or_create_draft_bibxml_file(event.doc, event.rev)
except Exception as err:
log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
@shared_task(ignore_result=False)
def investigate_fragment_task(name_fragment: str):
return {
"name_fragment": name_fragment,
"results": investigate_fragment(name_fragment),
}

View file

@ -3280,7 +3280,8 @@ class InvestigateTests(TestCase):
"draft-this-should-not-be-possible-00.txt",
)
def test_investigate(self):
def test_investigate_get(self):
"""GET with no querystring should retrieve the investigate UI"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
r = self.client.get(url)
@ -3288,36 +3289,143 @@ class InvestigateTests(TestCase):
q = PyQuery(r.content)
self.assertEqual(len(q("form#investigate")), 1)
self.assertEqual(len(q("div#results")), 0)
r = self.client.post(url, dict(name_fragment="this-is-not-found"))
@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investgate_get_task_id(self, mock_asyncresult):
"""GET with querystring should lookup task status"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
mock_asyncresult.return_value.ready.return_value = True
r = self.client.get(url + "?id=a-task-id")
self.assertEqual(r.status_code, 200)
self.assertEqual(r.json(), {"status": "ready"})
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
mock_asyncresult.return_value.ready.return_value = False
r = self.client.get(url + "?id=a-task-id")
self.assertEqual(r.status_code, 200)
self.assertEqual(r.json(), {"status": "notready"})
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
@mock.patch("ietf.doc.views_doc.investigate_fragment_task")
def test_investigate_post(self, mock_investigate_fragment_task):
"""POST with a name_fragment and no task_id should start a celery task"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
# test some invalid cases
r = self.client.post(url, {"name_fragment": "short"}) # limit is >= 8 characters
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
self.assertFalse(mock_investigate_fragment_task.delay.called)
for char in ["*", "%", "/", "\\"]:
r = self.client.post(url, {"name_fragment": f"bad{char}character"})
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
self.assertFalse(mock_investigate_fragment_task.delay.called)
# now a valid one
mock_investigate_fragment_task.delay.return_value.id = "a-task-id"
r = self.client.post(url, {"name_fragment": "this-is-a-valid-fragment"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_investigate_fragment_task.delay.called)
self.assertEqual(mock_investigate_fragment_task.delay.call_args, mock.call("this-is-a-valid-fragment"))
self.assertEqual(r.json(), {"id": "a-task-id"})
@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investigate_post_task_id(self, mock_asyncresult):
"""POST with name_fragment and task_id should retrieve results"""
url = urlreverse("ietf.doc.views_doc.investigate")
login_testing_unauthorized(self, "secretary", url)
# First, test a non-successful result - this could be a failure or non-existent task id
mock_result = mock_asyncresult.return_value
mock_result.successful.return_value = False
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertContains(r, "The investigation task failed.", status_code=200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
self.assertFalse(mock_result.get.called)
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "some-fragment")
self.assertEqual(q("#id_task_id").val(), "a-task-id")
# now the various successful result mixes
mock_result = mock_asyncresult.return_value
mock_result.successful.return_value = True
mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": set(),
"unverifiable_collections": set(),
"unexpected": set(),
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 0)
self.assertEqual(len(q("table#unverifiable")), 0)
self.assertEqual(len(q("table#unexpected")), 0)
r = self.client.post(url, dict(name_fragment="mixed-provenance"))
# This file was created in setUp. It allows the view to render properly
# but its location / content don't matter for this test otherwise.
a_file_that_exists = Path(settings.INTERNET_DRAFT_PATH) / "draft-this-is-active-00.txt"
mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": {a_file_that_exists},
"unverifiable_collections": {a_file_that_exists},
"unexpected": set(),
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 1)
self.assertEqual(len(q("table#unverifiable")), 1)
self.assertEqual(len(q("table#unexpected")), 0)
r = self.client.post(url, dict(name_fragment="not-be-possible"))
mock_result.get.return_value = {
"name_fragment": "different-fragment",
"results": {
"can_verify": set(),
"unverifiable_collections": set(),
"unexpected": {a_file_that_exists},
}
}
r = self.client.post(url, {"name_fragment": "some-fragment", "task_id": "a-task-id"})
self.assertEqual(r.status_code, 200)
self.assertTrue(mock_asyncresult.called)
self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))
mock_asyncresult.reset_mock()
q = PyQuery(r.content)
self.assertEqual(q("#id_name_fragment").val(), "different-fragment", "name_fragment should be reset")
self.assertEqual(q("#id_task_id").val(), "", "task_id should be cleared")
self.assertEqual(len(q("div#results")), 1)
self.assertEqual(len(q("table#authenticated")), 0)
self.assertEqual(len(q("table#unverifiable")), 0)
self.assertEqual(len(q("table#unexpected")), 1)
r = self.client.post(url, dict(name_fragment="short"))
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
for char in ["*", "%", "/", "\\"]:
r = self.client.post(url, dict(name_fragment=f"bad{char}character"))
self.assertEqual(r.status_code, 200)
q = PyQuery(r.content)
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
class LogIOErrorTests(TestCase):

View file

@ -20,6 +20,7 @@ from .tasks import (
generate_draft_bibxml_files_task,
generate_idnits2_rfcs_obsoleted_task,
generate_idnits2_rfc_status_task,
investigate_fragment_task,
notify_expirations_task,
)
@ -98,6 +99,18 @@ class TaskTests(TestCase):
self.assertEqual(mock_expire.call_args_list[1], mock.call(docs[1]))
self.assertEqual(mock_expire.call_args_list[2], mock.call(docs[2]))
def test_investigate_fragment_task(self):
investigation_results = object() # singleton
with mock.patch(
"ietf.doc.tasks.investigate_fragment", return_value=investigation_results
) as mock_inv:
retval = investigate_fragment_task("some fragment")
self.assertTrue(mock_inv.called)
self.assertEqual(mock_inv.call_args, mock.call("some fragment"))
self.assertEqual(
retval, {"name_fragment": "some fragment", "results": investigation_results}
)
class Idnits2SupportTests(TestCase):
settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ['DERIVED_DIR']

View file

@ -41,10 +41,11 @@ import re
from pathlib import Path
from celery.result import AsyncResult
from django.core.cache import caches
from django.core.exceptions import PermissionDenied
from django.db.models import Max
from django.http import HttpResponse, Http404, HttpResponseBadRequest
from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.template.loader import render_to_string
from django.urls import reverse as urlreverse
@ -59,8 +60,9 @@ from ietf.doc.models import ( Document, DocHistory, DocEvent, BallotDocEvent, Ba
ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent,
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
RelatedDocument, RelatedDocHistory)
from ietf.doc.tasks import investigate_fragment_task
from ietf.doc.utils import (augment_events_with_revision,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
add_events_message_info, get_unicode_document_content,
@ -2275,16 +2277,67 @@ def idnits2_state(request, name, rev=None):
content_type="text/plain;charset=utf-8",
)
@role_required("Secretariat")
def investigate(request):
"""Investigate a fragment
A plain GET with no querystring returns the UI page.
POST with the task_id field empty starts an async task and returns a JSON response with
the ID needed to monitor the task for results.
GET with a querystring parameter "id" will poll the status of the async task and return "ready"
or "notready".
POST with the task_id field set to the id of a "ready" task will return its results or an error
if the task failed or the id is invalid (expired, never exited, etc).
"""
results = None
# Start an investigation or retrieve a result on a POST
if request.method == "POST":
form = InvestigateForm(request.POST)
if form.is_valid():
name_fragment = form.cleaned_data["name_fragment"]
results = investigate_fragment(name_fragment)
task_id = form.cleaned_data["task_id"]
if task_id:
# Ignore the rest of the form and retrieve the result
task_result = AsyncResult(task_id)
if task_result.successful():
retval = task_result.get()
results = retval["results"]
form.data = form.data.copy()
form.data["name_fragment"] = retval[
"name_fragment"
] # ensure consistency
del form.data["task_id"] # do not request the task result again
else:
form = InvestigateForm()
return render(
request, "doc/investigate.html", context=dict(form=form, results=results)
form.add_error(
None,
"The investigation task failed. Please try again and ask for help if this recurs.",
)
# Falls through to the render at the end!
else:
name_fragment = form.cleaned_data["name_fragment"]
task_result = investigate_fragment_task.delay(name_fragment)
return JsonResponse({"id": task_result.id})
else:
task_id = request.GET.get("id", None)
if task_id is not None:
# Check status if we got the "id" parameter
task_result = AsyncResult(task_id)
return JsonResponse(
{"status": "ready" if task_result.ready() else "notready"}
)
else:
# Serve up an empty form
form = InvestigateForm()
# If we get here, it is just a plain GET - serve the UI
return render(
request,
"doc/investigate.html",
context={
"form": form,
"results": results,
},
)

View file

@ -452,6 +452,7 @@ INSTALLED_APPS = [
'django_vite',
'django_bootstrap5',
'django_celery_beat',
'django_celery_results',
'corsheaders',
'django_markup',
'oidc_provider',
@ -1226,7 +1227,9 @@ CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True # the default, but setting it
# https://docs.celeryq.dev/en/stable/userguide/tasks.html#rpc-result-backend-rabbitmq-qpid
# Results can be retrieved only once and only by the caller of the task. Results will be
# lost if the message broker restarts.
CELERY_RESULT_BACKEND = 'rpc://' # sends a msg via the msg broker
CELERY_RESULT_BACKEND = 'django-cache' # use a Django cache for results
CELERY_CACHE_BACKEND = 'celery-results' # which Django cache to use
CELERY_RESULT_EXPIRES = datetime.timedelta(minutes=5) # how long are results valid? (Default is 1 day)
CELERY_TASK_IGNORE_RESULT = True # ignore results unless specifically enabled for a task
# Meetecho API setup: Uncomment this and provide real credentials to enable
@ -1309,6 +1312,11 @@ if "CACHES" not in locals():
"MAX_ENTRIES": 5000,
},
},
"celery-results": {
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
"LOCATION": f"{MEMCACHED_HOST}:{MEMCACHED_PORT}",
"KEY_PREFIX": "ietf:celery",
},
}
else:
CACHES = {
@ -1347,6 +1355,11 @@ if "CACHES" not in locals():
"MAX_ENTRIES": 5000,
},
},
"celery-results": {
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
"LOCATION": "app:11211",
"KEY_PREFIX": "ietf:celery",
},
}
PUBLISH_IPR_STATES = ['posted', 'removed', 'removed_objfalse']

View file

@ -0,0 +1,53 @@
// Copyright The IETF Trust 2025, All Rights Reserved
document.addEventListener('DOMContentLoaded', () => {
const investigateForm = document.forms['investigate']
investigateForm.addEventListener('submit', (event) => {
// Intercept submission unless we've filled in the task_id field
if (!investigateForm.elements['id_task_id'].value) {
event.preventDefault()
runInvestigation()
}
})
const runInvestigation = async () => {
// Submit the request
const response = await fetch('', {
method: investigateForm.method, body: new FormData(investigateForm)
})
if (!response.ok) {
loadResultsFromTask('bogus-task-id') // bad task id will generate an error from Django
}
const taskId = (await response.json()).id
// Poll for completion of the investigation up to 18*10 = 180 seconds
waitForResults(taskId, 18)
}
const waitForResults = async (taskId, retries) => {
// indicate that investigation is in progress
document.getElementById('spinner').classList.remove('d-none')
document.getElementById('investigate-button').disabled = true
investigateForm.elements['id_name_fragment'].disabled = true
const response = await fetch('?' + new URLSearchParams({ id: taskId }))
if (!response.ok) {
loadResultsFromTask('bogus-task-id') // bad task id will generate an error from Django
}
const result = await response.json()
if (result.status !== 'ready' && retries > 0) {
// 10 seconds per retry
setTimeout(waitForResults, 10000, taskId, retries - 1)
} else {
/* Either the response is ready or we timed out waiting. In either case, submit
the task_id via POST and let Django display an error if it's not ready. Before
submitting, re-enable the form fields so the POST is valid. Other in-progress
indicators will be reset when the POST response is loaded. */
loadResultsFromTask(taskId)
}
}
const loadResultsFromTask = (taskId) => {
investigateForm.elements['id_name_fragment'].disabled = false
investigateForm.elements['id_task_id'].value = taskId
investigateForm.submit()
}
})

View file

@ -8,11 +8,20 @@
{% block content %}
{% origin %}
<h1>Investigate</h1>
<div class="mb-3">
<form id="investigate" method="post">
{% csrf_token %}
{% bootstrap_form form %}
<button class="btn btn-primary" type="submit">Investigate</button>
<button class="btn btn-primary" type="submit" id="investigate-button">
<span id="spinner"
class="spinner-border spinner-border-sm d-none"
role="status"
aria-hidden="true">
</span>
Investigate
</button>
</form>
</div>
{% if results %}
<div id="results">
{% if results.can_verify %}
@ -30,16 +39,16 @@
{% for path in results.can_verify %}
{% with url=path|url_for_path %}
<tr>
<td>{{path.name}}</td>
<td>{{ path.name }}</td>
<td>
{% if path|mtime_is_epoch %}
Timestamp has been lost (is Unix Epoch)
{% else %}
{{path|mtime|date:"DATETIME_FORMAT"}}
{{ path|mtime|date:"DATETIME_FORMAT" }}
{% endif %}
</td>
<td><a href="{{url}}">{{url}}</a></td>
<td>{{path}}</td>
<td><a href="{{ url }}">{{ url }}</a></td>
<td>{{ path }}</td>
</tr>
{% endwith %}
{% endfor %}
@ -64,16 +73,16 @@
{% for path in results.unverifiable_collections %}
{% with url=path|url_for_path %}
<tr>
<td>{{path.name}}</td>
<td>{{ path.name }}</td>
<td>
{% if path|mtime_is_epoch %}
Timestamp has been lost (is Unix Epoch)
{% else %}
{{path|mtime|date:"DATETIME_FORMAT"}}
{{ path|mtime|date:"DATETIME_FORMAT" }}
{% endif %}
</td>
<td><a href="{{url}}">{{url}}</a></td>
<td>{{path}}</td>
<td><a href="{{ url }}">{{ url }}</a></td>
<td>{{ path }}</td>
</tr>
{% endwith %}
{% endfor %}
@ -94,15 +103,15 @@
{% for path in results.unexpected %}
{% with url=path|url_for_path %}
<tr>
<td>{{path.name}}</td>
<td>{{ path.name }}</td>
<td>
{% if path|mtime_is_epoch %}
Timestamp has been lost (is Unix Epoch)
{% else %}
{{path|mtime|date:"DATETIME_FORMAT"}}
{{ path|mtime|date:"DATETIME_FORMAT" }}
{% endif %}
</td>
<td><a href="{{url}}">{{url}}</a></td>
<td><a href="{{ url }}">{{ url }}</a></td>
</tr>
{% endwith %}
{% endfor %}
@ -114,4 +123,5 @@
{% endblock %}
{% block js %}
<script src="{% static "ietf/js/list.js" %}"></script>
<script src="{% static "ietf/js/investigate.js" %}"></script>
{% endblock %}

View file

@ -132,6 +132,7 @@
"ietf/static/js/highcharts.js",
"ietf/static/js/highstock.js",
"ietf/static/js/ietf.js",
"ietf/static/js/investigate.js",
"ietf/static/js/ipr-edit.js",
"ietf/static/js/ipr-search.js",
"ietf/static/js/js-cookie.js",

View file

@ -13,6 +13,7 @@ Django>4.2,<5
django-analytical>=3.1.0
django-bootstrap5>=21.3
django-celery-beat>=2.3.0
django-celery-results>=2.5.1
django-csp>=3.7
django-cors-headers>=3.11.0
django-debug-toolbar>=3.2.4