From a9525ab4f46943ec0b1df3f96ecee54721dbda96 Mon Sep 17 00:00:00 2001 From: Ole Laursen Date: Tue, 31 Jan 2017 16:32:20 +0000 Subject: [PATCH] Revamp stats selector UI a bit to accommodate statistics counting authors (instead of documents), add stats with documents per author - Legacy-Id: 12766 --- ietf/static/ietf/js/document-stats.js | 22 +- ietf/stats/tests.py | 2 +- ietf/stats/urls.py | 2 +- ietf/stats/views.py | 355 +++++++++++------- ietf/templates/stats/document_stats.html | 23 +- .../document_stats_author_documents.html | 65 ++++ .../stats/document_stats_authors.html | 4 +- .../stats/document_stats_format.html | 2 +- .../stats/document_stats_formlang.html | 2 +- .../templates/stats/document_stats_pages.html | 2 +- .../templates/stats/document_stats_words.html | 2 +- .../stats/includes/docnames_cell.html | 1 - .../includes/number_with_details_cell.html | 1 + ietf/templates/stats/index.html | 6 +- 14 files changed, 318 insertions(+), 171 deletions(-) create mode 100644 ietf/templates/stats/document_stats_author_documents.html delete mode 100644 ietf/templates/stats/includes/docnames_cell.html create mode 100644 ietf/templates/stats/includes/number_with_details_cell.html diff --git a/ietf/static/ietf/js/document-stats.js b/ietf/static/ietf/js/document-stats.js index 922a7205e..7d97d868e 100644 --- a/ietf/static/ietf/js/document-stats.js +++ b/ietf/static/ietf/js/document-stats.js @@ -15,20 +15,26 @@ $(document).ready(function () { var chart = Highcharts.chart('chart', window.chartConf); } - $(".popover-docnames").each(function () { + $(".popover-details").each(function () { var stdNameRegExp = new RegExp("^(rfc|bcp|fyi|std)[0-9]+$", 'i'); + var draftRegExp = new RegExp("^draft-", 'i'); - var html = []; - $.each(($(this).data("docnames") || "").split(" "), function (i, docname) { - if (!$.trim(docname)) + var html = [];t + $.each(($(this).data("elements") || "").split("|"), function (i, element) { + if (!$.trim(element)) return; - var displayName = docname; + 
if (draftRegExp.test(element) || stdNameRegExp.test(element)) { + var displayName = element; - if (stdNameRegExp.test(docname)) - displayName = docname.slice(0, 3).toUpperCase() + " " + docname.slice(3); + if (stdNameRegExp.test(element)) + displayName = element.slice(0, 3).toUpperCase() + " " + element.slice(3); - html.push('
' + displayName + '
'); + html.push('
' + displayName + '
'); + } + else { + html.push('
' + element + '
'); + } }); if ($(this).data("sliced")) diff --git a/ietf/stats/tests.py b/ietf/stats/tests.py index 7bb1ab230..fa973544c 100644 --- a/ietf/stats/tests.py +++ b/ietf/stats/tests.py @@ -25,7 +25,7 @@ class StatisticsTests(TestCase): self.assertTrue(authors_url in r["Location"]) # check various stats types - for stats_type in ["authors", "pages", "words", "format", "formlang"]: + for stats_type in ["authors", "pages", "words", "format", "formlang", "author/documents"]: for document_type in ["", "rfc", "draft"]: for time_choice in ["", "5y"]: url = urlreverse(ietf.stats.views.document_stats, kwargs={ "stats_type": stats_type }) diff --git a/ietf/stats/urls.py b/ietf/stats/urls.py index a700a5a3a..ee4fb4018 100644 --- a/ietf/stats/urls.py +++ b/ietf/stats/urls.py @@ -5,6 +5,6 @@ import ietf.stats.views urlpatterns = patterns('', url("^$", ietf.stats.views.stats_index), - url("^document/(?:(?Pauthors|pages|words|format|formlang)/)?$", ietf.stats.views.document_stats), + url("^document/(?:(?Pauthors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citation)/)?$", ietf.stats.views.document_stats), url("^review/(?:(?Pcompletion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, ietf.stats.views.review_stats), ) diff --git a/ietf/stats/views.py b/ietf/stats/views.py index ace78f5cf..c151888b5 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -9,7 +9,7 @@ from django.shortcuts import render from django.contrib.auth.decorators import login_required from django.core.urlresolvers import reverse as urlreverse from django.http import HttpResponseRedirect, HttpResponseForbidden -from django.db.models import Count +from django.db.models import Count, Q from django.utils.safestring import mark_safe from django.conf import settings @@ -88,7 +88,7 @@ def document_stats(request, stats_type=None): return urlreverse(document_stats, kwargs={ k: v for k, v in kwargs.iteritems() if v is not None }) + 
generate_query_string(request.GET, get_overrides) # statistics type - one of the tables or the chart - possible_stats_types = add_url_to_choices([ + possible_document_stats_types = add_url_to_choices([ ("authors", "Number of authors"), ("pages", "Pages"), ("words", "Words"), @@ -96,8 +96,18 @@ def document_stats(request, stats_type=None): ("formlang", "Formal languages"), ], lambda slug: build_document_stats_url(stats_type_override=slug)) + # statistics type - one of the tables or the chart + possible_author_stats_types = add_url_to_choices([ + ("author/documents", "Number of documents"), + ("author/affiliation", "Affiliation"), + ("author/country", "Country"), + ("author/continent", "Continent"), + ("author/citation", "Citations"), + ], lambda slug: build_document_stats_url(stats_type_override=slug)) + + if not stats_type: - return HttpResponseRedirect(build_document_stats_url(stats_type_override=possible_stats_types[0][0])) + return HttpResponseRedirect(build_document_stats_url(stats_type_override=possible_document_stats_types[0][0])) possible_document_types = add_url_to_choices([ @@ -124,196 +134,248 @@ def document_stats(request, stats_type=None): except ValueError: pass - def generate_canonical_names(docalias_qs): - for doc_id, ts in itertools.groupby(docalias_qs.order_by("document"), lambda t: t[0]): - chosen = None - for t in ts: - if chosen is None: - chosen = t - else: - if t[0].startswith("rfc"): - chosen = t - elif t[0].startswith("draft") and not chosen[0].startswith("rfc"): - chosen = t - - yield chosen - - # filter documents - docalias_qs = DocAlias.objects.filter(document__type="draft") - - if document_type == "rfc": - docalias_qs = docalias_qs.filter(document__states__type="draft", document__states__slug="rfc") - elif document_type == "draft": - docalias_qs = docalias_qs.exclude(document__states__type="draft", document__states__slug="rfc") - - if from_time: - # this is actually faster than joining in the database, - # despite the round-trip back and 
forth - docs_within_time_constraint = list(Document.objects.filter( - type="draft", - docevent__time__gte=from_time, - docevent__type__in=["published_rfc", "new_revision"], - ).values_list("pk")) - - docalias_qs = docalias_qs.filter(document__in=docs_within_time_constraint) - chart_data = [] table_data = [] - - if document_type == "rfc": - doc_label = "RFC" - elif document_type == "draft": - doc_label = "draft" - else: - doc_label = "document" - stats_title = "" bin_size = 1 - total_docs = docalias_qs.count() - if stats_type == "authors": - stats_title = "Number of authors for each {}".format(doc_label) + if any(stats_type == t[0] for t in possible_document_stats_types): + # filter documents + docalias_qs = DocAlias.objects.filter(document__type="draft") - bins = defaultdict(list) + if document_type == "rfc": + docalias_qs = docalias_qs.filter(document__states__type="draft", document__states__slug="rfc") + elif document_type == "draft": + docalias_qs = docalias_qs.exclude(document__states__type="draft", document__states__slug="rfc") - for name, author_count in generate_canonical_names(docalias_qs.values_list("name").annotate(Count("document__documentauthor"))): - bins[author_count].append(name) + if from_time: + # this is actually faster than joining in the database, + # despite the round-trip back and forth + docs_within_time_constraint = list(Document.objects.filter( + type="draft", + docevent__time__gte=from_time, + docevent__type__in=["published_rfc", "new_revision"], + ).values_list("pk")) - series_data = [] - for author_count, names in sorted(bins.iteritems(), key=lambda t: t[0]): - percentage = len(names) * 100.0 / total_docs - series_data.append((author_count, percentage)) - table_data.append((author_count, percentage, names)) + docalias_qs = docalias_qs.filter(document__in=docs_within_time_constraint) - chart_data.append({ - "data": series_data, - "animation": False, - }) + if document_type == "rfc": + doc_label = "RFC" + elif document_type == "draft": + 
doc_label = "draft" + else: + doc_label = "document" - elif stats_type == "pages": - stats_title = "Number of pages for each {}".format(doc_label) + total_docs = docalias_qs.count() - bins = defaultdict(list) + def generate_canonical_names(docalias_qs): + for doc_id, ts in itertools.groupby(docalias_qs.order_by("document"), lambda t: t[0]): + chosen = None + for t in ts: + if chosen is None: + chosen = t + else: + if t[0].startswith("rfc"): + chosen = t + elif t[0].startswith("draft") and not chosen[0].startswith("rfc"): + chosen = t - for name, pages in generate_canonical_names(docalias_qs.values_list("name", "document__pages")): - bins[pages].append(name) + yield chosen - series_data = [] - for pages, names in sorted(bins.iteritems(), key=lambda t: t[0]): - percentage = len(names) * 100.0 / total_docs - if pages is not None: - series_data.append((pages, len(names))) - table_data.append((pages, percentage, names)) + if stats_type == "authors": + stats_title = "Number of authors for each {}".format(doc_label) - chart_data.append({ - "data": series_data, - "animation": False, - }) + bins = defaultdict(list) - elif stats_type == "words": - stats_title = "Number of words for each {}".format(doc_label) + for name, author_count in generate_canonical_names(docalias_qs.values_list("name").annotate(Count("document__documentauthor"))): + bins[author_count].append(name) - bin_size = 500 + series_data = [] + for author_count, names in sorted(bins.iteritems(), key=lambda t: t[0]): + percentage = len(names) * 100.0 / total_docs + series_data.append((author_count, percentage)) + table_data.append((author_count, percentage, names)) - bins = defaultdict(list) + chart_data.append({ + "data": series_data, + "animation": False, + }) - for name, words in generate_canonical_names(docalias_qs.values_list("name", "document__words")): - bins[put_into_bin(words, bin_size)].append(name) + elif stats_type == "pages": + stats_title = "Number of pages for each {}".format(doc_label) - 
series_data = [] - for (value, words), names in sorted(bins.iteritems(), key=lambda t: t[0][0]): - percentage = len(names) * 100.0 / total_docs - if words is not None: - series_data.append((value, len(names))) + bins = defaultdict(list) - table_data.append((words, percentage, names)) + for name, pages in generate_canonical_names(docalias_qs.values_list("name", "document__pages")): + bins[pages].append(name) - chart_data.append({ - "data": series_data, - "animation": False, - }) + series_data = [] + for pages, names in sorted(bins.iteritems(), key=lambda t: t[0]): + percentage = len(names) * 100.0 / total_docs + if pages is not None: + series_data.append((pages, len(names))) + table_data.append((pages, percentage, names)) - elif stats_type == "format": - stats_title = "Submission formats for each {}".format(doc_label) + chart_data.append({ + "data": series_data, + "animation": False, + }) - bins = defaultdict(list) + elif stats_type == "words": + stats_title = "Number of words for each {}".format(doc_label) - # on new documents, we should have a Submission row with the file types - submission_types = {} + bin_size = 500 - for doc_name, file_types in Submission.objects.values_list("draft", "file_types").order_by("submission_date", "id"): - submission_types[doc_name] = file_types + bins = defaultdict(list) - doc_names_with_missing_types = {} - for canonical_name, rev, doc_name in generate_canonical_names(docalias_qs.values_list("name", "document__rev", "document__name")): - types = submission_types.get(doc_name) - if types: - for dot_ext in types.split(","): - bins[dot_ext.lstrip(".").upper()].append(canonical_name) + for name, words in generate_canonical_names(docalias_qs.values_list("name", "document__words")): + bins[put_into_bin(words, bin_size)].append(name) - else: + series_data = [] + for (value, words), names in sorted(bins.iteritems(), key=lambda t: t[0][0]): + percentage = len(names) * 100.0 / total_docs + if words is not None: + series_data.append((value, 
len(names))) + + table_data.append((words, percentage, names)) + + chart_data.append({ + "data": series_data, + "animation": False, + }) + + elif stats_type == "format": + stats_title = "Submission formats for each {}".format(doc_label) + + bins = defaultdict(list) + + # on new documents, we should have a Submission row with the file types + submission_types = {} + + for doc_name, file_types in Submission.objects.values_list("draft", "file_types").order_by("submission_date", "id"): + submission_types[doc_name] = file_types + + doc_names_with_missing_types = {} + for canonical_name, rev, doc_name in generate_canonical_names(docalias_qs.values_list("name", "document__rev", "document__name")): + types = submission_types.get(doc_name) + if types: + for dot_ext in types.split(","): + bins[dot_ext.lstrip(".").upper()].append(canonical_name) - if canonical_name.startswith("rfc"): - filename = canonical_name else: - filename = canonical_name + "-" + rev - doc_names_with_missing_types[filename] = canonical_name + if canonical_name.startswith("rfc"): + filename = canonical_name + else: + filename = canonical_name + "-" + rev - # look up the remaining documents on disk - for filename in itertools.chain(os.listdir(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR), os.listdir(settings.RFC_PATH)): - t = filename.split(".", 1) - if len(t) != 2: - continue + doc_names_with_missing_types[filename] = canonical_name - basename, ext = t - if any(ext.lower().endswith(blacklisted_ext.lower()) for blacklisted_ext in settings.DOCUMENT_FORMAT_BLACKLIST): - continue + # look up the remaining documents on disk + for filename in itertools.chain(os.listdir(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR), os.listdir(settings.RFC_PATH)): + t = filename.split(".", 1) + if len(t) != 2: + continue - canonical_name = doc_names_with_missing_types.get(basename) + basename, ext = t + if any(ext.lower().endswith(blacklisted_ext.lower()) for blacklisted_ext in settings.DOCUMENT_FORMAT_BLACKLIST): + continue - if 
canonical_name: - bins[ext.upper()].append(canonical_name) + canonical_name = doc_names_with_missing_types.get(basename) - series_data = [] - for fmt, names in sorted(bins.iteritems(), key=lambda t: t[0]): - percentage = len(names) * 100.0 / total_docs - series_data.append((fmt, len(names))) + if canonical_name: + bins[ext.upper()].append(canonical_name) - table_data.append((fmt, percentage, names)) + series_data = [] + for fmt, names in sorted(bins.iteritems(), key=lambda t: t[0]): + percentage = len(names) * 100.0 / total_docs + series_data.append((fmt, len(names))) - chart_data.append({ - "data": series_data, - "animation": False, - }) + table_data.append((fmt, percentage, names)) - elif stats_type == "formlang": - stats_title = "Formal languages used for each {}".format(doc_label) + chart_data.append({ + "data": series_data, + "animation": False, + }) - bins = defaultdict(list) + elif stats_type == "formlang": + stats_title = "Formal languages used for each {}".format(doc_label) - for name, formal_language_name in generate_canonical_names(docalias_qs.values_list("name", "document__formal_languages__name")): - bins[formal_language_name].append(name) + bins = defaultdict(list) - series_data = [] - for formal_language, names in sorted(bins.iteritems(), key=lambda t: t[0]): - percentage = len(names) * 100.0 / total_docs - if formal_language is not None: - series_data.append((formal_language, len(names))) - table_data.append((formal_language, percentage, names)) + for name, formal_language_name in generate_canonical_names(docalias_qs.values_list("name", "document__formal_languages__name")): + bins[formal_language_name].append(name) + + series_data = [] + for formal_language, names in sorted(bins.iteritems(), key=lambda t: t[0]): + percentage = len(names) * 100.0 / total_docs + if formal_language is not None: + series_data.append((formal_language, len(names))) + table_data.append((formal_language, percentage, names)) + + chart_data.append({ + "data": series_data, + 
"animation": False, + }) + + elif any(stats_type == t[0] for t in possible_author_stats_types): + person_filters = Q(documentauthor__document__type="draft") + + # filter persons + if document_type == "rfc": + person_filters &= Q(documentauthor__document__states__type="draft", documentauthor__document__states__slug="rfc") + elif document_type == "draft": + person_filters &= ~Q(documentauthor__document__states__type="draft", documentauthor__document__states__slug="rfc") + + if from_time: + # this is actually faster than joining in the database, + # despite the round-trip back and forth + docs_within_time_constraint = list(Document.objects.filter( + type="draft", + docevent__time__gte=from_time, + docevent__type__in=["published_rfc", "new_revision"], + ).values_list("pk")) + + person_filters &= Q(documentauthor__document__in=docs_within_time_constraint) + + person_qs = Person.objects.filter(person_filters) + + if document_type == "rfc": + doc_label = "RFC" + elif document_type == "draft": + doc_label = "draft" + else: + doc_label = "document" + + total_persons = person_qs.count() + + if stats_type == "author/documents": + stats_title = "Number of {}s for each author".format(doc_label) + + bins = defaultdict(list) + + for name, document_count in person_qs.values_list("name").annotate(Count("documentauthor")): + bins[document_count].append(name) + + series_data = [] + for document_count, names in sorted(bins.iteritems(), key=lambda t: t[0]): + percentage = len(names) * 100.0 / total_persons + series_data.append((document_count, percentage)) + table_data.append((document_count, percentage, names)) + + chart_data.append({ + "data": series_data, + "animation": False, + }) - chart_data.append({ - "data": series_data, - "animation": False, - }) - return render(request, "stats/document_stats.html", { "chart_data": mark_safe(json.dumps(chart_data)), "table_data": table_data, "stats_title": stats_title, - "possible_stats_types": possible_stats_types, + 
"possible_document_stats_types": possible_document_stats_types, + "possible_author_stats_types": possible_author_stats_types, "stats_type": stats_type, "possible_document_types": possible_document_types, "document_type": document_type, @@ -321,9 +383,10 @@ def document_stats(request, stats_type=None): "time_choice": time_choice, "doc_label": doc_label, "bin_size": bin_size, - "content_template": "stats/document_stats_{}.html".format(stats_type), + "content_template": "stats/document_stats_{}.html".format(stats_type.replace("/", "_")), }) + @login_required def review_stats(request, stats_type=None, acronym=None): # This view is a bit complex because we want to show a bunch of diff --git a/ietf/templates/stats/document_stats.html b/ietf/templates/stats/document_stats.html index bac17e587..2e4bda401 100644 --- a/ietf/templates/stats/document_stats.html +++ b/ietf/templates/stats/document_stats.html @@ -13,28 +13,39 @@ {% block content %} {% origin %} -

Document statistics

+

Draft/RFC statistics

- Show: + Documents: +
- {% for slug, label, url in possible_stats_types %} + {% for slug, label, url in possible_document_stats_types %} {{ label }} {% endfor %}
- Document type: + Authors: +
- {% for slug, label, url in possible_document_types %} - {{ label }} + {% for slug, label, url in possible_author_stats_types %} + {{ label }} {% endfor %}
+
Options
+
+ Document type: +
+ {% for slug, label, url in possible_document_types %} + {{ label }} + {% endfor %} +
+ Time:
{% for slug, label, url in possible_time_choices %} diff --git a/ietf/templates/stats/document_stats_author_documents.html b/ietf/templates/stats/document_stats_author_documents.html new file mode 100644 index 000000000..0d21b41d2 --- /dev/null +++ b/ietf/templates/stats/document_stats_author_documents.html @@ -0,0 +1,65 @@ +

{{ stats_title }}

+ +
+ + + +

Data

+ + + + + + + + + + + {% for document_count, percentage, names in table_data %} + + + + + + {% endfor %} + +
DocumentsPercentage of authorsAuthors
{{ document_count }}{{ percentage|floatformat:2 }}%{% include "stats/includes/number_with_details_cell.html" %}
diff --git a/ietf/templates/stats/document_stats_authors.html b/ietf/templates/stats/document_stats_authors.html index 143da1114..70fe249fb 100644 --- a/ietf/templates/stats/document_stats_authors.html +++ b/ietf/templates/stats/document_stats_authors.html @@ -49,7 +49,7 @@ Authors Percentage of {{ doc_label }}s - {{ doc_label }}s + {{ doc_label|capfirst }}s @@ -57,7 +57,7 @@ {{ author_count }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/docnames_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" %} {% endfor %} diff --git a/ietf/templates/stats/document_stats_format.html b/ietf/templates/stats/document_stats_format.html index 7e701343f..ce1512a09 100644 --- a/ietf/templates/stats/document_stats_format.html +++ b/ietf/templates/stats/document_stats_format.html @@ -53,7 +53,7 @@ {{ pages }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/docnames_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" %} {% endfor %} diff --git a/ietf/templates/stats/document_stats_formlang.html b/ietf/templates/stats/document_stats_formlang.html index 248a45b82..e4b586d95 100644 --- a/ietf/templates/stats/document_stats_formlang.html +++ b/ietf/templates/stats/document_stats_formlang.html @@ -53,7 +53,7 @@ {{ formal_language }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/docnames_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" %} {% endfor %} diff --git a/ietf/templates/stats/document_stats_pages.html b/ietf/templates/stats/document_stats_pages.html index f4c930e46..dca167b1c 100644 --- a/ietf/templates/stats/document_stats_pages.html +++ b/ietf/templates/stats/document_stats_pages.html @@ -51,7 +51,7 @@ {{ pages }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/docnames_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" %} {% endfor %} diff --git a/ietf/templates/stats/document_stats_words.html 
b/ietf/templates/stats/document_stats_words.html index d5983f1d6..956e49ea7 100644 --- a/ietf/templates/stats/document_stats_words.html +++ b/ietf/templates/stats/document_stats_words.html @@ -51,7 +51,7 @@ {{ pages }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/docnames_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" %} {% endfor %} diff --git a/ietf/templates/stats/includes/docnames_cell.html b/ietf/templates/stats/includes/docnames_cell.html deleted file mode 100644 index fecdbe3a3..000000000 --- a/ietf/templates/stats/includes/docnames_cell.html +++ /dev/null @@ -1 +0,0 @@ -{{ names|length }} diff --git a/ietf/templates/stats/includes/number_with_details_cell.html b/ietf/templates/stats/includes/number_with_details_cell.html new file mode 100644 index 000000000..cdadc287a --- /dev/null +++ b/ietf/templates/stats/includes/number_with_details_cell.html @@ -0,0 +1 @@ +{{ names|length }} diff --git a/ietf/templates/stats/index.html b/ietf/templates/stats/index.html index 77b8b7925..9e8cc2e1f 100644 --- a/ietf/templates/stats/index.html +++ b/ietf/templates/stats/index.html @@ -9,9 +9,11 @@

{% block title %}Statistics{% endblock %}

+

Statistics on...

+ {% endblock %}