datatracker/ietf/stats/views.py

import datetime
import itertools
import json
import calendar
import os
from collections import defaultdict

from django.shortcuts import render
from django.contrib.auth.decorators import login_required
from django.core.urlresolvers import reverse as urlreverse
from django.http import HttpResponseRedirect, HttpResponseForbidden
from django.db.models import Count
from django.utils.safestring import mark_safe
from django.conf import settings

import dateutil.relativedelta

from ietf.review.utils import (extract_review_request_data,
                               aggregate_raw_review_request_stats,
                               ReviewRequestData,
                               compute_review_request_stats,
                               sum_raw_review_request_aggregations)
from ietf.submit.models import Submission
from ietf.group.models import Role, Group
from ietf.person.models import Person
from ietf.name.models import ReviewRequestStateName, ReviewResultName
from ietf.doc.models import DocAlias
from ietf.ietfauth.utils import has_role
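
# Views for the statistics pages: the stats index, document statistics
# (authors/pages/words/formats for drafts and RFCs) and review team
# statistics.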

def stats_index(request):
    return render(request, "stats/index.html")
def generate_query_string(query_dict, overrides):
    query_part = u""

    if query_dict or overrides:
        d = query_dict.copy()
        for k, v in overrides.iteritems():
            if type(v) in (list, tuple):
                if not v:
                    if k in d:
                        del d[k]
                else:
                    d.setlist(k, v)
            else:
                if v is None or v == u"":
                    if k in d:
                        del d[k]
                else:
                    d[k] = v

        if d:
            query_part = u"?" + d.urlencode()

    return query_part

def document_stats(request, stats_type=None, document_type=None):
    def build_document_stats_url(stats_type_override=Ellipsis, document_type_override=Ellipsis, get_overrides={}):
        kwargs = {
            "stats_type": stats_type if stats_type_override is Ellipsis else stats_type_override,
            "document_type": document_type if document_type_override is Ellipsis else document_type_override,
        }

        return urlreverse(document_stats, kwargs={ k: v for k, v in kwargs.iteritems() if v is not None }) + generate_query_string(request.GET, get_overrides)

    # statistics type - one of the tables or the chart
    possible_stats_types = [
        ("authors", "Authors"),
        ("pages", "Pages"),
        ("words", "Words"),
        ("format", "Format"),
        ("formlang", "Formal languages"),
    ]

    possible_stats_types = [ (slug, label, build_document_stats_url(stats_type_override=slug))
                             for slug, label in possible_stats_types ]

    if not stats_type:
        return HttpResponseRedirect(build_document_stats_url(stats_type_override=possible_stats_types[0][0]))

    possible_document_types = [
        ("all", "All"),
        ("rfc", "RFCs"),
        ("draft", "Drafts"),
    ]

    possible_document_types = [ (slug, label, build_document_stats_url(document_type_override=slug))
                                for slug, label in possible_document_types ]

    if not document_type:
        return HttpResponseRedirect(build_document_stats_url(document_type_override=possible_document_types[0][0]))
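
    # put_into_bin maps a value to a (bin start, "start - end") tuple,
    # e.g. put_into_bin(1234, 500) -> (1000, "1000 - 1499"); None is
    # passed through as (None, None).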
    def put_into_bin(value, bin_size):
        if value is None:
            return (value, value)

        v = (value // bin_size) * bin_size
        return (v, "{} - {}".format(v, v + bin_size - 1))
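
    # Pick one canonical row per group of aliases (draft-* and rfc*
    # names), preferring an rfc* name over a draft-* name.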
    def generate_canonical_names(docalias_qs):
        for doc_id, ts in itertools.groupby(docalias_qs.order_by("document"), lambda t: t[0]):
            chosen = None
            for t in ts:
                if chosen is None:
                    chosen = t
                else:
                    if t[0].startswith("rfc"):
                        chosen = t
                    elif t[0].startswith("draft") and not chosen[0].startswith("rfc"):
                        chosen = t

            yield chosen

    # filter documents
    docalias_qs = DocAlias.objects.filter(document__type="draft")

    if document_type == "rfc":
        docalias_qs = docalias_qs.filter(document__states__type="draft", document__states__slug="rfc")
    elif document_type == "draft":
        docalias_qs = docalias_qs.exclude(document__states__type="draft", document__states__slug="rfc")

    chart_data = []
    table_data = []

    if document_type == "all":
        doc_label = "document"
    elif document_type == "rfc":
        doc_label = "RFC"
    elif document_type == "draft":
        doc_label = "draft"

    stats_title = ""
    bin_size = 1

    total_docs = docalias_qs.count()

    if stats_type == "authors":
        stats_title = "Number of authors for each {}".format(doc_label)

        bins = defaultdict(list)

        for name, author_count in generate_canonical_names(docalias_qs.values_list("name").annotate(Count("document__documentauthor"))):
            bins[author_count].append(name)

        series_data = []
        for author_count, names in sorted(bins.iteritems(), key=lambda t: t[0]):
            percentage = len(names) * 100.0 / total_docs
            series_data.append((author_count, percentage))
            table_data.append((author_count, percentage, names))

        chart_data.append({
            "data": series_data,
            "animation": False,
        })
elif stats_type == "pages":
stats_title = "Number of pages for each {}".format(doc_label)
bins = defaultdict(list)
for name, pages in generate_canonical_names(docalias_qs.values_list("name", "document__pages")):
bins[pages].append(name)
series_data = []
for pages, names in sorted(bins.iteritems(), key=lambda t: t[0]):
percentage = len(names) * 100.0 / total_docs
if pages is not None:
series_data.append((pages, len(names)))
table_data.append((pages, percentage, names))
chart_data.append({
"data": series_data,
"animation": False,
})
elif stats_type == "words":
stats_title = "Number of words for each {}".format(doc_label)
bin_size = 500
bins = defaultdict(list)
for name, words in generate_canonical_names(docalias_qs.values_list("name", "document__words")):
bins[put_into_bin(words, bin_size)].append(name)
series_data = []
for (value, words), names in sorted(bins.iteritems(), key=lambda t: t[0][0]):
percentage = len(names) * 100.0 / total_docs
if words is not None:
series_data.append((value, len(names)))
table_data.append((words, percentage, names))
chart_data.append({
"data": series_data,
"animation": False,
})
elif stats_type == "format":
stats_title = "Submission formats for each {}".format(doc_label)
bins = defaultdict(list)
# on new documents, we should have a Submission row with the file types
submission_types = {}
for doc_name, file_types in Submission.objects.values_list("draft", "file_types").order_by("submission_date", "id"):
submission_types[doc_name] = file_types
doc_names_with_missing_types = {}
for canonical_name, rev, doc_name in generate_canonical_names(docalias_qs.values_list("name", "document__rev", "document__name")):
types = submission_types.get(doc_name)
if types:
for dot_ext in types.split(","):
bins[dot_ext.lstrip(".").upper()].append(canonical_name)
else:
if canonical_name.startswith("rfc"):
filename = canonical_name
else:
filename = canonical_name + "-" + rev
doc_names_with_missing_types[filename] = canonical_name
# look up the remaining documents on disk
for filename in itertools.chain(os.listdir(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR), os.listdir(settings.RFC_PATH)):
t = filename.split(".", 1)
if len(t) != 2:
continue
basename, ext = t
if any(ext.lower().endswith(blacklisted_ext.lower()) for blacklisted_ext in settings.DOCUMENT_FORMAT_BLACKLIST):
continue
canonical_name = doc_names_with_missing_types.get(basename)
if canonical_name:
bins[ext.upper()].append(canonical_name)
series_data = []
for fmt, names in sorted(bins.iteritems(), key=lambda t: t[0]):
percentage = len(names) * 100.0 / total_docs
series_data.append((fmt, len(names)))
table_data.append((fmt, percentage, names))
chart_data.append({
"data": series_data,
"animation": False,
})
elif stats_type == "formlang":
stats_title = "Formal languages used for each {}".format(doc_label)
bins = defaultdict(list)
for name, formal_language_name in generate_canonical_names(docalias_qs.values_list("name", "document__formal_languages__name")):
bins[formal_language_name].append(name)
series_data = []
for formal_language, names in sorted(bins.iteritems(), key=lambda t: t[0]):
percentage = len(names) * 100.0 / total_docs
if formal_language is not None:
series_data.append((formal_language, len(names)))
table_data.append((formal_language, percentage, names))
chart_data.append({
"data": series_data,
"animation": False,
})
return render(request, "stats/document_stats.html", {
"chart_data": mark_safe(json.dumps(chart_data)),
"table_data": table_data,
"stats_title": stats_title,
"possible_stats_types": possible_stats_types,
"stats_type": stats_type,
"possible_document_types": possible_document_types,
"document_type": document_type,
"doc_label": doc_label,
"bin_size": bin_size,
"content_template": "stats/document_stats_{}.html".format(stats_type),
})

@login_required
def review_stats(request, stats_type=None, acronym=None):
    # This view is a bit complex because we want to show a bunch of
    # tables with various filtering options, and both a team overview
    # and a reviewers-within-team overview - and a time series chart.
    # And in order to make the UI quick to navigate, we're not using
    # one big form but instead presenting a bunch of immediate
    # actions, with a URL scheme where the most common options (level
    # and statistics type) are incorporated directly into the URL to
    # be a bit nicer.

    def build_review_stats_url(stats_type_override=Ellipsis, acronym_override=Ellipsis, get_overrides={}):
        kwargs = {
            "stats_type": stats_type if stats_type_override is Ellipsis else stats_type_override,
        }
        acr = acronym if acronym_override is Ellipsis else acronym_override
        if acr:
            kwargs["acronym"] = acr

        return urlreverse(review_stats, kwargs=kwargs) + generate_query_string(request.GET, get_overrides)
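
    # get_choice reads a GET parameter and keeps only values among the
    # allowed choices; e.g. with ?count=pages,
    # get_choice("count", possible_count_choices) returns "pages".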
    def get_choice(get_parameter, possible_choices, multiple=False):
        values = request.GET.getlist(get_parameter)
        found = [t[0] for t in possible_choices if t[0] in values]

        if multiple:
            return found
        else:
            if found:
                return found[0]
            else:
                return None

    # which overview - team or reviewer
    if acronym:
        level = "reviewer"
    else:
        level = "team"

    # statistics type - one of the tables or the chart
    possible_stats_types = [
        ("completion", "Completion status"),
        ("results", "Review results"),
        ("states", "Request states"),
    ]

    if level == "team":
        possible_stats_types.append(("time", "Changes over time"))

    possible_stats_types = [ (slug, label, build_review_stats_url(stats_type_override=slug))
                             for slug, label in possible_stats_types ]

    if not stats_type:
        return HttpResponseRedirect(build_review_stats_url(stats_type_override=possible_stats_types[0][0]))

    # what to count
    possible_count_choices = [
        ("", "Review requests"),
        ("pages", "Reviewed pages"),
    ]

    possible_count_choices = [ (slug, label, build_review_stats_url(get_overrides={ "count": slug })) for slug, label in possible_count_choices ]

    count = get_choice("count", possible_count_choices) or ""

    # time range
    def parse_date(s):
        if not s:
            return None
        try:
            return datetime.datetime.strptime(s.strip(), "%Y-%m-%d").date()
        except ValueError:
            return None

    today = datetime.date.today()
    from_date = parse_date(request.GET.get("from")) or today - dateutil.relativedelta.relativedelta(years=1)
    to_date = parse_date(request.GET.get("to")) or today
    from_time = datetime.datetime.combine(from_date, datetime.time.min)
    to_time = datetime.datetime.combine(to_date, datetime.time.max)

    # teams/reviewers
    teams = list(Group.objects.exclude(reviewrequest=None).distinct().order_by("name"))

    reviewer_filter_args = {}

    # - interlude: access control
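    # Secretariat and ADs see all teams; otherwise restrict to teams
    # where the user is secretary or reviewer, and for reviewer-only
    # teams show just the user's own reviewer statistics.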
    if has_role(request.user, ["Secretariat", "Area Director"]):
        pass
    else:
        secr_access = set()
        reviewer_only_access = set()

        for r in Role.objects.filter(person__user=request.user, name__in=["secr", "reviewer"], group__in=teams).distinct():
            if r.name_id == "secr":
                secr_access.add(r.group_id)
                reviewer_only_access.discard(r.group_id)
            elif r.name_id == "reviewer":
                if not r.group_id in secr_access:
                    reviewer_only_access.add(r.group_id)

        if not secr_access and not reviewer_only_access:
            return HttpResponseForbidden("You do not have the necessary permissions to view this page")

        teams = [t for t in teams if t.pk in secr_access or t.pk in reviewer_only_access]

        for t in reviewer_only_access:
            reviewer_filter_args[t] = { "user": request.user }

    reviewers_for_team = None

    if level == "team":
        for t in teams:
            t.reviewer_stats_url = build_review_stats_url(acronym_override=t.acronym)

        query_teams = teams
        query_reviewers = None

        group_by_objs = { t.pk: t for t in query_teams }
        group_by_index = ReviewRequestData._fields.index("team")

    elif level == "reviewer":
        for t in teams:
            if t.acronym == acronym:
                reviewers_for_team = t
                break
        else:
            return HttpResponseRedirect(urlreverse(review_stats))

        query_reviewers = list(Person.objects.filter(
            email__reviewrequest__time__gte=from_time,
            email__reviewrequest__time__lte=to_time,
            email__reviewrequest__team=reviewers_for_team,
            **reviewer_filter_args.get(t.pk, {})
        ).distinct())
        query_reviewers.sort(key=lambda p: p.last_name())

        query_teams = [t]

        group_by_objs = { r.pk: r for r in query_reviewers }
        group_by_index = ReviewRequestData._fields.index("reviewer")

    # now filter and aggregate the data
    possible_teams = possible_completion_types = possible_results = possible_states = None
    selected_teams = selected_completion_type = selected_result = selected_state = None

    if stats_type == "time":
        possible_teams = [(t.acronym, t.acronym) for t in teams]
        selected_teams = get_choice("team", possible_teams, multiple=True)
        def add_if_exists_else_subtract(element, l):
            if element in l:
                return [x for x in l if x != element]
            else:
                return l + [element]

        possible_teams = [(slug, label, build_review_stats_url(get_overrides={
            "team": add_if_exists_else_subtract(slug, selected_teams)
        })) for slug, label in possible_teams]
        query_teams = [t for t in query_teams if t.acronym in selected_teams]

        extracted_data = extract_review_request_data(query_teams, query_reviewers, from_time, to_time)

        req_time_index = ReviewRequestData._fields.index("req_time")

        def time_key_fn(t):
            d = t[req_time_index].date()
            #d -= datetime.timedelta(days=d.weekday()) # weekly
            d -= datetime.timedelta(days=d.day) # monthly
            return d
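
        # subtracting d.day maps every date in a month to the last day
        # of the previous month, so each calendar month gets one bucket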

        found_results = set()
        found_states = set()
        aggrs = []
        for d, request_data_items in itertools.groupby(extracted_data, key=time_key_fn):
            raw_aggr = aggregate_raw_review_request_stats(request_data_items, count=count)
            aggr = compute_review_request_stats(raw_aggr)

            aggrs.append((d, aggr))

            for slug in aggr["result"]:
                found_results.add(slug)
            for slug in aggr["state"]:
                found_states.add(slug)

        results = ReviewResultName.objects.filter(slug__in=found_results)
        states = ReviewRequestStateName.objects.filter(slug__in=found_states)

        # choice of which series to plot
        possible_completion_types = [
            ("completed_in_time", "Completed in time"),
            ("completed_late", "Completed late"),
            ("not_completed", "Not completed"),
            ("average_assignment_to_closure_days", "Avg. compl. days"),
        ]

        possible_completion_types = [
            (slug, label, build_review_stats_url(get_overrides={ "completion": slug, "result": None, "state": None }))
            for slug, label in possible_completion_types
        ]

        selected_completion_type = get_choice("completion", possible_completion_types)

        possible_results = [
            (r.slug, r.name, build_review_stats_url(get_overrides={ "completion": None, "result": r.slug, "state": None }))
            for r in results
        ]

        selected_result = get_choice("result", possible_results)

        possible_states = [
            (s.slug, s.name, build_review_stats_url(get_overrides={ "completion": None, "result": None, "state": s.slug }))
            for s in states
        ]

        selected_state = get_choice("state", possible_states)

        if not selected_completion_type and not selected_result and not selected_state:
            selected_completion_type = "completed_in_time"

        series_data = []
        for d, aggr in aggrs:
            v = 0
            if selected_completion_type is not None:
                v = aggr[selected_completion_type]
            elif selected_result is not None:
                v = aggr["result"][selected_result]
            elif selected_state is not None:
                v = aggr["state"][selected_state]
            series_data.append((calendar.timegm(d.timetuple()) * 1000, v))

        data = json.dumps([{
            "data": series_data
        }])

    else: # tabular data
        extracted_data = extract_review_request_data(query_teams, query_reviewers, from_time, to_time, ordering=[level])

        data = []
        found_results = set()
        found_states = set()
        raw_aggrs = []
        for group_pk, request_data_items in itertools.groupby(extracted_data, key=lambda t: t[group_by_index]):
            raw_aggr = aggregate_raw_review_request_stats(request_data_items, count=count)
            raw_aggrs.append(raw_aggr)

            aggr = compute_review_request_stats(raw_aggr)

            # skip zero-valued rows
            if aggr["open"] == 0 and aggr["completed"] == 0 and aggr["not_completed"] == 0:
                continue

            aggr["obj"] = group_by_objs.get(group_pk)

            for slug in aggr["result"]:
                found_results.add(slug)
            for slug in aggr["state"]:
                found_states.add(slug)

            data.append(aggr)

        # add totals row
        if len(raw_aggrs) > 1:
            totals = compute_review_request_stats(sum_raw_review_request_aggregations(raw_aggrs))
            totals["obj"] = "Totals"
            data.append(totals)

        results = ReviewResultName.objects.filter(slug__in=found_results)
        states = ReviewRequestStateName.objects.filter(slug__in=found_states)

        # massage states/results breakdowns for template rendering
        for aggr in data:
            aggr["state_list"] = [aggr["state"].get(x.slug, 0) for x in states]
            aggr["result_list"] = [aggr["result"].get(x.slug, 0) for x in results]

    return render(request, 'stats/review_stats.html', {
        "team_level_url": build_review_stats_url(acronym_override=None),
        "level": level,
        "reviewers_for_team": reviewers_for_team,
        "teams": teams,
        "data": data,
        "states": states,
        "results": results,

        # options
        "possible_stats_types": possible_stats_types,
        "stats_type": stats_type,
        "possible_count_choices": possible_count_choices,
        "count": count,
        "from_date": from_date,
        "to_date": to_date,
        "today": today,

        # time options
        "possible_teams": possible_teams,
        "selected_teams": selected_teams,
        "possible_completion_types": possible_completion_types,
        "selected_completion_type": selected_completion_type,
        "possible_results": possible_results,
        "selected_result": selected_result,
        "possible_states": possible_states,
        "selected_state": selected_state,
    })