From a2b8819cfea78dd6fe73d99ebc19015c7bc0a224 Mon Sep 17 00:00:00 2001 From: Ole Laursen Date: Thu, 4 May 2017 15:55:15 +0000 Subject: [PATCH] Add meeting statistics - overview and country/continent across meetings + detail pages with country and continent - Legacy-Id: 13264 --- ietf/bin/fetch-meeting-registration-data | 4 +- .../ietf/js/{document-stats.js => stats.js} | 4 +- .../0003_registration_registrationstats.py | 4 +- ietf/stats/models.py | 4 +- ietf/stats/tests.py | 12 +- ietf/stats/urls.py | 2 + ietf/stats/utils.py | 8 +- ietf/stats/views.py | 260 +++++++++++++++--- ietf/templates/stats/index.html | 1 + ietf/templates/stats/meeting_stats.html | 46 ++++ .../stats/meeting_stats_continent.html | 64 +++++ .../stats/meeting_stats_country.html | 69 +++++ .../stats/meeting_stats_overview.html | 75 +++++ 13 files changed, 504 insertions(+), 49 deletions(-) rename ietf/static/ietf/js/{document-stats.js => stats.js} (99%) create mode 100644 ietf/templates/stats/meeting_stats.html create mode 100644 ietf/templates/stats/meeting_stats_continent.html create mode 100644 ietf/templates/stats/meeting_stats_country.html create mode 100644 ietf/templates/stats/meeting_stats_overview.html diff --git a/ietf/bin/fetch-meeting-registration-data b/ietf/bin/fetch-meeting-registration-data index f2b9b59cd..69db34726 100755 --- a/ietf/bin/fetch-meeting-registration-data +++ b/ietf/bin/fetch-meeting-registration-data @@ -25,7 +25,7 @@ import syslog from ietf.meeting.models import Meeting -from ietf.stats.utils import get_registration_data +from ietf.stats.utils import get_meeting_registration_data meetings = Meeting.objects.none() if args.meeting: @@ -39,7 +39,7 @@ else: sys.exit(1) for meeting in meetings: - total = get_registration_data(meeting) + total = get_meeting_registration_data(meeting) msg = "Fetched data for meeting {}: {} registrations added".format(meeting.number, total) if sys.stdout.isatty(): print(msg) # make debugging a bit easier diff --git 
a/ietf/static/ietf/js/document-stats.js b/ietf/static/ietf/js/stats.js similarity index 99% rename from ietf/static/ietf/js/document-stats.js rename to ietf/static/ietf/js/stats.js index 25dfcc785..e62901a20 100644 --- a/ietf/static/ietf/js/document-stats.js +++ b/ietf/static/ietf/js/stats.js @@ -14,7 +14,7 @@ $(document).ready(function () { var chart = Highcharts.chart('chart', window.chartConf); } - + /* $(".popover-details").each(function () { var stdNameRegExp = new RegExp("^(rfc|bcp|fyi|std)[0-9]+$", 'i'); var draftRegExp = new RegExp("^draft-", 'i'); @@ -49,5 +49,5 @@ $(document).ready(function () { }).on("click", function (e) { e.preventDefault(); }); - }); + });*/ }); diff --git a/ietf/stats/migrations/0003_registration_registrationstats.py b/ietf/stats/migrations/0003_registration_registrationstats.py index 58956d115..46bc6a84e 100644 --- a/ietf/stats/migrations/0003_registration_registrationstats.py +++ b/ietf/stats/migrations/0003_registration_registrationstats.py @@ -16,13 +16,13 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='Registration', + name='MeetingRegistration', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('first_name', models.CharField(max_length=255)), ('last_name', models.CharField(max_length=255)), ('affiliation', models.CharField(blank=True, max_length=255)), - ('country', models.CharField(max_length=2)), + ('country_code', models.CharField(max_length=2)), ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='meeting.Meeting')), ('person', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='person.Person')), ], diff --git a/ietf/stats/models.py b/ietf/stats/models.py index ae6ac0aa7..051af882d 100644 --- a/ietf/stats/models.py +++ b/ietf/stats/models.py @@ -42,13 +42,13 @@ class CountryAlias(models.Model): class Meta: verbose_name_plural = "country aliases" -class 
Registration(models.Model): +class MeetingRegistration(models.Model): """Registration attendee records from the IETF registration system""" meeting = models.ForeignKey(Meeting) first_name = models.CharField(max_length=255) last_name = models.CharField(max_length=255) affiliation = models.CharField(blank=True, max_length=255) - country = models.CharField(max_length=2) # ISO 3166 + country_code = models.CharField(max_length=2) # ISO 3166 person = models.ForeignKey(Person, blank=True, null=True) def __unicode__(self): diff --git a/ietf/stats/tests.py b/ietf/stats/tests.py index c306a6e6e..82af2e3a3 100644 --- a/ietf/stats/tests.py +++ b/ietf/stats/tests.py @@ -8,8 +8,8 @@ from django.urls import reverse as urlreverse from ietf.utils.test_data import make_test_data, make_review_data from ietf.utils.test_utils import login_testing_unauthorized, TestCase, unicontent -from ietf.stats.models import Registration -from ietf.stats.utils import get_registration_data +from ietf.stats.models import MeetingRegistration +from ietf.stats.utils import get_meeting_registration_data import ietf.stats.views from ietf.submit.models import Submission @@ -154,12 +154,12 @@ class StatisticsTests(TestCase): self.assertTrue(q('.review-stats td:contains("1")')) @patch('requests.get') - def test_get_registration_data(self, mock_get): + def test_get_meeting_registration_data(self, mock_get): response = Response() response.status_code = 200 response._content = '[{"LastName":"Smith","FirstName":"John","Company":"ABC","Country":"US"}]' mock_get.return_value = response meeting = MeetingFactory(type_id='ietf', date=datetime.date(2016,7,14), number="96") - get_registration_data(meeting) - query = Registration.objects.filter(first_name='John',last_name='Smith',country='US') - self.assertTrue(query.count(),1) \ No newline at end of file + get_meeting_registration_data(meeting) + query = MeetingRegistration.objects.filter(first_name='John',last_name='Smith',country_code='US') + 
self.assertTrue(query.count(), 1) diff --git a/ietf/stats/urls.py b/ietf/stats/urls.py index ed8f8baa5..94fbe6951 100644 --- a/ietf/stats/urls.py +++ b/ietf/stats/urls.py @@ -7,5 +7,7 @@ urlpatterns = [ url("^$", views.stats_index), url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/(?:documents|affiliation|country|continent|citations|hindex)|yearly/(?:affiliation|country|continent))/)?$", views.document_stats), url("^knowncountries/$", views.known_countries_list), + url("^meeting/(?P<num>\d+)/(?P<stats_type>country|continent)/$", views.meeting_stats), + url("^meeting/(?:(?P<stats_type>overview|country|continent)/)?$", views.meeting_stats), url("^review/(?:(?P<stats_type>completion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, views.review_stats), ] diff --git a/ietf/stats/utils.py b/ietf/stats/utils.py index 886bcc02e..9b0f4cd70 100644 --- a/ietf/stats/utils.py +++ b/ietf/stats/utils.py @@ -4,7 +4,7 @@ from collections import defaultdict from django.conf import settings -from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, Registration +from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration from ietf.name.models import CountryName def compile_affiliation_ending_stripping_regexp(): @@ -210,7 +210,7 @@ def compute_hirsch_index(citation_counts): return i -def get_registration_data(meeting): +def get_meeting_registration_data(meeting): """"Retrieve registration attendee data and summary statistics. 
Returns number of Registration records created.""" num_created = 0 @@ -223,12 +223,12 @@ def get_registration_data(meeting): pass for registration in decoded: - object, created = Registration.objects.get_or_create( + object, created = MeetingRegistration.objects.get_or_create( meeting_id=meeting.pk, first_name=registration['FirstName'], last_name=registration['LastName'], affiliation=registration['Company'], - country=registration['Country']) + country_code=registration['Country']) if created: num_created += 1 return num_created diff --git a/ietf/stats/views.py b/ietf/stats/views.py index 3680ce559..96b68fb4a 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -12,6 +12,7 @@ from django.http import HttpResponseRedirect, HttpResponseForbidden from django.db.models import Count, Q from django.utils.safestring import mark_safe from django.conf import settings +from django.shortcuts import get_object_or_404 import dateutil.relativedelta @@ -26,6 +27,8 @@ from ietf.person.models import Person from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName from ietf.person.name import plain_name from ietf.doc.models import DocAlias, Document, State, DocEvent +from ietf.meeting.models import Meeting +from ietf.stats.models import MeetingRegistration, CountryAlias from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index from ietf.ietfauth.utils import has_role @@ -86,10 +89,40 @@ def prune_unknown_bin_with_known(bins): # named/known bins all_known = { n for b, names in bins.iteritems() if b for n in names } bins[""] = [name for name in bins[""] if name not in all_known] + if not bins[""]: + del bins[""] def count_bins(bins): return len({ n for b, names in bins.iteritems() if b for n in names }) +def add_labeled_top_series_from_bins(chart_data, bins, limit): + """Take bins on the form (x, label): [name1, name2, ...], figure out + how many there are per label, take the overall top 
ones and put + them into sorted series like [(x1, len(names1)), (x2, len(names2)), ...].""" + aggregated_bins = defaultdict(set) + xs = set() + for (x, label), names in bins.iteritems(): + xs.add(x) + aggregated_bins[label].update(names) + + xs = list(sorted(xs)) + + sorted_bins = sorted(aggregated_bins.iteritems(), key=lambda t: len(t[1]), reverse=True) + top = [ label for label, names in list(sorted_bins)[:limit]] + + for label in top: + series_data = [] + + for x in xs: + names = bins.get((x, label), set()) + + series_data.append((x, len(names))) + + chart_data.append({ + "data": series_data, + "name": label + }) + def document_stats(request, stats_type=None): def build_document_stats_url(stats_type_override=Ellipsis, get_overrides={}): kwargs = { @@ -549,8 +582,7 @@ def document_stats(request, stats_type=None): chart_data.append({ "data": sorted(series_data, key=lambda t: t[0]) }) - elif any(stats_type == t[0] and stats_type.split("/")[1] in ["affiliation", "country", "continent"] - for t in possible_yearly_stats_types): + elif any(stats_type == t[0] for t in possible_yearly_stats_types): person_filters = Q(documentauthor__document__type="draft") @@ -585,32 +617,6 @@ def document_stats(request, stats_type=None): years_from = from_time.year if from_time else 1 years_to = datetime.date.today().year - 1 - def add_yearly_chart_data_from_bins(bins, limit): - aggregated_bins = defaultdict(set) - years = set() - for (year, label), names in bins.iteritems(): - years.add(year) - aggregated_bins[label].update(names) - - years = list(sorted(y for y in years)) - - limit = 8 - sorted_bins = sorted(aggregated_bins.iteritems(), key=lambda t: len(t[1]), reverse=True) - top = [ label for label, names in list(sorted_bins)[:limit]] - - for label in top: - series_data = [] - - for y in years: - names = bins.get((y, label), set()) - - series_data.append((y, len(names))) - - chart_data.append({ - "data": series_data, - "name": label - }) - if stats_type == "yearly/affiliation": 
stats_title = "Number of {} authors per affiliation over the years".format(doc_label) @@ -632,7 +638,7 @@ def document_stats(request, stats_type=None): if years_from <= year <= years_to: bins[(year, a)].add(name) - add_yearly_chart_data_from_bins(bins, limit=8) + add_labeled_top_series_from_bins(chart_data, bins, limit=8) elif stats_type == "yearly/country": stats_title = "Number of {} authors per country over the years".format(doc_label) @@ -664,7 +670,7 @@ def document_stats(request, stats_type=None): if c and c.in_eu: bins[(year, eu_name)].add(name) - add_yearly_chart_data_from_bins(bins, limit=8) + add_labeled_top_series_from_bins(chart_data, bins, limit=8) elif stats_type == "yearly/continent": @@ -692,7 +698,7 @@ def document_stats(request, stats_type=None): if years_from <= year <= years_to: bins[(year, continent_name)].add(name) - add_yearly_chart_data_from_bins(bins, limit=8) + add_labeled_top_series_from_bins(chart_data, bins, limit=8) return render(request, "stats/document_stats.html", { "chart_data": mark_safe(json.dumps(chart_data)), @@ -728,6 +734,198 @@ def known_countries_list(request, stats_type=None, acronym=None): "ticket_email_address": settings.SECRETARIAT_TICKET_EMAIL, }) +def meeting_stats(request, num=None, stats_type=None): + meeting = None + if num is not None: + meeting = get_object_or_404(Meeting, number=num, type="ietf") + + def build_meeting_stats_url(number=None, stats_type_override=Ellipsis, get_overrides={}): + kwargs = { + "stats_type": stats_type if stats_type_override is Ellipsis else stats_type_override, + } + + if number is not None: + kwargs["num"] = number + + return urlreverse(meeting_stats, kwargs={ k: v for k, v in kwargs.iteritems() if v is not None }) + generate_query_string(request.GET, get_overrides) + + # statistics types + if meeting: + possible_stats_types = add_url_to_choices([ + ("country", "Country"), + ("continent", "Continent"), + ], lambda slug: build_meeting_stats_url(number=meeting.number, 
stats_type_override=slug)) + else: + possible_stats_types = add_url_to_choices([ + ("overview", "Overview"), + ("country", "Country"), + ("continent", "Continent"), + ], lambda slug: build_meeting_stats_url(number=None, stats_type_override=slug)) + + if not stats_type: + return HttpResponseRedirect(build_meeting_stats_url(number=num, stats_type_override=possible_stats_types[0][0])) + + chart_data = [] + table_data = [] + stats_title = "" + template_name = stats_type + bin_size = 1 + eu_countries = None + + def get_country_mapping(registrations): + return { + alias.alias: alias.country + for alias in CountryAlias.objects.filter(alias__in=set(r.country_code for r in registrations)).select_related("country", "country__continent") + if alias.alias.isupper() + } + + if meeting and any(stats_type == t[0] for t in possible_stats_types): + registrations = MeetingRegistration.objects.filter(meeting=meeting) + + if stats_type == "country": + stats_title = "Number of registrations for {} {} per country".format(meeting.type.name, meeting.number) + + bins = defaultdict(set) + + country_mapping = get_country_mapping(registrations) + + eu_name = "EU" + eu_countries = set(CountryName.objects.filter(in_eu=True)) + + for r in registrations: + name = (r.first_name + " " + r.last_name).strip() + c = country_mapping.get(r.country_code) + bins[c.name if c else None].add(name) + + if c and c.in_eu: + bins[eu_name].add(name) + + prune_unknown_bin_with_known(bins) + total_registrations = count_bins(bins) + + series_data = [] + for country, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()): + percentage = len(names) * 100.0 / (total_registrations or 1) + if country: + series_data.append((country, len(names))) + table_data.append((country, percentage, names)) + + series_data.sort(key=lambda t: t[1], reverse=True) + series_data = series_data[:30] + + chart_data.append({ "data": series_data }) + + elif stats_type == "continent": + stats_title = "Number of registrations for {} {} 
per continent".format(meeting.type.name, meeting.number) + + bins = defaultdict(set) + + country_mapping = get_country_mapping(registrations) + + for r in registrations: + name = (r.first_name + " " + r.last_name).strip() + c = country_mapping.get(r.country_code) + bins[c.continent.name if c else None].add(name) + + prune_unknown_bin_with_known(bins) + total_registrations = count_bins(bins) + + series_data = [] + for continent, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()): + percentage = len(names) * 100.0 / (total_registrations or 1) + if continent: + series_data.append((continent, len(names))) + table_data.append((continent, percentage, names)) + + series_data.sort(key=lambda t: t[1], reverse=True) + + chart_data.append({ "data": series_data }) + + + elif not meeting and any(stats_type == t[0] for t in possible_stats_types): + template_name = "overview" + + registrations = MeetingRegistration.objects.filter(meeting__type="ietf") + + if stats_type == "overview": + stats_title = "Number of registrations per meeting" + + bins = defaultdict(set) + + for first_name, last_name, meeting_number in registrations.values_list("first_name", "last_name", "meeting__number"): + meeting_number = int(meeting_number) + name = (first_name + " " + last_name).strip() + + bins[meeting_number].add(name) + + meeting_cities = dict(Meeting.objects.filter(number__in=bins.iterkeys()).values_list("number", "city")) + + series_data = [] + for meeting_number, names in sorted(bins.iteritems()): + series_data.append((meeting_number, len(names))) + url = build_meeting_stats_url(number=meeting_number, stats_type_override="country") + label = "IETF {} - {}".format(meeting_number, meeting_cities.get(str(meeting_number), "")) + table_data.append((meeting_number, label, url, names)) + + series_data.sort(key=lambda t: t[0]) + table_data.sort(key=lambda t: t[0], reverse=True) + + chart_data.append({ "data": series_data }) + + + elif stats_type == "country": + stats_title = "Number of 
registrations per country across meetings" + + country_mapping = get_country_mapping(registrations) + + eu_name = "EU" + eu_countries = set(CountryName.objects.filter(in_eu=True)) + + bins = defaultdict(set) + + for first_name, last_name, country_code, meeting_number in registrations.values_list("first_name", "last_name", "country_code", "meeting__number"): + meeting_number = int(meeting_number) + name = (first_name + " " + last_name).strip() + c = country_mapping.get(country_code) + + if c: + bins[(meeting_number, c.name)].add(name) + if c.in_eu: + bins[(meeting_number, eu_name)].add(name) + + add_labeled_top_series_from_bins(chart_data, bins, limit=8) + + + elif stats_type == "continent": + stats_title = "Number of registrations per country across meetings" + + country_mapping = get_country_mapping(registrations) + + bins = defaultdict(set) + + for first_name, last_name, country_code, meeting_number in registrations.values_list("first_name", "last_name", "country_code", "meeting__number"): + meeting_number = int(meeting_number) + name = (first_name + " " + last_name).strip() + c = country_mapping.get(country_code) + + if c: + bins[(meeting_number, c.continent.name)].add(name) + + add_labeled_top_series_from_bins(chart_data, bins, limit=8) + + + return render(request, "stats/meeting_stats.html", { + "chart_data": mark_safe(json.dumps(chart_data)), + "table_data": table_data, + "stats_title": stats_title, + "possible_stats_types": possible_stats_types, + "stats_type": stats_type, + "bin_size": bin_size, + "meeting": meeting, + "eu_countries": sorted(eu_countries or [], key=lambda c: c.name), + "content_template": "stats/meeting_stats_{}.html".format(template_name), + }) + @login_required def review_stats(request, stats_type=None, acronym=None): diff --git a/ietf/templates/stats/index.html b/ietf/templates/stats/index.html index 9e8cc2e1f..266d287d3 100644 --- a/ietf/templates/stats/index.html +++ b/ietf/templates/stats/index.html @@ -13,6 +13,7 @@ diff --git 
a/ietf/templates/stats/meeting_stats.html b/ietf/templates/stats/meeting_stats.html new file mode 100644 index 000000000..1c359215f --- /dev/null +++ b/ietf/templates/stats/meeting_stats.html @@ -0,0 +1,46 @@ +{% extends "base.html" %} + +{% load origin %} + +{% load ietf_filters staticfiles bootstrap3 %} + +{% block title %}{{ stats_title }}{% endblock %} + +{% block pagehead %} + +{% endblock %} + +{% block content %} + {% origin %} + +

Meeting Statistics

+ + {% if meeting %} +

+ « Back to overview +

+ {% endif %} + +
+
+ Registrations: + +
+ {% for slug, label, url in possible_stats_types %} + {{ label }} + {% endfor %} +
+
+
+ +
+ {% include content_template %} +
+{% endblock %} + +{% block js %} + + + + +{% endblock %} diff --git a/ietf/templates/stats/meeting_stats_continent.html b/ietf/templates/stats/meeting_stats_continent.html new file mode 100644 index 000000000..436507f8b --- /dev/null +++ b/ietf/templates/stats/meeting_stats_continent.html @@ -0,0 +1,64 @@ +

{{ stats_title }}

+ +
+ + + +

Data

+ + + + + + + + + + + {% for continent, percentage, names in table_data %} + + + + + + {% endfor %} + +
ContinentPercentage of registrationsRegistrations
{{ continent|default:"(unknown)" }}{{ percentage|floatformat:2 }}%{% include "stats/includes/number_with_details_cell.html" %}
diff --git a/ietf/templates/stats/meeting_stats_country.html b/ietf/templates/stats/meeting_stats_country.html new file mode 100644 index 000000000..22f853079 --- /dev/null +++ b/ietf/templates/stats/meeting_stats_country.html @@ -0,0 +1,69 @@ +

{{ stats_title }}

+ +
+ + + +

Data

+ + + + + + + + + + + {% for country, percentage, names in table_data %} + + + + + + {% endfor %} + +
CountryPercentage of registrationsRegistrations
{{ country|default:"(unknown)" }}{{ percentage|floatformat:2 }}%{% include "stats/includes/number_with_details_cell.html" %}
+ +

EU (European Union) is not a country, but has been added for reference, as the sum of + all current EU member countries: + {% for c in eu_countries %}{{ c.name }}{% if not forloop.last %}, {% endif %}{% endfor %}.

+ diff --git a/ietf/templates/stats/meeting_stats_overview.html b/ietf/templates/stats/meeting_stats_overview.html new file mode 100644 index 000000000..756d6f568 --- /dev/null +++ b/ietf/templates/stats/meeting_stats_overview.html @@ -0,0 +1,75 @@ +

{{ stats_title }}

+ +
+ + + +{% if table_data %} +

Data

+ + + + + + + + + + {% for meeting_number, label, url, names in table_data %} + + + + + {% endfor %} + +
MeetingRegistrations
{{ label }}{% include "stats/includes/number_with_details_cell.html" %}
+ +{% endif %}