Add meeting statistics - overview and country/continent across

meetings + detail pages with country and continent
 - Legacy-Id: 13264
This commit is contained in:
Ole Laursen 2017-05-04 15:55:15 +00:00
parent 07c650428a
commit a2b8819cfe
13 changed files with 504 additions and 49 deletions

View file

@ -25,7 +25,7 @@ import syslog
from ietf.meeting.models import Meeting
from ietf.stats.utils import get_registration_data
from ietf.stats.utils import get_meeting_registration_data
meetings = Meeting.objects.none()
if args.meeting:
@ -39,7 +39,7 @@ else:
sys.exit(1)
for meeting in meetings:
total = get_registration_data(meeting)
total = get_meeting_registration_data(meeting)
msg = "Fetched data for meeting {}: {} registrations added".format(meeting.number, total)
if sys.stdout.isatty():
print(msg) # make debugging a bit easier

View file

@ -14,7 +14,7 @@ $(document).ready(function () {
var chart = Highcharts.chart('chart', window.chartConf);
}
/*
$(".popover-details").each(function () {
var stdNameRegExp = new RegExp("^(rfc|bcp|fyi|std)[0-9]+$", 'i');
var draftRegExp = new RegExp("^draft-", 'i');
@ -49,5 +49,5 @@ $(document).ready(function () {
}).on("click", function (e) {
e.preventDefault();
});
});
});*/
});

View file

@ -16,13 +16,13 @@ class Migration(migrations.Migration):
operations = [
migrations.CreateModel(
name='Registration',
name='MeetingRegistration',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('first_name', models.CharField(max_length=255)),
('last_name', models.CharField(max_length=255)),
('affiliation', models.CharField(blank=True, max_length=255)),
('country', models.CharField(max_length=2)),
('country_code', models.CharField(max_length=2)),
('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='meeting.Meeting')),
('person', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='person.Person')),
],

View file

@ -42,13 +42,13 @@ class CountryAlias(models.Model):
class Meta:
verbose_name_plural = "country aliases"
class Registration(models.Model):
class MeetingRegistration(models.Model):
"""Registration attendee records from the IETF registration system"""
meeting = models.ForeignKey(Meeting)
first_name = models.CharField(max_length=255)
last_name = models.CharField(max_length=255)
affiliation = models.CharField(blank=True, max_length=255)
country = models.CharField(max_length=2) # ISO 3166
country_code = models.CharField(max_length=2) # ISO 3166
person = models.ForeignKey(Person, blank=True, null=True)
def __unicode__(self):

View file

@ -8,8 +8,8 @@ from django.urls import reverse as urlreverse
from ietf.utils.test_data import make_test_data, make_review_data
from ietf.utils.test_utils import login_testing_unauthorized, TestCase, unicontent
from ietf.stats.models import Registration
from ietf.stats.utils import get_registration_data
from ietf.stats.models import MeetingRegistration
from ietf.stats.utils import get_meeting_registration_data
import ietf.stats.views
from ietf.submit.models import Submission
@ -154,12 +154,12 @@ class StatisticsTests(TestCase):
self.assertTrue(q('.review-stats td:contains("1")'))
@patch('requests.get')
def test_get_meeting_registration_data(self, mock_get):
    """Attendee data returned by the (mocked) HTTP call is stored as a
    MeetingRegistration row."""
    # Canned reply handed back by the patched requests.get: a JSON list
    # holding exactly one attendee record.
    response = Response()
    response.status_code = 200
    response._content = '[{"LastName":"Smith","FirstName":"John","Company":"ABC","Country":"US"}]'
    mock_get.return_value = response
    meeting = MeetingFactory(type_id='ietf', date=datetime.date(2016,7,14), number="96")
    get_meeting_registration_data(meeting)
    query = MeetingRegistration.objects.filter(first_name='John',last_name='Smith',country_code='US')
    # assertEqual, not assertTrue: assertTrue(x, 1) would take 1 as the
    # failure message and pass for any non-zero count.
    self.assertEqual(query.count(), 1)

View file

@ -7,5 +7,7 @@ urlpatterns = [
url("^$", views.stats_index),
url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/(?:documents|affiliation|country|continent|citations|hindex)|yearly/(?:affiliation|country|continent))/)?$", views.document_stats),
url("^knowncountries/$", views.known_countries_list),
url("^meeting/(?P<num>\d+)/(?P<stats_type>country|continent)/$", views.meeting_stats),
url("^meeting/(?:(?P<stats_type>overview|country|continent)/)?$", views.meeting_stats),
url("^review/(?:(?P<stats_type>completion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, views.review_stats),
]

View file

@ -4,7 +4,7 @@ from collections import defaultdict
from django.conf import settings
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, Registration
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
from ietf.name.models import CountryName
def compile_affiliation_ending_stripping_regexp():
@ -210,7 +210,7 @@ def compute_hirsch_index(citation_counts):
return i
def get_registration_data(meeting):
def get_meeting_registration_data(meeting):
""""Retrieve registration attendee data and summary statistics. Returns number
of MeetingRegistration records created."""
num_created = 0
@ -223,12 +223,12 @@ def get_registration_data(meeting):
pass
for registration in decoded:
object, created = Registration.objects.get_or_create(
object, created = MeetingRegistration.objects.get_or_create(
meeting_id=meeting.pk,
first_name=registration['FirstName'],
last_name=registration['LastName'],
affiliation=registration['Company'],
country=registration['Country'])
country_code=registration['Country'])
if created:
num_created += 1
return num_created

View file

@ -12,6 +12,7 @@ from django.http import HttpResponseRedirect, HttpResponseForbidden
from django.db.models import Count, Q
from django.utils.safestring import mark_safe
from django.conf import settings
from django.shortcuts import get_object_or_404
import dateutil.relativedelta
@ -26,6 +27,8 @@ from ietf.person.models import Person
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName
from ietf.person.name import plain_name
from ietf.doc.models import DocAlias, Document, State, DocEvent
from ietf.meeting.models import Meeting
from ietf.stats.models import MeetingRegistration, CountryAlias
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index
from ietf.ietfauth.utils import has_role
@ -86,10 +89,40 @@ def prune_unknown_bin_with_known(bins):
# named/known bins
all_known = { n for b, names in bins.iteritems() if b for n in names }
bins[""] = [name for name in bins[""] if name not in all_known]
if not bins[""]:
del bins[""]
def count_bins(bins):
    """Return the number of distinct names found in the labeled bins.

    ``bins`` maps a bin label to an iterable of names. Bins with a falsy
    label (such as the "" bin) are ignored, and a name that occurs under
    several labels is only counted once.
    """
    # .items() instead of the Python-2-only .iteritems(): same result, but
    # works with any mapping on both Python 2 and Python 3.
    return len({ name for label, names in bins.items() if label for name in names })
def add_labeled_top_series_from_bins(chart_data, bins, limit):
    """Append one chart series per top-ranked label to ``chart_data``.

    ``bins`` maps ``(x, label)`` to a collection of names. Labels are ranked
    by how many distinct names they have across all x values, and the
    ``limit`` highest-ranked labels are kept. For each of them a dict
    ``{"data": [(x1, len(names1)), (x2, len(names2)), ...], "name": label}``
    is appended to ``chart_data``, with the points sorted by x and a count of
    0 for any x where the label has no bin.

    ``chart_data`` is modified in place; nothing is returned.
    """
    names_by_label = defaultdict(set)
    xs = set()
    # .items() instead of the Python-2-only .iteritems(): same result, but
    # works with any mapping on both Python 2 and Python 3.
    for (x, label), names in bins.items():
        xs.add(x)
        names_by_label[label].update(names)

    xs = sorted(xs)

    # rank labels by their total number of distinct names, largest first
    ranked = sorted(names_by_label.items(), key=lambda t: len(t[1]), reverse=True)
    top = [ label for label, names in ranked[:limit] ]

    for label in top:
        series_data = []

        for x in xs:
            # .get() so that looking up a missing (x, label) pair does not
            # create a new entry when bins is a defaultdict
            names = bins.get((x, label), set())
            series_data.append((x, len(names)))

        chart_data.append({
            "data": series_data,
            "name": label
        })
def document_stats(request, stats_type=None):
def build_document_stats_url(stats_type_override=Ellipsis, get_overrides={}):
kwargs = {
@ -549,8 +582,7 @@ def document_stats(request, stats_type=None):
chart_data.append({ "data": sorted(series_data, key=lambda t: t[0]) })
elif any(stats_type == t[0] and stats_type.split("/")[1] in ["affiliation", "country", "continent"]
for t in possible_yearly_stats_types):
elif any(stats_type == t[0] for t in possible_yearly_stats_types):
person_filters = Q(documentauthor__document__type="draft")
@ -585,32 +617,6 @@ def document_stats(request, stats_type=None):
years_from = from_time.year if from_time else 1
years_to = datetime.date.today().year - 1
def add_yearly_chart_data_from_bins(bins, limit):
aggregated_bins = defaultdict(set)
years = set()
for (year, label), names in bins.iteritems():
years.add(year)
aggregated_bins[label].update(names)
years = list(sorted(y for y in years))
limit = 8
sorted_bins = sorted(aggregated_bins.iteritems(), key=lambda t: len(t[1]), reverse=True)
top = [ label for label, names in list(sorted_bins)[:limit]]
for label in top:
series_data = []
for y in years:
names = bins.get((y, label), set())
series_data.append((y, len(names)))
chart_data.append({
"data": series_data,
"name": label
})
if stats_type == "yearly/affiliation":
stats_title = "Number of {} authors per affiliation over the years".format(doc_label)
@ -632,7 +638,7 @@ def document_stats(request, stats_type=None):
if years_from <= year <= years_to:
bins[(year, a)].add(name)
add_yearly_chart_data_from_bins(bins, limit=8)
add_labeled_top_series_from_bins(chart_data, bins, limit=8)
elif stats_type == "yearly/country":
stats_title = "Number of {} authors per country over the years".format(doc_label)
@ -664,7 +670,7 @@ def document_stats(request, stats_type=None):
if c and c.in_eu:
bins[(year, eu_name)].add(name)
add_yearly_chart_data_from_bins(bins, limit=8)
add_labeled_top_series_from_bins(chart_data, bins, limit=8)
elif stats_type == "yearly/continent":
@ -692,7 +698,7 @@ def document_stats(request, stats_type=None):
if years_from <= year <= years_to:
bins[(year, continent_name)].add(name)
add_yearly_chart_data_from_bins(bins, limit=8)
add_labeled_top_series_from_bins(chart_data, bins, limit=8)
return render(request, "stats/document_stats.html", {
"chart_data": mark_safe(json.dumps(chart_data)),
@ -728,6 +734,198 @@ def known_countries_list(request, stats_type=None, acronym=None):
"ticket_email_address": settings.SECRETARIAT_TICKET_EMAIL,
})
def meeting_stats(request, num=None, stats_type=None):
    """Render registration statistics for IETF meetings.

    With ``num`` the statistics cover that single IETF meeting (404 if it
    does not exist), broken down per country or per continent. Without
    ``num`` they span all IETF meetings: an overview of registrations per
    meeting, or the top countries/continents across meetings.

    If ``stats_type`` is not given, the response is a redirect to the first
    statistics type available for the chosen scope. Otherwise the
    "stats/meeting_stats.html" template is rendered with the chart series
    (as JSON), the table rows and the name of the content template to
    include.
    """
    meeting = None
    if num is not None:
        meeting = get_object_or_404(Meeting, number=num, type="ietf")

    def build_meeting_stats_url(number=None, stats_type_override=Ellipsis, get_overrides=None):
        # Ellipsis marks "no override given", which leaves None free to
        # mean "leave stats_type out of the URL".
        kwargs = {
            "stats_type": stats_type if stats_type_override is Ellipsis else stats_type_override,
        }

        if number is not None:
            kwargs["num"] = number

        # a fresh dict per call rather than a shared mutable default
        if get_overrides is None:
            get_overrides = {}

        return urlreverse(meeting_stats, kwargs={ k: v for k, v in kwargs.items() if v is not None }) + generate_query_string(request.GET, get_overrides)

    # statistics types
    if meeting:
        possible_stats_types = add_url_to_choices([
            ("country", "Country"),
            ("continent", "Continent"),
        ], lambda slug: build_meeting_stats_url(number=meeting.number, stats_type_override=slug))
    else:
        possible_stats_types = add_url_to_choices([
            ("overview", "Overview"),
            ("country", "Country"),
            ("continent", "Continent"),
        ], lambda slug: build_meeting_stats_url(number=None, stats_type_override=slug))

    if not stats_type:
        return HttpResponseRedirect(build_meeting_stats_url(number=num, stats_type_override=possible_stats_types[0][0]))

    chart_data = []
    table_data = []
    stats_title = ""
    template_name = stats_type
    bin_size = 1
    eu_countries = None

    def get_country_mapping(registrations):
        # Map each country code used by the registrations to its country
        # object, going through the country aliases; only aliases written
        # entirely in upper case are considered.
        return {
            alias.alias: alias.country
            for alias in CountryAlias.objects.filter(alias__in=set(r.country_code for r in registrations)).select_related("country", "country__continent")
            if alias.alias.isupper()
        }

    if meeting and any(stats_type == t[0] for t in possible_stats_types):
        # statistics for a single meeting
        registrations = MeetingRegistration.objects.filter(meeting=meeting)

        if stats_type == "country":
            stats_title = "Number of registrations for {} {} per country".format(meeting.type.name, meeting.number)

            bins = defaultdict(set)

            country_mapping = get_country_mapping(registrations)

            eu_name = "EU"
            eu_countries = set(CountryName.objects.filter(in_eu=True))

            for r in registrations:
                name = (r.first_name + " " + r.last_name).strip()
                c = country_mapping.get(r.country_code)
                # Unknown countries go into the "" bin: that is the bin
                # prune_unknown_bin_with_known() works on, and the sort key
                # below calls .lower() on every label, which would fail on
                # a None label.
                bins[c.name if c else ""].add(name)

                if c and c.in_eu:
                    bins[eu_name].add(name)

            prune_unknown_bin_with_known(bins)
            total_registrations = count_bins(bins)

            series_data = []
            for country, names in sorted(bins.items(), key=lambda t: t[0].lower()):
                # "or 1" avoids dividing by zero when nothing is known
                percentage = len(names) * 100.0 / (total_registrations or 1)
                if country:
                    series_data.append((country, len(names)))
                table_data.append((country, percentage, names))

            # the chart only shows the 30 largest countries, the table
            # keeps all of them
            series_data.sort(key=lambda t: t[1], reverse=True)
            series_data = series_data[:30]

            chart_data.append({ "data": series_data })

        elif stats_type == "continent":
            stats_title = "Number of registrations for {} {} per continent".format(meeting.type.name, meeting.number)

            bins = defaultdict(set)

            country_mapping = get_country_mapping(registrations)

            for r in registrations:
                name = (r.first_name + " " + r.last_name).strip()
                c = country_mapping.get(r.country_code)
                # "" rather than None for unknown, see the country case
                bins[c.continent.name if c else ""].add(name)

            prune_unknown_bin_with_known(bins)
            total_registrations = count_bins(bins)

            series_data = []
            for continent, names in sorted(bins.items(), key=lambda t: t[0].lower()):
                percentage = len(names) * 100.0 / (total_registrations or 1)
                if continent:
                    series_data.append((continent, len(names)))
                table_data.append((continent, percentage, names))

            series_data.sort(key=lambda t: t[1], reverse=True)

            chart_data.append({ "data": series_data })

    elif not meeting and any(stats_type == t[0] for t in possible_stats_types):
        # statistics across all IETF meetings share one content template
        template_name = "overview"

        registrations = MeetingRegistration.objects.filter(meeting__type="ietf")

        if stats_type == "overview":
            stats_title = "Number of registrations per meeting"

            bins = defaultdict(set)

            for first_name, last_name, meeting_number in registrations.values_list("first_name", "last_name", "meeting__number"):
                # meeting numbers are converted to int so that they sort
                # numerically
                meeting_number = int(meeting_number)
                name = (first_name + " " + last_name).strip()

                bins[meeting_number].add(name)

            meeting_cities = dict(Meeting.objects.filter(number__in=list(bins)).values_list("number", "city"))

            series_data = []
            for meeting_number, names in sorted(bins.items()):
                series_data.append((meeting_number, len(names)))
                url = build_meeting_stats_url(number=meeting_number, stats_type_override="country")
                # meeting_cities is looked up with the string form of the
                # number, hence str()
                label = "IETF {} - {}".format(meeting_number, meeting_cities.get(str(meeting_number), ""))
                table_data.append((meeting_number, label, url, names))

            series_data.sort(key=lambda t: t[0])
            table_data.sort(key=lambda t: t[0], reverse=True)

            chart_data.append({ "data": series_data })

        elif stats_type == "country":
            stats_title = "Number of registrations per country across meetings"

            country_mapping = get_country_mapping(registrations)

            eu_name = "EU"
            eu_countries = set(CountryName.objects.filter(in_eu=True))

            bins = defaultdict(set)

            for first_name, last_name, country_code, meeting_number in registrations.values_list("first_name", "last_name", "country_code", "meeting__number"):
                meeting_number = int(meeting_number)
                name = (first_name + " " + last_name).strip()
                c = country_mapping.get(country_code)

                # registrations with an unknown country are left out here
                if c:
                    bins[(meeting_number, c.name)].add(name)
                    if c.in_eu:
                        bins[(meeting_number, eu_name)].add(name)

            add_labeled_top_series_from_bins(chart_data, bins, limit=8)

        elif stats_type == "continent":
            stats_title = "Number of registrations per continent across meetings"

            country_mapping = get_country_mapping(registrations)

            bins = defaultdict(set)

            for first_name, last_name, country_code, meeting_number in registrations.values_list("first_name", "last_name", "country_code", "meeting__number"):
                meeting_number = int(meeting_number)
                name = (first_name + " " + last_name).strip()
                c = country_mapping.get(country_code)

                if c:
                    bins[(meeting_number, c.continent.name)].add(name)

            add_labeled_top_series_from_bins(chart_data, bins, limit=8)

    return render(request, "stats/meeting_stats.html", {
        "chart_data": mark_safe(json.dumps(chart_data)),
        "table_data": table_data,
        "stats_title": stats_title,
        "possible_stats_types": possible_stats_types,
        "stats_type": stats_type,
        "bin_size": bin_size,
        "meeting": meeting,
        "eu_countries": sorted(eu_countries or [], key=lambda c: c.name),
        "content_template": "stats/meeting_stats_{}.html".format(template_name),
    })
@login_required
def review_stats(request, stats_type=None, acronym=None):

View file

@ -13,6 +13,7 @@
<ul>
<li><a href="{% url "ietf.stats.views.document_stats" %}">Drafts/RFCs (authors, countries, formats, ...)</a></li>
<li><a href="{% url "ietf.stats.views.meeting_stats" %}">Meeting attendance (countries, ...)</a></li>
<li><a rel="nofollow" href="{% url "ietf.stats.views.review_stats" %}">Reviews of drafts in review teams</a> (requires login)</li>
</ul>

View file

@ -0,0 +1,46 @@
{% extends "base.html" %}
{# Page frame for the meeting statistics: a row of buttons for switching statistics type, followed by the content template chosen by the view. #}
{% load origin %}
{% load ietf_filters staticfiles bootstrap3 %}
{% block title %}{{ stats_title }}{% endblock %}
{% block pagehead %}
{# NOTE(review): nothing in this template visibly uses a date picker - confirm that this stylesheet is needed. #}
<link rel="stylesheet" href="{% static 'bootstrap-datepicker/css/bootstrap-datepicker3.min.css' %}">
{% endblock %}
{% block content %}
{% origin %}
<h1>Meeting Statistics</h1>
{# The back link is only shown when a single meeting is being displayed. #}
{% if meeting %}
<p>
<a href="{% url "ietf.stats.views.meeting_stats" %}">&laquo; Back to overview</a>
</p>
{% endif %}
<div class="stats-options well">
<div>
Registrations:
<div class="btn-group">
{# One button per available statistics type; the button for the current type is marked active. #}
{% for slug, label, url in possible_stats_types %}
<a class="btn btn-default {% if slug == stats_type %}active{% endif %}" href="{{ url }}">{{ label }}</a>
{% endfor %}
</div>
</div>
</div>
<div class="document-stats">
{# content_template is supplied in the template context and holds the chart configuration and data table. #}
{% include content_template %}
</div>
{% endblock %}
{% block js %}
<script src="{% static 'highcharts/highcharts.js' %}"></script>
<script src="{% static 'highcharts/modules/exporting.js' %}"></script>
<script src="{% static 'highcharts/modules/offline-exporting.js' %}"></script>
<script src="{% static 'ietf/js/stats.js' %}"></script>
{% endblock %}

View file

@ -0,0 +1,64 @@
<h3>{{ stats_title }}</h3>
<div id="chart"></div>
<script>
// Highcharts configuration for the per-continent column chart. Declared as a
// global so that it is reachable as window.chartConf.
var chartConf = {
chart: {
type: 'column'
},
plotOptions: {
column: {
animation: false
}
},
title: {
text: '{{ stats_title|escapejs }}'
},
xAxis: {
type: "category",
title: {
text: 'Continent'
}
},
yAxis: {
title: {
text: 'Number of registrations'
}
},
tooltip: {
// Builds the tooltip HTML: the key of the first point in bold, then one
// line per point with the y axis title and the point's value.
formatter: function () {
var s = '<b>' + this.points[0].key + '</b>';
$.each(this.points, function () {
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
});
return s;
},
shared: true
},
// chart_data is filled in by the Django template when the page is rendered
series: {{ chart_data }}
};
</script>
<h3>Data</h3>
<table class="table table-condensed stats-data">
<thead>
<tr>
<th>Continent</th>
<th>Percentage of registrations</th>
<th>Registrations</th>
</tr>
</thead>
<tbody>
{% for continent, percentage, names in table_data %}
<tr>
<td>{{ continent|default:"(unknown)" }}</td>
<td>{{ percentage|floatformat:2 }}%</td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
</tr>
{% endfor %}
</tbody>
</table>

View file

@ -0,0 +1,69 @@
<h3>{{ stats_title }}</h3>
<div id="chart"></div>
<script>
// Highcharts configuration for the per-country column chart. Declared as a
// global so that it is reachable as window.chartConf.
var chartConf = {
chart: {
type: 'column'
},
plotOptions: {
column: {
animation: false
}
},
title: {
text: '{{ stats_title|escapejs }}'
},
xAxis: {
type: "category",
title: {
text: 'Country'
}
},
yAxis: {
title: {
text: 'Number of registrations'
}
},
tooltip: {
// Builds the tooltip HTML: the key of the first point in bold, then one
// line per point with the y axis title and the point's value.
formatter: function () {
var s = '<b>' + this.points[0].key + '</b>';
$.each(this.points, function () {
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y;
});
return s;
},
shared: true
},
// chart_data is filled in by the Django template when the page is rendered
series: {{ chart_data }}
};
</script>
<h3>Data</h3>
<table class="table table-condensed stats-data">
<thead>
<tr>
<th>Country</th>
<th>Percentage of registrations</th>
<th>Registrations</th>
</tr>
</thead>
<tbody>
{% for country, percentage, names in table_data %}
<tr>
<td>{{ country|default:"(unknown)" }}</td>
<td>{{ percentage|floatformat:2 }}%</td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
</tr>
{% endfor %}
</tbody>
</table>
<p>EU (European Union) is not a country, but has been added for reference, as the sum of
all current EU member countries:
{% for c in eu_countries %}{{ c.name }}{% if not forloop.last %}, {% endif %}{% endfor %}.</p>

View file

@ -0,0 +1,75 @@
<h3>{{ stats_title }}</h3>
<div id="chart"></div>
<script>
// Highcharts configuration for the line chart with one point per meeting.
// Declared as a global so that it is reachable as window.chartConf.
var chartConf = {
chart: {
type: 'line',
},
plotOptions: {
line: {
marker: {
enabled: false
},
animation: false
}
},
legend: {
align: "right",
verticalAlign: "middle",
layout: "vertical",
enabled: true
},
title: {
text: '{{ stats_title|escapejs }}'
},
xAxis: {
tickInterval: 1,
title: {
text: 'Meeting'
}
},
yAxis: {
min: 0,
title: {
text: 'Registrations at meeting'
}
},
tooltip: {
// Builds the tooltip HTML: the x value in bold, then one line per point
// with the name of its series and the point's value.
formatter: function () {
var s = '<b>' + this.x + '</b>';
$.each(this.points, function () {
s += '<br/>' + this.series.name + ': ' + this.y;
});
return s;
},
shared: true
},
// chart_data is filled in by the Django template when the page is rendered
series: {{ chart_data }}
};
</script>
{% if table_data %}
<h3>Data</h3>
<table class="table table-condensed stats-data">
<thead>
<tr>
<th>Meeting</th>
<th>Registrations</th>
</tr>
</thead>
<tbody>
{% for meeting_number, label, url, names in table_data %}
<tr>
<td><a href="{{ url }}">{{ label }}</a></td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}