diff --git a/ietf/stats/tests.py b/ietf/stats/tests.py index fbb9d4e48..62e4e048d 100644 --- a/ietf/stats/tests.py +++ b/ietf/stats/tests.py @@ -26,7 +26,9 @@ class StatisticsTests(TestCase): # check various stats types for stats_type in ["authors", "pages", "words", "format", "formlang", - "author/documents", "author/affiliation", "author/country", "author/continent"]: + "author/documents", "author/affiliation", "author/country", + "author/continent", "author/citations", "author/hindex", + "yearly/affiliation", "yearly/country", "yearly/continent"]: for document_type in ["", "rfc", "draft"]: for time_choice in ["", "5y"]: url = urlreverse(ietf.stats.views.document_stats, kwargs={ "stats_type": stats_type }) @@ -37,7 +39,8 @@ class StatisticsTests(TestCase): self.assertEqual(r.status_code, 200) q = PyQuery(r.content) self.assertTrue(q('#chart')) - self.assertTrue(q('table.stats-data')) + if not stats_type.startswith("yearly"): + self.assertTrue(q('table.stats-data')) def test_known_country_list(self): make_test_data() diff --git a/ietf/stats/urls.py b/ietf/stats/urls.py index 9ae43f60e..ea802b952 100644 --- a/ietf/stats/urls.py +++ b/ietf/stats/urls.py @@ -5,7 +5,7 @@ import ietf.stats.views urlpatterns = patterns('', url("^$", ietf.stats.views.stats_index), - url("^document/(?:(?Pauthors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citations||author/hindex)/)?$", ietf.stats.views.document_stats), + url("^document/(?:(?Pauthors|pages|words|format|formlang|author/(?:documents|affiliation|country|continent|citations|hindex)|yearly/(?:affiliation|country|continent))/)?$", ietf.stats.views.document_stats), url("^knowncountries/$", ietf.stats.views.known_countries_list), url("^review/(?:(?Pcompletion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, ietf.stats.views.review_stats), ) diff --git a/ietf/stats/views.py b/ietf/stats/views.py index 26439a275..6a5b4dba6 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -25,7 +25,7 @@ from ietf.group.models import Role, Group from ietf.person.models import Person from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName from ietf.person.name import plain_name -from ietf.doc.models import DocAlias, Document, State +from ietf.doc.models import DocAlias, Document, State, DocEvent from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index from ietf.ietfauth.utils import has_role @@ -81,6 +81,15 @@ def put_into_bin(value, bin_size): v = (value // bin_size) * bin_size return (v, "{} - {}".format(v, v + bin_size - 1)) +def prune_unknown_bin_with_known(bins): + # remove from the unknown bin all authors within the + # named/known bins + all_known = { n for b, names in bins.iteritems() if b for n in names } + bins[""] = [name for name in bins[""] if name not in all_known] + +def count_bins(bins): + return len({ n for b, names in bins.iteritems() if b for n in names }) + def document_stats(request, stats_type=None): def build_document_stats_url(stats_type_override=Ellipsis, get_overrides={}): kwargs = { @@ -89,7 +98,7 @@ def document_stats(request, stats_type=None): return urlreverse(document_stats, kwargs={ k: v for k, v in kwargs.iteritems() if v is not None }) + generate_query_string(request.GET, get_overrides) - # statistics type - one of the tables or the chart + # statistics types possible_document_stats_types = add_url_to_choices([ ("authors", "Number of authors"), ("pages", "Pages"), @@ -98,17 +107,22 @@ def document_stats(request, stats_type=None): ("formlang", "Formal languages"), ], lambda slug: build_document_stats_url(stats_type_override=slug)) - # statistics type - one of the tables or the chart possible_author_stats_types = add_url_to_choices([ ("author/documents", "Number of documents"), ("author/affiliation", "Affiliation"), ("author/country", "Country"), ("author/continent", "Continent"), ("author/citations", "Citations"), - ("author/hindex", "Impact"), + ("author/hindex", "h-index"), ], lambda slug: build_document_stats_url(stats_type_override=slug)) - + possible_yearly_stats_types = add_url_to_choices([ + ("yearly/affiliation", "Affiliation"), + ("yearly/country", "Country"), + ("yearly/continent", "Continent"), + ], lambda slug: build_document_stats_url(stats_type_override=slug)) + + if not stats_type: return HttpResponseRedirect(build_document_stats_url(stats_type_override=possible_document_stats_types[0][0])) @@ -132,14 +146,15 @@ def document_stats(request, stats_type=None): from_time = None if "y" in time_choice: try: - years = int(time_choice.rstrip("y")) - from_time = datetime.datetime.today() - dateutil.relativedelta.relativedelta(years=years) + y = int(time_choice.rstrip("y")) + from_time = datetime.datetime.today() - dateutil.relativedelta.relativedelta(years=y) except ValueError: pass chart_data = [] table_data = [] stats_title = "" + template_name = stats_type.replace("/", "_") bin_size = 1 alias_data = [] eu_countries = None @@ -205,10 +220,7 @@ def document_stats(request, stats_type=None): series_data.append((author_count, percentage)) table_data.append((author_count, percentage, names)) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "pages": stats_title = "Number of pages for each {}".format(doc_label) @@ -225,10 +237,7 @@ def document_stats(request, stats_type=None): series_data.append((pages, len(names))) table_data.append((pages, percentage, names)) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "words": stats_title = "Number of words for each {}".format(doc_label) @@ -248,10 +257,7 @@ def document_stats(request, stats_type=None): table_data.append((words, percentage, names)) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "format": stats_title = "Submission formats for each {}".format(doc_label) @@ -302,10 +308,7 @@ def document_stats(request, stats_type=None): table_data.append((fmt, percentage, names)) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "formlang": stats_title = "Formal languages used for each {}".format(doc_label) @@ -322,10 +325,7 @@ def document_stats(request, stats_type=None): series_data.append((formal_language, len(names))) table_data.append((formal_language, percentage, names)) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif any(stats_type == t[0] for t in possible_author_stats_types): person_filters = Q(documentauthor__document__type="draft") @@ -348,7 +348,7 @@ def document_stats(request, stats_type=None): person_filters &= Q(documentauthor__document__in=docs_within_time_constraint) - person_qs = Person.objects.filter(person_filters, documentauthor__document="draft-arkko-dual-stack-extra-lite") + person_qs = Person.objects.filter(person_filters) if document_type == "rfc": doc_label = "RFC" @@ -357,15 +357,6 @@ def document_stats(request, stats_type=None): else: doc_label = "document" - def prune_unknown_bin_with_known(bins): - # remove from the unknown bin all authors within the - # named/known bins - all_known = set(n for b, names in bins.iteritems() if b for n in names) - bins[""] = [name for name in bins[""] if name not in all_known] - - def count_bins(bins): - return len(set(n for b, names in bins.iteritems() if b for n in names)) - if stats_type == "author/documents": stats_title = "Number of {}s per author".format(doc_label) @@ -384,10 +375,7 @@ def document_stats(request, stats_type=None): series_data.append((document_count, percentage)) table_data.append((document_count, percentage, [plain_name(n) for n in names])) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "author/affiliation": stats_title = "Number of {} authors per affiliation".format(doc_label) @@ -400,8 +388,10 @@ def document_stats(request, stats_type=None): # same way, and we don't want to go back and edit them # either, we transform them here. - name_affiliation_set = set((name, affiliation) - for name, affiliation in person_qs.values_list("name", "documentauthor__affiliation")) + name_affiliation_set = { + (name, affiliation) + for name, affiliation in person_qs.values_list("name", "documentauthor__affiliation") + } aliases = get_aliased_affiliations(affiliation for _, affiliation in name_affiliation_set) @@ -421,10 +411,7 @@ def document_stats(request, stats_type=None): series_data.sort(key=lambda t: t[1], reverse=True) series_data = series_data[:30] - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) for alias, name in sorted(aliases.iteritems(), key=lambda t: t[1]): alias_data.append((name, alias)) @@ -440,14 +427,16 @@ def document_stats(request, stats_type=None): # same way, and we don't want to go back and edit them # either, we transform them here. - name_country_set = set((name, country) - for name, country in person_qs.values_list("name", "documentauthor__country")) + name_country_set = { + (name, country) + for name, country in person_qs.values_list("name", "documentauthor__country") + } aliases = get_aliased_countries(country for _, country in name_country_set) countries = { c.name: c for c in CountryName.objects.all() } eu_name = "EU" - eu_countries = set(c for c in countries.itervalues() if c.in_eu) + eu_countries = { c for c in countries.itervalues() if c.in_eu } for name, country in name_country_set: country_name = aliases.get(country, country) @@ -470,10 +459,7 @@ def document_stats(request, stats_type=None): series_data.sort(key=lambda t: t[1], reverse=True) series_data = series_data[:30] - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) for alias, country_name in aliases.iteritems(): alias_data.append((country_name, alias, countries.get(country_name))) @@ -487,8 +473,10 @@ def document_stats(request, stats_type=None): person_qs = Person.objects.filter(person_filters) - name_country_set = set((name, country) - for name, country in person_qs.values_list("name", "documentauthor__country")) + name_country_set = { + (name, country) + for name, country in person_qs.values_list("name", "documentauthor__country") + } aliases = get_aliased_countries(country for _, country in name_country_set) @@ -511,10 +499,7 @@ def document_stats(request, stats_type=None): series_data.sort(key=lambda t: t[1], reverse=True) - chart_data.append({ - "data": series_data, - "animation": False, - }) + chart_data.append({ "data": series_data }) elif stats_type == "author/citations": stats_title = "Number of citations of {}s written by author".format(doc_label) @@ -537,10 +522,7 @@ def document_stats(request, stats_type=None): series_data.append((citations, percentage)) table_data.append((citations, percentage, [plain_name(n) for n in names])) - chart_data.append({ - "data": sorted(series_data, key=lambda t: t[0]), - "animation": False, - }) + chart_data.append({ "data": sorted(series_data, key=lambda t: t[0]) }) elif stats_type == "author/hindex": stats_title = "h-index for {}s written by author".format(doc_label) @@ -565,10 +547,152 @@ def document_stats(request, stats_type=None): series_data.append((citations, percentage)) table_data.append((citations, percentage, [plain_name(n) for n in names])) - chart_data.append({ - "data": sorted(series_data, key=lambda t: t[0]), - "animation": False, - }) + chart_data.append({ "data": sorted(series_data, key=lambda t: t[0]) }) + + elif any(stats_type == t[0] and stats_type.split("/")[1] in ["affiliation", "country", "continent"] + for t in possible_yearly_stats_types): + + person_filters = Q(documentauthor__document__type="draft") + + # filter persons + rfc_state = State.objects.get(type="draft", slug="rfc") + if document_type == "rfc": + person_filters &= Q(documentauthor__document__states=rfc_state) + elif document_type == "draft": + person_filters &= ~Q(documentauthor__document__states=rfc_state) + + doc_years = defaultdict(set) + + docevent_qs = DocEvent.objects.filter( + doc__type="draft", + type__in=["published_rfc", "new_revision"], + ).values_list("doc", "time").order_by("doc") + + for doc, time in docevent_qs.iterator(): + doc_years[doc].add(time.year) + + person_qs = Person.objects.filter(person_filters) + + if document_type == "rfc": + doc_label = "RFC" + elif document_type == "draft": + doc_label = "draft" + else: + doc_label = "document" + + template_name = "yearly" + + years_from = from_time.year if from_time else 1 + years_to = datetime.date.today().year - 1 + + def add_yearly_chart_data_from_bins(bins, limit): + aggregated_bins = defaultdict(set) + years = set() + for (year, label), names in bins.iteritems(): + years.add(year) + aggregated_bins[label].update(names) + + years = list(sorted(y for y in years)) + + limit = 8 + sorted_bins = sorted(aggregated_bins.iteritems(), key=lambda t: len(t[1]), reverse=True) + top = [ label for label, names in list(sorted_bins)[:limit]] + + for label in top: + series_data = [] + + for y in years: + names = bins.get((y, label), set()) + + series_data.append((y, len(names))) + + chart_data.append({ + "data": series_data, + "name": label + }) + + + if stats_type == "yearly/affiliation": + stats_title = "Number of {} authors per affiliation over the years".format(doc_label) + + person_qs = Person.objects.filter(person_filters) + + name_affiliation_doc_set = { + (name, affiliation, doc) + for name, affiliation, doc in person_qs.values_list("name", "documentauthor__affiliation", "documentauthor__document") + } + + aliases = get_aliased_affiliations(affiliation for _, affiliation, _ in name_affiliation_doc_set) + + bins = defaultdict(set) + for name, affiliation, doc in name_affiliation_doc_set: + a = aliases.get(affiliation, affiliation) + if a: + for year in doc_years.get(doc): + if years_from <= year <= years_to: + bins[(year, a)].add(name) + + add_yearly_chart_data_from_bins(bins, limit=8) + + elif stats_type == "yearly/country": + stats_title = "Number of {} authors per country over the years".format(doc_label) + + person_qs = Person.objects.filter(person_filters) + + name_country_doc_set = { + (name, country, doc) + for name, country, doc in person_qs.values_list("name", "documentauthor__country", "documentauthor__document") + } + + aliases = get_aliased_countries(country for _, country, _ in name_country_doc_set) + + countries = { c.name: c for c in CountryName.objects.all() } + eu_name = "EU" + eu_countries = { c for c in countries.itervalues() if c.in_eu } + + bins = defaultdict(set) + + for name, country, doc in name_country_doc_set: + country_name = aliases.get(country, country) + c = countries.get(country_name) + + if country_name: + for year in doc_years.get(doc): + if years_from <= year <= years_to: + bins[(year, country_name)].add(name) + + if c and c.in_eu: + bins[(year, eu_name)].add(name) + + add_yearly_chart_data_from_bins(bins, limit=8) + + + elif stats_type == "yearly/continent": + stats_title = "Number of {} authors per continent".format(doc_label) + + person_qs = Person.objects.filter(person_filters) + + name_country_doc_set = { + (name, country, doc) + for name, country, doc in person_qs.values_list("name", "documentauthor__country", "documentauthor__document") + } + + aliases = get_aliased_countries(country for _, country, _ in name_country_doc_set) + + country_to_continent = dict(CountryName.objects.values_list("name", "continent__name")) + + bins = defaultdict(set) + + for name, country, doc in name_country_doc_set: + country_name = aliases.get(country, country) + continent_name = country_to_continent.get(country_name, "") + + if continent_name: + for year in doc_years.get(doc): + if years_from <= year <= years_to: + bins[(year, continent_name)].add(name) + + add_yearly_chart_data_from_bins(bins, limit=8) return render(request, "stats/document_stats.html", { "chart_data": mark_safe(json.dumps(chart_data)), @@ -576,6 +700,7 @@ def document_stats(request, stats_type=None): "stats_title": stats_title, "possible_document_stats_types": possible_document_stats_types, "possible_author_stats_types": possible_author_stats_types, + "possible_yearly_stats_types": possible_yearly_stats_types, "stats_type": stats_type, "possible_document_types": possible_document_types, "document_type": document_type, @@ -587,7 +712,7 @@ def document_stats(request, stats_type=None): "hide_aliases_url": build_document_stats_url(get_overrides={ "showaliases": None }), "alias_data": alias_data, "eu_countries": sorted(eu_countries or [], key=lambda c: c.name), - "content_template": "stats/document_stats_{}.html".format(stats_type.replace("/", "_")), + "content_template": "stats/document_stats_{}.html".format(template_name), }) diff --git a/ietf/templates/stats/document_stats.html b/ietf/templates/stats/document_stats.html index 8ff53471d..18eb1ee9c 100644 --- a/ietf/templates/stats/document_stats.html +++ b/ietf/templates/stats/document_stats.html @@ -36,6 +36,16 @@ +
+ Yearly: + +
+ {% for slug, label, url in possible_yearly_stats_types %} + {{ label }} + {% endfor %} +
+
+
Options
diff --git a/ietf/templates/stats/document_stats_author_affiliation.html b/ietf/templates/stats/document_stats_author_affiliation.html index 6bec8d3c8..8c6df8cce 100644 --- a/ietf/templates/stats/document_stats_author_affiliation.html +++ b/ietf/templates/stats/document_stats_author_affiliation.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_author_citations.html b/ietf/templates/stats/document_stats_author_citations.html index bcb3cff9e..25c41a978 100644 --- a/ietf/templates/stats/document_stats_author_citations.html +++ b/ietf/templates/stats/document_stats_author_citations.html @@ -7,6 +7,11 @@ chart: { type: 'area' }, + plotOptions: { + area: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_author_continent.html b/ietf/templates/stats/document_stats_author_continent.html index 0e0a1f849..b35334f95 100644 --- a/ietf/templates/stats/document_stats_author_continent.html +++ b/ietf/templates/stats/document_stats_author_continent.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_author_country.html b/ietf/templates/stats/document_stats_author_country.html index 7819e6c2c..ca01636ad 100644 --- a/ietf/templates/stats/document_stats_author_country.html +++ b/ietf/templates/stats/document_stats_author_country.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_author_documents.html b/ietf/templates/stats/document_stats_author_documents.html index 025e8c26f..30d8882a4 100644 --- a/ietf/templates/stats/document_stats_author_documents.html +++ b/ietf/templates/stats/document_stats_author_documents.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_author_hindex.html b/ietf/templates/stats/document_stats_author_hindex.html index d5d67329c..e8ba232e6 100644 --- a/ietf/templates/stats/document_stats_author_hindex.html +++ b/ietf/templates/stats/document_stats_author_hindex.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_authors.html b/ietf/templates/stats/document_stats_authors.html index 70fe249fb..abfc5ff96 100644 --- a/ietf/templates/stats/document_stats_authors.html +++ b/ietf/templates/stats/document_stats_authors.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_format.html b/ietf/templates/stats/document_stats_format.html index ce1512a09..c7f42f7ae 100644 --- a/ietf/templates/stats/document_stats_format.html +++ b/ietf/templates/stats/document_stats_format.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_formlang.html b/ietf/templates/stats/document_stats_formlang.html index e4b586d95..7c9470f8c 100644 --- a/ietf/templates/stats/document_stats_formlang.html +++ b/ietf/templates/stats/document_stats_formlang.html @@ -7,6 +7,11 @@ chart: { type: 'column' }, + plotOptions: { + column: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_pages.html b/ietf/templates/stats/document_stats_pages.html index dca167b1c..40cc55fa9 100644 --- a/ietf/templates/stats/document_stats_pages.html +++ b/ietf/templates/stats/document_stats_pages.html @@ -7,6 +7,11 @@ chart: { type: 'line' }, + plotOptions: { + line: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' }, diff --git a/ietf/templates/stats/document_stats_words.html b/ietf/templates/stats/document_stats_words.html index 956e49ea7..96bcb0e75 100644 --- a/ietf/templates/stats/document_stats_words.html +++ b/ietf/templates/stats/document_stats_words.html @@ -7,6 +7,11 @@ chart: { type: 'line' }, + plotOptions: { + line: { + animation: false + } + }, title: { text: '{{ stats_title|escapejs }}' },