From c61babb418e57c3427239ea61354fd3c1f000078 Mon Sep 17 00:00:00 2001 From: Ole Laursen Date: Fri, 17 Feb 2017 17:43:14 +0000 Subject: [PATCH] Add citation and h-index statistics - Legacy-Id: 12869 --- ietf/person/models.py | 5 +- ietf/person/name.py | 4 + ietf/stats/urls.py | 2 +- ietf/stats/views.py | 79 +++++++++++++++++-- .../document_stats_author_citations.html | 66 ++++++++++++++++ .../document_stats_author_documents.html | 2 +- .../stats/document_stats_author_hindex.html | 74 +++++++++++++++++ .../includes/number_with_details_cell.html | 8 +- 8 files changed, 226 insertions(+), 14 deletions(-) create mode 100644 ietf/templates/stats/document_stats_author_citations.html create mode 100644 ietf/templates/stats/document_stats_author_hindex.html diff --git a/ietf/person/models.py b/ietf/person/models.py index 61fa6b2c0..9ee992930 100644 --- a/ietf/person/models.py +++ b/ietf/person/models.py @@ -14,7 +14,7 @@ from django.utils.text import slugify import debug # pyflakes:ignore -from ietf.person.name import name_parts, initials +from ietf.person.name import name_parts, initials, plain_name from ietf.utils.mail import send_mail_preformatted from ietf.utils.storage import NoLocationMigrationFileSystemStorage @@ -47,8 +47,7 @@ class PersonInfo(models.Model): return (first and first[0]+"." or "")+(middle or "")+" "+last+(suffix and " "+suffix or "") def plain_name(self): if not hasattr(self, '_cached_plain_name'): - prefix, first, middle, last, suffix = name_parts(self.name) - self._cached_plain_name = u" ".join([first, last]) + self._cached_plain_name = plain_name(self.name) return self._cached_plain_name def ascii_name(self): if not hasattr(self, '_cached_ascii_name'): diff --git a/ietf/person/name.py b/ietf/person/name.py index 997f3def8..5337d1a1e 100644 --- a/ietf/person/name.py +++ b/ietf/person/name.py @@ -50,6 +50,10 @@ def initials(name): initials = u" ".join([ n[0]+'.' 
for n in given.split() ]) return initials +def plain_name(name): + prefix, first, middle, last, suffix = name_parts(name) + return u" ".join([first, last]) + if __name__ == "__main__": import sys name = u" ".join(sys.argv[1:]) diff --git a/ietf/stats/urls.py b/ietf/stats/urls.py index cb626c401..9ae43f60e 100644 --- a/ietf/stats/urls.py +++ b/ietf/stats/urls.py @@ -5,7 +5,7 @@ import ietf.stats.views urlpatterns = patterns('', url("^$", ietf.stats.views.stats_index), - url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citation)/)?$", ietf.stats.views.document_stats), + url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citations|author/hindex)/)?$", ietf.stats.views.document_stats), url("^knowncountries/$", ietf.stats.views.known_countries_list), url("^review/(?:(?P<stats_type>completion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, ietf.stats.views.review_stats), ) diff --git a/ietf/stats/views.py b/ietf/stats/views.py index 268cc9505..26439a275 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -23,9 +23,10 @@ from ietf.review.utils import (extract_review_request_data, from ietf.submit.models import Submission from ietf.group.models import Role, Group from ietf.person.models import Person -from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName +from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName +from ietf.person.name import plain_name from ietf.doc.models import DocAlias, Document, State -from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries +from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index from ietf.ietfauth.utils import has_role def stats_index(request): @@ -103,7 +104,8 @@ def document_stats(request, stats_type=None): 
("author/affiliation", "Affiliation"), ("author/country", "Country"), ("author/continent", "Continent"), - ("author/citation", "Citations"), + ("author/citations", "Citations"), + ("author/hindex", "Impact"), ], lambda slug: build_document_stats_url(stats_type_override=slug)) @@ -346,7 +348,7 @@ def document_stats(request, stats_type=None): person_filters &= Q(documentauthor__document__in=docs_within_time_constraint) - person_qs = Person.objects.filter(person_filters) + person_qs = Person.objects.filter(person_filters) if document_type == "rfc": doc_label = "RFC" @@ -369,6 +371,8 @@ def document_stats(request, stats_type=None): bins = defaultdict(list) + person_qs = Person.objects.filter(person_filters) + for name, document_count in person_qs.values_list("name").annotate(Count("documentauthor")): bins[document_count].append(name) @@ -378,7 +382,7 @@ def document_stats(request, stats_type=None): for document_count, names in sorted(bins.iteritems(), key=lambda t: t[0]): percentage = len(names) * 100.0 / (total_persons or 1) series_data.append((document_count, percentage)) - table_data.append((document_count, percentage, names)) + table_data.append((document_count, percentage, [plain_name(n) for n in names])) chart_data.append({ "data": series_data, @@ -390,6 +394,8 @@ def document_stats(request, stats_type=None): bins = defaultdict(list) + person_qs = Person.objects.filter(person_filters) + # Since people don't write the affiliation names in the # same way, and we don't want to go back and edit them # either, we transform them here. 
@@ -410,7 +416,7 @@ def document_stats(request, stats_type=None): percentage = len(names) * 100.0 / (total_persons or 1) if affiliation: series_data.append((affiliation, len(names))) - table_data.append((affiliation, percentage, names)) + table_data.append((affiliation, percentage, [plain_name(n) for n in names])) series_data.sort(key=lambda t: t[1], reverse=True) series_data = series_data[:30] @@ -428,6 +434,8 @@ def document_stats(request, stats_type=None): bins = defaultdict(list) + person_qs = Person.objects.filter(person_filters) + # Since people don't write the country names in the # same way, and we don't want to go back and edit them # either, we transform them here. @@ -457,7 +465,7 @@ def document_stats(request, stats_type=None): percentage = len(names) * 100.0 / (total_persons or 1) if country: series_data.append((country, len(names))) - table_data.append((country, percentage, names)) + table_data.append((country, percentage, [plain_name(n) for n in names])) series_data.sort(key=lambda t: t[1], reverse=True) series_data = series_data[:30] @@ -477,6 +485,8 @@ def document_stats(request, stats_type=None): bins = defaultdict(list) + person_qs = Person.objects.filter(person_filters) + name_country_set = set((name, country) for name, country in person_qs.values_list("name", "documentauthor__country")) @@ -497,7 +507,7 @@ def document_stats(request, stats_type=None): percentage = len(names) * 100.0 / (total_persons or 1) if continent: series_data.append((continent, len(names))) - table_data.append((continent, percentage, names)) + table_data.append((continent, percentage, [plain_name(n) for n in names])) series_data.sort(key=lambda t: t[1], reverse=True) @@ -506,6 +516,59 @@ def document_stats(request, stats_type=None): "animation": False, }) + elif stats_type == "author/citations": + stats_title = "Number of citations of {}s written by author".format(doc_label) + + bins = defaultdict(list) + + cite_relationships = 
list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold'])) + person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships) + + person_qs = Person.objects.filter(person_filters) + + for name, citations in person_qs.values_list("name").annotate(Count("documentauthor__document__docalias__relateddocument")): + bins[citations].append(name) + + total_persons = count_bins(bins) + + series_data = [] + for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True): + percentage = len(names) * 100.0 / (total_persons or 1) + series_data.append((citations, percentage)) + table_data.append((citations, percentage, [plain_name(n) for n in names])) + + chart_data.append({ + "data": sorted(series_data, key=lambda t: t[0]), + "animation": False, + }) + + elif stats_type == "author/hindex": + stats_title = "h-index for {}s written by author".format(doc_label) + + bins = defaultdict(list) + + cite_relationships = list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold'])) + person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships) + + person_qs = Person.objects.filter(person_filters) + + values = person_qs.values_list("name", "documentauthor__document").annotate(Count("documentauthor__document__docalias__relateddocument")) + for name, ts in itertools.groupby(values.order_by("name"), key=lambda t: t[0]): + h_index = compute_hirsch_index([citations for _, document, citations in ts]) + bins[h_index].append(name) + + total_persons = count_bins(bins) + + series_data = [] + for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True): + percentage = len(names) * 100.0 / (total_persons or 1) + series_data.append((citations, percentage)) + table_data.append((citations, percentage, [plain_name(n) for n in names])) + + chart_data.append({ + "data": sorted(series_data, key=lambda t: 
t[0]), + "animation": False, + }) return render(request, "stats/document_stats.html", { "chart_data": mark_safe(json.dumps(chart_data)), diff --git a/ietf/templates/stats/document_stats_author_citations.html b/ietf/templates/stats/document_stats_author_citations.html new file mode 100644 index 000000000..bcb3cff9e --- /dev/null +++ b/ietf/templates/stats/document_stats_author_citations.html @@ -0,0 +1,66 @@ +

{{ stats_title }}

+ +
+ + + +

Data

+ + + + + + + + + + + {% for citations, percentage, names in table_data %} + + + + + + {% endfor %} + +
CitationsPercentage of authorsAuthors
{{ citations }}{{ percentage|floatformat:2 }}%{% include "stats/includes/number_with_details_cell.html" with content_limit=10 %}
+ +

Note that the citation counts do not exclude self-references.

diff --git a/ietf/templates/stats/document_stats_author_documents.html b/ietf/templates/stats/document_stats_author_documents.html index 0d21b41d2..025e8c26f 100644 --- a/ietf/templates/stats/document_stats_author_documents.html +++ b/ietf/templates/stats/document_stats_author_documents.html @@ -58,7 +58,7 @@ {{ document_count }} {{ percentage|floatformat:2 }}% - {% include "stats/includes/number_with_details_cell.html" %} + {% include "stats/includes/number_with_details_cell.html" with content_limit=10 %} {% endfor %} diff --git a/ietf/templates/stats/document_stats_author_hindex.html b/ietf/templates/stats/document_stats_author_hindex.html new file mode 100644 index 000000000..d5d67329c --- /dev/null +++ b/ietf/templates/stats/document_stats_author_hindex.html @@ -0,0 +1,74 @@ +

{{ stats_title }}

+ +
+ + + +

Data

+ + + + + + + + + + + {% for h_index, percentage, names in table_data %} + + + + + + {% endfor %} + +
h-indexPercentage of authorsAuthors
{{ h_index }}{{ percentage|floatformat:2 }}%{% include "stats/includes/number_with_details_cell.html" with content_limit=25 %}
+ +

Hirsch index or h-index is a + measure of the + productivity and impact of the publications of an author. An + author with an h-index of 5 has had 5 publications each cited at + least 5 times - to increase the index to 6, the 5 publications plus + 1 more would have to have been cited at least 6 times, each. Thus a + high h-index requires many highly-cited publications.

+ +

Note that the h-index calculations do not exclude self-references.

diff --git a/ietf/templates/stats/includes/number_with_details_cell.html b/ietf/templates/stats/includes/number_with_details_cell.html index cdadc287a..2dac07977 100644 --- a/ietf/templates/stats/includes/number_with_details_cell.html +++ b/ietf/templates/stats/includes/number_with_details_cell.html @@ -1 +1,7 @@ -{{ names|length }} +{% if content_limit and names|length <= content_limit %} + {% for n in names %} + {{ n }}
+ {% endfor %} +{% else %} + {{ names|length }} +{% endif %}