Add citation and h-index statistics

- Legacy-Id: 12869
This commit is contained in:
Ole Laursen 2017-02-17 17:43:14 +00:00
parent f180147cbd
commit c61babb418
8 changed files with 226 additions and 14 deletions

View file

@ -14,7 +14,7 @@ from django.utils.text import slugify
import debug # pyflakes:ignore
from ietf.person.name import name_parts, initials
from ietf.person.name import name_parts, initials, plain_name
from ietf.utils.mail import send_mail_preformatted
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
@ -47,8 +47,7 @@ class PersonInfo(models.Model):
return (first and first[0]+"." or "")+(middle or "")+" "+last+(suffix and " "+suffix or "")
def plain_name(self):
    """Return this person's plain name, computing it at most once per instance.

    The value is derived from ``self.name`` by the ``plain_name`` helper
    imported from ``ietf.person.name`` and memoized on the instance as
    ``_cached_plain_name``; later calls return the stored value.
    """
    if not hasattr(self, '_cached_plain_name'):
        # Inside a method body the bare name ``plain_name`` resolves to the
        # module-level helper, not to this method. Delegating to it avoids
        # splitting the name here only to throw the parts away.
        self._cached_plain_name = plain_name(self.name)
    return self._cached_plain_name
def ascii_name(self):
if not hasattr(self, '_cached_ascii_name'):

View file

@ -50,6 +50,10 @@ def initials(name):
initials = u" ".join([ n[0]+'.' for n in given.split() ])
return initials
def plain_name(name):
    """Return *name* reduced to its first and last parts, joined by a space.

    The prefix, middle and suffix parts reported by ``name_parts`` are
    discarded.
    """
    _prefix, given, _middle, family, _suffix = name_parts(name)
    return u" ".join((given, family))
if __name__ == "__main__":
import sys
name = u" ".join(sys.argv[1:])

View file

@ -5,7 +5,7 @@ import ietf.stats.views
# URL routes for the statistics pages. In the document and review routes the
# trailing segment is optional and, when present, is captured as ``stats_type``.
urlpatterns = patterns('',
    url("^$", ietf.stats.views.stats_index),
    url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citation)/)?$", ietf.stats.views.document_stats),
    # Alternatives must be separated by a single "|": a doubled "||" adds an
    # empty alternative, which would let "document//" match with an empty
    # stats_type.
    url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citations|author/hindex)/)?$", ietf.stats.views.document_stats),
    url("^knowncountries/$", ietf.stats.views.known_countries_list),
    url("^review/(?:(?P<stats_type>completion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, ietf.stats.views.review_stats),
)

View file

@ -23,9 +23,10 @@ from ietf.review.utils import (extract_review_request_data,
from ietf.submit.models import Submission
from ietf.group.models import Role, Group
from ietf.person.models import Person
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName
from ietf.person.name import plain_name
from ietf.doc.models import DocAlias, Document, State
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index
from ietf.ietfauth.utils import has_role
def stats_index(request):
@ -103,7 +104,8 @@ def document_stats(request, stats_type=None):
("author/affiliation", "Affiliation"),
("author/country", "Country"),
("author/continent", "Continent"),
("author/citation", "Citations"),
("author/citations", "Citations"),
("author/hindex", "Impact"),
], lambda slug: build_document_stats_url(stats_type_override=slug))
@ -346,7 +348,7 @@ def document_stats(request, stats_type=None):
person_filters &= Q(documentauthor__document__in=docs_within_time_constraint)
person_qs = Person.objects.filter(person_filters)
person_qs = Person.objects.filter(person_filters, documentauthor__document="draft-arkko-dual-stack-extra-lite")
if document_type == "rfc":
doc_label = "RFC"
@ -369,6 +371,8 @@ def document_stats(request, stats_type=None):
bins = defaultdict(list)
person_qs = Person.objects.filter(person_filters)
for name, document_count in person_qs.values_list("name").annotate(Count("documentauthor")):
bins[document_count].append(name)
@ -378,7 +382,7 @@ def document_stats(request, stats_type=None):
for document_count, names in sorted(bins.iteritems(), key=lambda t: t[0]):
percentage = len(names) * 100.0 / (total_persons or 1)
series_data.append((document_count, percentage))
table_data.append((document_count, percentage, names))
table_data.append((document_count, percentage, [plain_name(n) for n in names]))
chart_data.append({
"data": series_data,
@ -390,6 +394,8 @@ def document_stats(request, stats_type=None):
bins = defaultdict(list)
person_qs = Person.objects.filter(person_filters)
# Since people don't write the affiliation names in the
# same way, and we don't want to go back and edit them
# either, we transform them here.
@ -410,7 +416,7 @@ def document_stats(request, stats_type=None):
percentage = len(names) * 100.0 / (total_persons or 1)
if affiliation:
series_data.append((affiliation, len(names)))
table_data.append((affiliation, percentage, names))
table_data.append((affiliation, percentage, [plain_name(n) for n in names]))
series_data.sort(key=lambda t: t[1], reverse=True)
series_data = series_data[:30]
@ -428,6 +434,8 @@ def document_stats(request, stats_type=None):
bins = defaultdict(list)
person_qs = Person.objects.filter(person_filters)
# Since people don't write the country names in the
# same way, and we don't want to go back and edit them
# either, we transform them here.
@ -457,7 +465,7 @@ def document_stats(request, stats_type=None):
percentage = len(names) * 100.0 / (total_persons or 1)
if country:
series_data.append((country, len(names)))
table_data.append((country, percentage, names))
table_data.append((country, percentage, [plain_name(n) for n in names]))
series_data.sort(key=lambda t: t[1], reverse=True)
series_data = series_data[:30]
@ -477,6 +485,8 @@ def document_stats(request, stats_type=None):
bins = defaultdict(list)
person_qs = Person.objects.filter(person_filters)
name_country_set = set((name, country)
for name, country in person_qs.values_list("name", "documentauthor__country"))
@ -497,7 +507,7 @@ def document_stats(request, stats_type=None):
percentage = len(names) * 100.0 / (total_persons or 1)
if continent:
series_data.append((continent, len(names)))
table_data.append((continent, percentage, names))
table_data.append((continent, percentage, [plain_name(n) for n in names]))
series_data.sort(key=lambda t: t[1], reverse=True)
@ -506,6 +516,59 @@ def document_stats(request, stats_type=None):
"animation": False,
})
elif stats_type == "author/citations":
stats_title = "Number of citations of {}s written by author".format(doc_label)
bins = defaultdict(list)
cite_relationships = list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold']))
person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships)
person_qs = Person.objects.filter(person_filters)
for name, citations in person_qs.values_list("name").annotate(Count("documentauthor__document__docalias__relateddocument")):
bins[citations].append(name)
total_persons = count_bins(bins)
series_data = []
for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True):
percentage = len(names) * 100.0 / (total_persons or 1)
series_data.append((citations, percentage))
table_data.append((citations, percentage, [plain_name(n) for n in names]))
chart_data.append({
"data": sorted(series_data, key=lambda t: t[0]),
"animation": False,
})
elif stats_type == "author/hindex":
stats_title = "h-index for {}s written by author".format(doc_label)
bins = defaultdict(list)
cite_relationships = list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold']))
person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships)
person_qs = Person.objects.filter(person_filters)
values = person_qs.values_list("name", "documentauthor__document").annotate(Count("documentauthor__document__docalias__relateddocument"))
for name, ts in itertools.groupby(values.order_by("name"), key=lambda t: t[0]):
h_index = compute_hirsch_index([citations for _, document, citations in ts])
bins[h_index].append(name)
total_persons = count_bins(bins)
series_data = []
for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True):
percentage = len(names) * 100.0 / (total_persons or 1)
series_data.append((citations, percentage))
table_data.append((citations, percentage, [plain_name(n) for n in names]))
chart_data.append({
"data": sorted(series_data, key=lambda t: t[0]),
"animation": False,
})
return render(request, "stats/document_stats.html", {
"chart_data": mark_safe(json.dumps(chart_data)),

View file

@ -0,0 +1,66 @@
{# Chart and data table showing how citation counts are distributed across authors. #}
<h3>{{ stats_title }}</h3>

<div id="chart"></div>

<script>
  // Chart configuration; the series data is supplied by the view as JSON.
  var chartConf = {
      chart: {
          type: 'area'
      },
      title: {
          text: '{{ stats_title|escapejs }}'
      },
      xAxis: {
          title: {
              // Values placed inside a JavaScript string literal go through
              // escapejs, the same way stats_title is handled above.
              text: 'Number of citations of {{ doc_label|escapejs }}s by author'
          },
          max: 500
      },
      yAxis: {
          title: {
              text: 'Percentage of authors'
          },
          labels: {
              formatter: function () {
                  return this.value + '%';
              }
          }
      },
      tooltip: {
          formatter: function () {
              // Heading: the citation count, with singular/plural wording.
              var s = '<b>' + this.x + ' ' + (this.x == 1 ? "citation" : 'citations') + '</b>';

              // One line per point, labelled with the y-axis title.
              $.each(this.points, function () {
                  s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y.toFixed(1) + '%';
              });

              return s;
          },
          shared: true
      },
      series: {{ chart_data }}
  };
</script>

<h3>Data</h3>

<table class="table table-condensed stats-data">
  <thead>
    <tr>
      <th>Citations</th>
      <th>Percentage of authors</th>
      <th>Authors</th>
    </tr>
  </thead>
  <tbody>
    {# Each row of table_data is (citation count, percentage of authors, author names). #}
    {% for citations, percentage, names in table_data %}
      <tr>
        <td>{{ citations }}</td>
        <td>{{ percentage|floatformat:2 }}%</td>
        <td>{% include "stats/includes/number_with_details_cell.html" with content_limit=10 %}</td>
      </tr>
    {% endfor %}
  </tbody>
</table>

<p>Note that the citation counts do not exclude self-references.</p>

View file

@ -58,7 +58,7 @@
<tr>
<td>{{ document_count }}</td>
<td>{{ percentage|floatformat:2 }}%</td>
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
<td>{% include "stats/includes/number_with_details_cell.html" with content_limit=10 %}</td>
</tr>
{% endfor %}
</tbody>

View file

@ -0,0 +1,74 @@
{# Chart and data table showing how h-index values are distributed across authors. #}
<h3>{{ stats_title }}</h3>

<div id="chart"></div>

<script>
  // Chart configuration; the series data is supplied by the view as JSON.
  var chartConf = {
      chart: {
          type: 'column'
      },
      title: {
          text: '{{ stats_title|escapejs }}'
      },
      xAxis: {
          tickInterval: 1,
          title: {
              // Values placed inside a JavaScript string literal go through
              // escapejs, the same way stats_title is handled above.
              text: 'h-index of {{ doc_label|escapejs }}s by author'
          }
      },
      yAxis: {
          title: {
              text: 'Percentage of authors'
          },
          labels: {
              formatter: function () {
                  return this.value + '%';
              }
          }
      },
      tooltip: {
          formatter: function () {
              // Heading: the h-index value of the hovered column.
              var s = '<b>' + ' h-index ' + this.x + '</b>';

              // One line per point, labelled with the y-axis title.
              $.each(this.points, function () {
                  s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y.toFixed(1) + '%';
              });

              return s;
          },
          shared: true
      },
      series: {{ chart_data }}
  };
</script>

<h3>Data</h3>

<table class="table table-condensed stats-data">
  <thead>
    <tr>
      <th>h-index</th>
      <th>Percentage of authors</th>
      <th>Authors</th>
    </tr>
  </thead>
  <tbody>
    {# Each row of table_data is (h-index, percentage of authors, author names). #}
    {% for h_index, percentage, names in table_data %}
      <tr>
        <td>{{ h_index }}</td>
        <td>{{ percentage|floatformat:2 }}%</td>
        <td>{% include "stats/includes/number_with_details_cell.html" with content_limit=25 %}</td>
      </tr>
    {% endfor %}
  </tbody>
</table>

<p>Hirsch index or h-index is a
<a href="https://www.wikipedia.org/wiki/H-index">measure of the
productivity and impact of the publications of an author</a>. An
author with an h-index of 5 has had 5 publications each cited at
least 5 times - to increase the index to 6, the 5 publications plus
1 more would have to have been cited at least 6 times, each. Thus a
high h-index requires many highly-cited publications.</p>

<p>Note that the h-index calculations do not exclude self-references.</p>

View file

@ -1 +1,7 @@
<a class="popover-details" href="" data-elements="{% for n in names|slice:":20" %}{{ n }}{% if not forloop.last %}|{% endif %}{% endfor %}" data-sliced="{% if names|length > 20 %}1{% endif %}">{{ names|length }}</a>
{# Renders the list "names" for a table cell. If content_limit is set and there are at most that many names, each name is written inline followed by a line break. #}
{# Otherwise a "popover-details" link showing the number of names is written; data-elements carries at most the first 20 names separated by "|", and data-sliced is "1" when there are more than 20. #}
{% if content_limit and names|length <= content_limit %}
{% for n in names %}
{{ n }}<br>
{% endfor %}
{% else %}
<a class="popover-details" href="" data-elements="{% for n in names|slice:":20" %}{{ n }}{% if not forloop.last %}|{% endif %}{% endfor %}" data-sliced="{% if names|length > 20 %}1{% endif %}">{{ names|length }}</a>
{% endif %}