Add citation and h-index statistics
- Legacy-Id: 12869
This commit is contained in:
parent
f180147cbd
commit
c61babb418
|
@ -14,7 +14,7 @@ from django.utils.text import slugify
|
|||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.person.name import name_parts, initials
|
||||
from ietf.person.name import name_parts, initials, plain_name
|
||||
from ietf.utils.mail import send_mail_preformatted
|
||||
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
|
||||
|
||||
|
@ -47,8 +47,7 @@ class PersonInfo(models.Model):
|
|||
return (first and first[0]+"." or "")+(middle or "")+" "+last+(suffix and " "+suffix or "")
|
||||
def plain_name(self):
    """Return this person's plain name, computed once per instance.

    The value is derived from ``self.name`` and memoised on the instance
    as ``_cached_plain_name``; later calls return the stored value.
    """
    if not hasattr(self, '_cached_plain_name'):
        # Inside the method body the bare name ``plain_name`` resolves to
        # the module-level helper imported from ietf.person.name, not to
        # this method, so this is not a recursive call.
        self._cached_plain_name = plain_name(self.name)
    return self._cached_plain_name
|
||||
def ascii_name(self):
|
||||
if not hasattr(self, '_cached_ascii_name'):
|
||||
|
|
|
@ -50,6 +50,10 @@ def initials(name):
|
|||
initials = u" ".join([ n[0]+'.' for n in given.split() ])
|
||||
return initials
|
||||
|
||||
def plain_name(name):
    """Return the plain form of *name*: its first and last parts joined by one space.

    The name is split with name_parts(); the prefix, middle and suffix
    components are discarded.
    """
    _prefix, given, _middle, family, _suffix = name_parts(name)
    return u" ".join((given, family))
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
name = u" ".join(sys.argv[1:])
|
||||
|
|
|
@ -5,7 +5,7 @@ import ietf.stats.views
|
|||
|
||||
urlpatterns = patterns('',
|
||||
url("^$", ietf.stats.views.stats_index),
|
||||
url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citation)/)?$", ietf.stats.views.document_stats),
|
||||
# The stats_type segment is optional as a whole; every alternative inside it
# must be non-empty, otherwise "document//" is routed with stats_type="".
url("^document/(?:(?P<stats_type>authors|pages|words|format|formlang|author/documents|author/affiliation|author/country|author/continent|author/citations|author/hindex)/)?$", ietf.stats.views.document_stats),
|
||||
url("^knowncountries/$", ietf.stats.views.known_countries_list),
|
||||
url("^review/(?:(?P<stats_type>completion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, ietf.stats.views.review_stats),
|
||||
)
|
||||
|
|
|
@ -23,9 +23,10 @@ from ietf.review.utils import (extract_review_request_data,
|
|||
from ietf.submit.models import Submission
|
||||
from ietf.group.models import Role, Group
|
||||
from ietf.person.models import Person
|
||||
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName
|
||||
from ietf.name.models import ReviewRequestStateName, ReviewResultName, CountryName, DocRelationshipName
|
||||
from ietf.person.name import plain_name
|
||||
from ietf.doc.models import DocAlias, Document, State
|
||||
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries
|
||||
from ietf.stats.utils import get_aliased_affiliations, get_aliased_countries, compute_hirsch_index
|
||||
from ietf.ietfauth.utils import has_role
|
||||
|
||||
def stats_index(request):
|
||||
|
@ -103,7 +104,8 @@ def document_stats(request, stats_type=None):
|
|||
("author/affiliation", "Affiliation"),
|
||||
("author/country", "Country"),
|
||||
("author/continent", "Continent"),
|
||||
("author/citation", "Citations"),
|
||||
("author/citations", "Citations"),
|
||||
("author/hindex", "Impact"),
|
||||
], lambda slug: build_document_stats_url(stats_type_override=slug))
|
||||
|
||||
|
||||
|
@ -346,7 +348,7 @@ def document_stats(request, stats_type=None):
|
|||
|
||||
person_filters &= Q(documentauthor__document__in=docs_within_time_constraint)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
# Select every person matching the accumulated filters. Restricting the
# queryset to one hard-coded draft would skew all author statistics.
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
if document_type == "rfc":
|
||||
doc_label = "RFC"
|
||||
|
@ -369,6 +371,8 @@ def document_stats(request, stats_type=None):
|
|||
|
||||
bins = defaultdict(list)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
for name, document_count in person_qs.values_list("name").annotate(Count("documentauthor")):
|
||||
bins[document_count].append(name)
|
||||
|
||||
|
@ -378,7 +382,7 @@ def document_stats(request, stats_type=None):
|
|||
for document_count, names in sorted(bins.iteritems(), key=lambda t: t[0]):
|
||||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
series_data.append((document_count, percentage))
|
||||
table_data.append((document_count, percentage, names))
|
||||
table_data.append((document_count, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
chart_data.append({
|
||||
"data": series_data,
|
||||
|
@ -390,6 +394,8 @@ def document_stats(request, stats_type=None):
|
|||
|
||||
bins = defaultdict(list)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
# Since people don't write the affiliation names in the
|
||||
# same way, and we don't want to go back and edit them
|
||||
# either, we transform them here.
|
||||
|
@ -410,7 +416,7 @@ def document_stats(request, stats_type=None):
|
|||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
if affiliation:
|
||||
series_data.append((affiliation, len(names)))
|
||||
table_data.append((affiliation, percentage, names))
|
||||
table_data.append((affiliation, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
series_data.sort(key=lambda t: t[1], reverse=True)
|
||||
series_data = series_data[:30]
|
||||
|
@ -428,6 +434,8 @@ def document_stats(request, stats_type=None):
|
|||
|
||||
bins = defaultdict(list)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
# Since people don't write the country names in the
|
||||
# same way, and we don't want to go back and edit them
|
||||
# either, we transform them here.
|
||||
|
@ -457,7 +465,7 @@ def document_stats(request, stats_type=None):
|
|||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
if country:
|
||||
series_data.append((country, len(names)))
|
||||
table_data.append((country, percentage, names))
|
||||
table_data.append((country, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
series_data.sort(key=lambda t: t[1], reverse=True)
|
||||
series_data = series_data[:30]
|
||||
|
@ -477,6 +485,8 @@ def document_stats(request, stats_type=None):
|
|||
|
||||
bins = defaultdict(list)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
name_country_set = set((name, country)
|
||||
for name, country in person_qs.values_list("name", "documentauthor__country"))
|
||||
|
||||
|
@ -497,7 +507,7 @@ def document_stats(request, stats_type=None):
|
|||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
if continent:
|
||||
series_data.append((continent, len(names)))
|
||||
table_data.append((continent, percentage, names))
|
||||
table_data.append((continent, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
series_data.sort(key=lambda t: t[1], reverse=True)
|
||||
|
||||
|
@ -506,6 +516,59 @@ def document_stats(request, stats_type=None):
|
|||
"animation": False,
|
||||
})
|
||||
|
||||
elif stats_type == "author/citations":
|
||||
stats_title = "Number of citations of {}s written by author".format(doc_label)
|
||||
|
||||
bins = defaultdict(list)
|
||||
|
||||
cite_relationships = list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold']))
|
||||
person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
for name, citations in person_qs.values_list("name").annotate(Count("documentauthor__document__docalias__relateddocument")):
|
||||
bins[citations].append(name)
|
||||
|
||||
total_persons = count_bins(bins)
|
||||
|
||||
series_data = []
|
||||
for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True):
|
||||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
series_data.append((citations, percentage))
|
||||
table_data.append((citations, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
chart_data.append({
|
||||
"data": sorted(series_data, key=lambda t: t[0]),
|
||||
"animation": False,
|
||||
})
|
||||
|
||||
elif stats_type == "author/hindex":
|
||||
stats_title = "h-index for {}s written by author".format(doc_label)
|
||||
|
||||
bins = defaultdict(list)
|
||||
|
||||
cite_relationships = list(DocRelationshipName.objects.filter(slug__in=['refnorm', 'refinfo', 'refunk', 'refold']))
|
||||
person_filters &= Q(documentauthor__document__docalias__relateddocument__relationship__in=cite_relationships)
|
||||
|
||||
person_qs = Person.objects.filter(person_filters)
|
||||
|
||||
values = person_qs.values_list("name", "documentauthor__document").annotate(Count("documentauthor__document__docalias__relateddocument"))
|
||||
for name, ts in itertools.groupby(values.order_by("name"), key=lambda t: t[0]):
|
||||
h_index = compute_hirsch_index([citations for _, document, citations in ts])
|
||||
bins[h_index].append(name)
|
||||
|
||||
total_persons = count_bins(bins)
|
||||
|
||||
series_data = []
|
||||
for citations, names in sorted(bins.iteritems(), key=lambda t: t[0], reverse=True):
|
||||
percentage = len(names) * 100.0 / (total_persons or 1)
|
||||
series_data.append((citations, percentage))
|
||||
table_data.append((citations, percentage, [plain_name(n) for n in names]))
|
||||
|
||||
chart_data.append({
|
||||
"data": sorted(series_data, key=lambda t: t[0]),
|
||||
"animation": False,
|
||||
})
|
||||
|
||||
return render(request, "stats/document_stats.html", {
|
||||
"chart_data": mark_safe(json.dumps(chart_data)),
|
||||
|
|
66
ietf/templates/stats/document_stats_author_citations.html
Normal file
66
ietf/templates/stats/document_stats_author_citations.html
Normal file
|
@ -0,0 +1,66 @@
|
|||
<h3>{{ stats_title }}</h3>
|
||||
|
||||
<div id="chart"></div>
|
||||
|
||||
<script>
|
||||
var chartConf = {
|
||||
chart: {
|
||||
type: 'area'
|
||||
},
|
||||
title: {
|
||||
text: '{{ stats_title|escapejs }}'
|
||||
},
|
||||
xAxis: {
|
||||
title: {
|
||||
// doc_label is interpolated into a JS string literal, so escape it the same way as stats_title above.
text: 'Number of citations of {{ doc_label|escapejs }}s by author'
|
||||
},
|
||||
max: 500
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
text: 'Percentage of authors'
|
||||
},
|
||||
labels: {
|
||||
formatter: function () {
|
||||
return this.value + '%';
|
||||
}
|
||||
}
|
||||
},
|
||||
tooltip: {
|
||||
formatter: function () {
|
||||
var s = '<b>' + this.x + ' ' + (this.x == 1 ? "citation" : 'citations') + '</b>';
|
||||
|
||||
$.each(this.points, function () {
|
||||
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y.toFixed(1) + '%';
|
||||
});
|
||||
|
||||
return s;
|
||||
},
|
||||
shared: true
|
||||
},
|
||||
series: {{ chart_data }}
|
||||
};
|
||||
</script>
|
||||
|
||||
<h3>Data</h3>
|
||||
|
||||
<table class="table table-condensed stats-data">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Citations</th>
|
||||
<th>Percentage of authors</th>
|
||||
<th>Authors</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for citations, percentage, names in table_data %}
|
||||
<tr>
|
||||
<td>{{ citations }}</td>
|
||||
<td>{{ percentage|floatformat:2 }}%</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" with content_limit=10 %}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>Note that the citation counts do not exclude self-references.</p>
|
|
@ -58,7 +58,7 @@
|
|||
<tr>
|
||||
<td>{{ document_count }}</td>
|
||||
<td>{{ percentage|floatformat:2 }}%</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" %}</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" with content_limit=10 %}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
|
|
74
ietf/templates/stats/document_stats_author_hindex.html
Normal file
74
ietf/templates/stats/document_stats_author_hindex.html
Normal file
|
@ -0,0 +1,74 @@
|
|||
<h3>{{ stats_title }}</h3>
|
||||
|
||||
<div id="chart"></div>
|
||||
|
||||
<script>
|
||||
var chartConf = {
|
||||
chart: {
|
||||
type: 'column'
|
||||
},
|
||||
title: {
|
||||
text: '{{ stats_title|escapejs }}'
|
||||
},
|
||||
xAxis: {
|
||||
tickInterval: 1,
|
||||
title: {
|
||||
// doc_label is interpolated into a JS string literal, so escape it the same way as stats_title above.
text: 'h-index of {{ doc_label|escapejs }}s by author'
|
||||
}
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
text: 'Percentage of authors'
|
||||
},
|
||||
labels: {
|
||||
formatter: function () {
|
||||
return this.value + '%';
|
||||
}
|
||||
}
|
||||
},
|
||||
tooltip: {
|
||||
formatter: function () {
|
||||
var s = '<b>' + ' h-index ' + this.x + '</b>';
|
||||
|
||||
$.each(this.points, function () {
|
||||
s += '<br/>' + chartConf.yAxis.title.text + ': ' + this.y.toFixed(1) + '%';
|
||||
});
|
||||
|
||||
return s;
|
||||
},
|
||||
shared: true
|
||||
},
|
||||
series: {{ chart_data }}
|
||||
};
|
||||
</script>
|
||||
|
||||
<h3>Data</h3>
|
||||
|
||||
<table class="table table-condensed stats-data">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>h-index</th>
|
||||
<th>Percentage of authors</th>
|
||||
<th>Authors</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for h_index, percentage, names in table_data %}
|
||||
<tr>
|
||||
<td>{{ h_index }}</td>
|
||||
<td>{{ percentage|floatformat:2 }}%</td>
|
||||
<td>{% include "stats/includes/number_with_details_cell.html" with content_limit=25 %}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>Hirsch index or h-index is a
|
||||
<a href="https://www.wikipedia.org/wiki/H-index">measure of the
|
||||
productivity and impact of the publications of an author</a>. An
|
||||
author with an h-index of 5 has had 5 publications each cited at
|
||||
least 5 times - to increase the index to 6, the 5 publications plus
|
||||
1 more would have to have been cited at least 6 times, each. Thus a
|
||||
high h-index requires many highly-cited publications.</p>
|
||||
|
||||
<p>Note that the h-index calculations do not exclude self-references.</p>
|
|
@ -1 +1,7 @@
|
|||
<a class="popover-details" href="" data-elements="{% for n in names|slice:":20" %}{{ n }}{% if not forloop.last %}|{% endif %}{% endfor %}" data-sliced="{% if names|length > 20 %}1{% endif %}">{{ names|length }}</a>
|
||||
{% if content_limit and names|length <= content_limit %}
|
||||
{% for n in names %}
|
||||
{{ n }}<br>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<a class="popover-details" href="" data-elements="{% for n in names|slice:":20" %}{{ n }}{% if not forloop.last %}|{% endif %}{% endfor %}" data-sliced="{% if names|length > 20 %}1{% endif %}">{{ names|length }}</a>
|
||||
{% endif %}
|
||||
|
|
Loading…
Reference in a new issue