Adjust unknown bin in the affiliation statistics too, adjust wording

- Legacy-Id: 12859
This commit is contained in:
Ole Laursen 2017-02-16 12:32:03 +00:00
parent 1a0e4599c5
commit d06f56fc0d
4 changed files with 23 additions and 20 deletions

View file

@ -353,6 +353,17 @@ def document_stats(request, stats_type=None):
total_persons = person_qs.distinct().count()
def prune_unknown_bin_with_known(bins):
    """Remove from the unknown bin all authors found in a named/known bin.

    ``bins`` maps a bin label to a list of author names. The bin with the
    empty-string label ``""`` is the unknown bin; every bin with a truthy
    label counts as known. The mapping is modified in place (only the
    ``""`` entry is replaced) and nothing is returned.
    """
    # .items() instead of the Python-2-only .iteritems(), so the helper
    # works unchanged on both Python 2 and Python 3
    all_known = set(n for b, names in bins.items() if b for n in names)

    # keep the relative order (and any repeats) of the names that remain
    bins[""] = [name for name in bins[""] if name not in all_known]
if stats_type == "author/documents":
stats_title = "Number of {}s per author".format(doc_label)
@ -389,6 +400,8 @@ def document_stats(request, stats_type=None):
for name, affiliation in name_affiliation_set:
bins[aliases.get(affiliation, affiliation)].append(name)
prune_unknown_bin_with_known(bins)
series_data = []
for affiliation, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
percentage = len(names) * 100.0 / total_persons
@ -433,13 +446,7 @@ def document_stats(request, stats_type=None):
if c and c.in_eu:
bins[eu_name].append(name)
# remove from the unknown bin all authors with a known country
all_known = set(n for b, names in bins.iteritems() if b for n in names)
unknown = []
for name in bins[""]:
if name not in all_known:
unknown.append(name)
bins[""] = unknown
prune_unknown_bin_with_known(bins)
series_data = []
for country, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):
@ -478,13 +485,7 @@ def document_stats(request, stats_type=None):
continent_name = country_to_continent.get(country_name, "")
bins[continent_name].append(name)
# remove from the unknown bin all authors with a known continent
all_known = set(n for b, names in bins.iteritems() if b for n in names)
unknown = []
for name in bins[""]:
if name not in all_known:
unknown.append(name)
bins[""] = unknown
prune_unknown_bin_with_known(bins)
series_data = []
for continent, names in sorted(bins.iteritems(), key=lambda t: t[0].lower()):

View file

@ -58,15 +58,17 @@
</tbody>
</table>
<p>Some authors are authors of multiple documents with different
affiliation information associated, so the sum of multiple rows in the
table can be more than 100%.</p>
<p>The statistics are based entirely on the author affiliation
provided with each draft. Since this may vary across documents, an
author may be counted with more than one affiliation, making the
total sum more than 100%.</p>
<h3>Affiliation Aliases</h3>
<p>In generating the above statistics, some heuristics have been applied to determine the affiliation of each author.</p>
<p>In generating the above statistics, some heuristics have been
applied to determine the affiliations of each author.</p>
{% if request.GET.showaliases %}
<p><a href="{{ hide_aliases_url }}" class="btn btn-default">Hide generated aliases</a></p>

View file

@ -59,6 +59,6 @@
</table>
<p>The statistics are based entirely on the author addresses provided
in each draft. Since this varies across documents, a travelling
with each draft. Since this varies across documents, a travelling
author may be counted in more than one country, making the total sum
more than 100%.</p>

View file

@ -59,7 +59,7 @@
</table>
<p>The statistics are based entirely on the author addresses provided
in each draft. Since this varies across documents, a travelling
with each draft. Since this varies across documents, a travelling
author may be counted in more than one country, making the total sum
more than 100%.</p>