feat: v3-era aware feed for rfcs (#5828)

* feat: v3-era aware feed for rfcs

* chore: remove obviated comment

* test: improve rfc feed tests
This commit is contained in:
Robert Sparks 2023-06-16 15:52:42 -05:00 committed by GitHub
parent ac65232115
commit 3c016cc5c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 172 additions and 60 deletions

View file

@ -1,6 +1,7 @@
# Copyright The IETF Trust 2007-2020, All Rights Reserved # Copyright The IETF Trust 2007-2020, All Rights Reserved
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import debug # pyflakes:ignore
import datetime import datetime
import unicodedata import unicodedata
@ -8,8 +9,12 @@ import unicodedata
from django.contrib.syndication.views import Feed, FeedDoesNotExist from django.contrib.syndication.views import Feed, FeedDoesNotExist
from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed
from django.urls import reverse as urlreverse from django.urls import reverse as urlreverse
from django.template.defaultfilters import truncatewords, truncatewords_html, date as datefilter from django.template.defaultfilters import (
from django.template.defaultfilters import linebreaks # type: ignore truncatewords,
truncatewords_html,
date as datefilter,
)
from django.template.defaultfilters import linebreaks # type: ignore
from django.utils import timezone from django.utils import timezone
from django.utils.html import strip_tags from django.utils.html import strip_tags
@ -21,12 +26,12 @@ from ietf.utils.timezone import RPC_TZINFO
def strip_control_characters(s):
    """Replace Unicode control / non-printing characters in a string.

    Every character whose Unicode general category starts with "C" is
    swapped for U+FFFD REPLACEMENT CHARACTER; all other characters are
    kept as they are. The result therefore has the same length as ``s``.
    """
    replacement_char = unicodedata.lookup("REPLACEMENT CHARACTER")
    return "".join(
        replacement_char if unicodedata.category(c).startswith("C") else c
        for c in s
    )
class DocumentChangesFeed(Feed): class DocumentChangesFeed(Feed):
feed_type = Atom1Feed feed_type = Atom1Feed
@ -39,25 +44,37 @@ class DocumentChangesFeed(Feed):
def link(self, obj):
    """Return the URL of the document-history view for ``obj``.

    Raises FeedDoesNotExist when ``obj`` is None.
    """
    if obj is None:
        raise FeedDoesNotExist
    return urlreverse(
        "ietf.doc.views_doc.document_history",
        kwargs={"name": obj.canonical_name()},
    )
def subtitle(self, obj):
    """Return the feed subtitle, built from ``obj.display_name()``."""
    return "History of change entries for %s." % obj.display_name()
def items(self, obj):
    """Return the events of ``obj``, ordered by descending time, then id.

    The queryset is handed to augment_events_with_revision (defined
    elsewhere) before being returned; presumably that sets the ``rev``
    attribute read by item_title -- confirm against that helper.
    """
    events = (
        obj.docevent_set.all()
        .order_by("-time", "-id")
        .select_related("by", "newrevisiondocevent", "submissiondocevent")
    )
    augment_events_with_revision(obj, events)
    return events
def item_title(self, item):
    """Return "[<by>] <description> [rev. <rev>]" for an event.

    The description has its tags stripped and is cut to 15 words; control
    characters in the finished title are replaced.
    """
    summary = truncatewords(strip_tags(item.desc), 15)
    title = "[%s] %s [rev. %s]" % (item.by, summary, item.rev)
    return strip_control_characters(title)
def item_description(self, item):
    """Return the event description, cut to 20 words.

    The text is first passed through format_textarea (defined elsewhere),
    then truncated with truncatewords_html, and finally has its control
    characters replaced.
    """
    formatted = format_textarea(item.desc)
    return strip_control_characters(truncatewords_html(formatted, 20))
def item_pubdate(self, item):
    """Return the event's ``time`` as the entry's publication date."""
    return item.time
@ -66,17 +83,28 @@ class DocumentChangesFeed(Feed):
return str(item.by) return str(item.by)
def item_link(self, item):
    """Return the history URL of the event's document, anchored at this event.

    The anchor is "#history-<pk>" where <pk> is the event's primary key.
    """
    history_url = urlreverse(
        "ietf.doc.views_doc.document_history",
        kwargs={"name": item.doc.canonical_name()},
    )
    return history_url + "#history-%s" % item.pk
class InLastCallFeed(Feed): class InLastCallFeed(Feed):
title = "Documents in Last Call" title = "Documents in Last Call"
subtitle = "Announcements for documents in last call." subtitle = "Announcements for documents in last call."
feed_type = Atom1Feed feed_type = Atom1Feed
author_name = 'IESG Secretary' author_name = "IESG Secretary"
link = "/doc/iesg/last-call/" link = "/doc/iesg/last-call/"
def items(self): def items(self):
docs = list(Document.objects.filter(type="draft", states=State.objects.get(type="draft-iesg", slug="lc"))) docs = list(
Document.objects.filter(
type="draft", states=State.objects.get(type="draft-iesg", slug="lc")
)
)
for d in docs: for d in docs:
d.lc_event = d.latest_event(LastCallDocEvent, type="sent_last_call") d.lc_event = d.latest_event(LastCallDocEvent, type="sent_last_call")
@ -86,9 +114,11 @@ class InLastCallFeed(Feed):
return docs return docs
def item_title(self, item):
    """Return "<name> (<start> - <expiry>)" for a document in last call.

    Both dates come from ``item.lc_event``: its ``time`` formatted as
    "F j" and its ``expires`` formatted as "F j, Y".
    """
    lc_event = item.lc_event
    return "%s (%s - %s)" % (
        item.name,
        datefilter(lc_event.time, "F j"),
        datefilter(lc_event.expires, "F j, Y"),
    )
def item_description(self, item):
    """Return the last-call event description with line breaks converted.

    The text goes through the ``linebreaks`` template filter and then has
    its control characters replaced.
    """
    return strip_control_characters(linebreaks(item.lc_event.desc))
@ -96,33 +126,55 @@ class InLastCallFeed(Feed):
def item_pubdate(self, item):
    """Return the time of the document's last-call event."""
    return item.lc_event.time
class Rss201WithNamespacesFeed(Rss201rev2Feed):
    """RSS 2.0 generator that adds dcterms:* and media:content item elements."""

    def root_attributes(self):
        """Return the root attributes plus the dcterms, media and xsi namespaces."""
        attrs = super().root_attributes()
        attrs["xmlns:dcterms"] = "http://purl.org/dc/terms/"
        attrs["xmlns:media"] = "http://search.yahoo.com/mrss/"
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        return attrs

    def add_item_elements(self, handler, item):
        """Write the standard item elements, then the extra ones found in ``item``.

        Keys read from ``item`` (each is skipped when absent or None):
        ``dcterms_abstract``, ``dcterms_accessRights``, ``dcterms_format``,
        ``dcterms_publisher``, ``doi``, ``doiuri`` and ``media_contents``.
        Each entry of ``media_contents`` must provide ``url`` and
        ``media_type``; ``is_format_of`` is optional.
        """
        super().add_item_elements(handler, item)

        for element_name in ("abstract", "accessRights", "format", "publisher"):
            value = item.get("dcterms_%s" % element_name)
            if value is None:
                continue
            # Only the publisher element carries an xsi:type attribute.
            attrs = {"xsi:type": "dcterms:local"} if element_name == "publisher" else {}
            handler.addQuickElement("dcterms:%s" % element_name, value, attrs)

        # The DOI is emitted twice: once as a bare DOI, once as a URI.
        for key, xsi_type in (("doi", "dcterms:doi"), ("doiuri", "dcterms:uri")):
            value = item.get(key)
            if value is not None:
                handler.addQuickElement(
                    "dcterms:identifier", value, {"xsi:type": xsi_type}
                )

        # TODO: consider using media:group
        for media_content in item.get("media_contents") or ():
            handler.startElement(
                "media:content",
                {
                    "url": media_content["url"],
                    "type": media_content["media_type"],
                },
            )
            if "is_format_of" in media_content:
                handler.addQuickElement(
                    "dcterms:isFormatOf", media_content["is_format_of"]
                )
            handler.endElement("media:content")
class RfcFeed(Feed): class RfcFeed(Feed):
feed_type = Rss201WithNamespacesFeed feed_type = Rss201WithNamespacesFeed
@ -130,55 +182,96 @@ class RfcFeed(Feed):
author_name = "RFC Editor" author_name = "RFC Editor"
link = "https://www.rfc-editor.org/rfc-index2.html" link = "https://www.rfc-editor.org/rfc-index2.html"
def get_object(self,request,year=None): def get_object(self, request, year=None):
self.year = year self.year = year
def items(self):
    """Return the documents of ``published_rfc`` events, newest first.

    When ``self.year`` is set, only events from that year (with the year
    boundaries built using RPC_TZINFO) are used; otherwise only events from
    the last 8 days. Each returned document gets a ``publication_time``
    attribute holding the time of its event.
    """
    if self.year:
        # Find published RFCs based on their official publication year
        year = int(self.year)
        start_of_year = datetime.datetime(year, 1, 1, tzinfo=RPC_TZINFO)
        start_of_next_year = datetime.datetime(year + 1, 1, 1, tzinfo=RPC_TZINFO)
        rfc_events = DocEvent.objects.filter(
            type="published_rfc",
            time__gte=start_of_year,
            time__lt=start_of_next_year,
        ).order_by("-time")
    else:
        cutoff = timezone.now() - datetime.timedelta(days=8)
        rfc_events = DocEvent.objects.filter(
            type="published_rfc", time__gte=cutoff
        ).order_by("-time")

    docs = []
    for event in rfc_events:
        doc = event.doc
        doc.publication_time = event.time
        docs.append(doc)
    return docs
def item_title(self, item):
    """Return "<canonical name> : <title>" for the document."""
    return "%s : %s" % (item.canonical_name(), item.title)
def item_description(self, item):
    """Return the document's abstract as the item description."""
    return item.abstract
def item_link(self, item):
    """Return the rfc-editor.org info-page URL for the document."""
    return "https://rfc-editor.org/info/%s" % item.canonical_name()
def item_pubdate(self, item):
    """Return the ``publication_time`` attached to the document by ``items``."""
    return item.publication_time
def item_extra_kwargs(self, item): def item_extra_kwargs(self, item):
extra = super(RfcFeed, self).item_extra_kwargs(item) extra = super(RfcFeed, self).item_extra_kwargs(item)
extra.update({'dcterms_accessRights': 'gratis'}) extra.update({"dcterms_accessRights": "gratis"})
extra.update({'dcterms_format': 'text/html'}) extra.update({"dcterms_format": "text/html"})
extra.update({'media_content': {'url': 'https://rfc-editor.org/rfc/%s.txt' % item.canonical_name(), media_contents = []
'link_url': self.item_link(item) if int(item.rfc_number()) < 8650:
} if int(item.rfc_number()) not in [8, 9, 51, 418, 500, 530, 589]:
}) for fmt, media_type in [("txt", "text/plain"), ("html", "text/html")]:
extra.update({'doi':'10.17487/%s' % item.canonical_name().upper()}) media_contents.append(
extra.update({'doiuri':'http://dx.doi.org/10.17487/%s' % item.canonical_name().upper()}) {
"url": f"https://rfc-editor.org/rfc/{item.canonical_name()}.{fmt}",
"media_type": media_type,
"is_format_of": self.item_link(item),
}
)
if int(item.rfc_number()) not in [571, 587]:
media_contents.append(
{
"url": f"https://www.rfc-editor.org/rfc/pdfrfc/{item.canonical_name()}.txt.pdf",
"media_type": "application/pdf",
"is_format_of": self.item_link(item),
}
)
else:
media_contents.append(
{
"url": f"https://www.rfc-editor.org/rfc/{item.canonical_name()}.xml",
"media_type": "application/rfc+xml",
}
)
for fmt, media_type in [
("txt", "text/plain"),
("html", "text/html"),
("pdf", "application/pdf"),
]:
media_contents.append(
{
"url": f"https://rfc-editor.org/rfc/{item.canonical_name()}.{fmt}",
"media_type": media_type,
"is_format_of": f"https://www.rfc-editor.org/rfc/{item.canonical_name()}.xml",
}
)
extra.update({"media_contents": media_contents})
extra.update({"doi": "10.17487/%s" % item.canonical_name().upper()})
extra.update(
{"doiuri": "http://dx.doi.org/10.17487/%s" % item.canonical_name().upper()}
)
#TODO
# R104 Publisher (Mandatory - but we need a string from them first) # R104 Publisher (Mandatory - but we need a string from them first)
extra.update({'dcterms_publisher':'rfc-editor.org'}) extra.update({"dcterms_publisher": "rfc-editor.org"})
#TODO MAYBE (Optional stuff) # TODO MAYBE (Optional stuff)
# R108 License # R108 License
# R115 Creator/Contributor (which would we use?) # R115 Creator/Contributor (which would we use?)
# F305 Checksum (do they use it?) (or should we put our digital signature in here somewhere?) # F305 Checksum (do they use it?) (or should we put our digital signature in here somewhere?)
@ -188,4 +281,3 @@ class RfcFeed(Feed):
# R118 Keyword # R118 Keyword
return extra return extra

View file

@ -1911,11 +1911,31 @@ class DocTestCase(TestCase):
self.assertContains(r, doc.name) self.assertContains(r, doc.name)
def test_rfc_feed(self):
    """Check which media:content formats the RFC feed lists per RFC era.

    An RFC named rfc9000 must be listed with xml, txt, html and pdf
    formats; RFCs with factory-default (low) numbers must be listed with
    txt, html and pdf only.
    """
    media_content_tag = "{http://search.yahoo.com/mrss/}content"

    rfc = WgRfcFactory(alias2__name="rfc9000")
    DocEventFactory(doc=rfc, type="published_rfc")
    r = self.client.get("/feed/rfc/")
    # assertEqual, not assertTrue: assertTrue(status, 200) treats 200 as the
    # failure message and passes for any truthy status code.
    self.assertEqual(r.status_code, 200)
    q = PyQuery(r.content[39:])  # Strip off the xml declaration
    self.assertEqual(len(q("item")), 1)
    item = q("item")[0]
    media_content = item.findall(media_content_tag)
    self.assertEqual(len(media_content), 4)
    types = {m.attrib["type"] for m in media_content}
    self.assertEqual(
        types,
        {"application/rfc+xml", "text/plain", "text/html", "application/pdf"},
    )

    rfcs_2016 = WgRfcFactory.create_batch(3)  # rfc numbers will be well below v3
    for rfc in rfcs_2016:
        e = DocEventFactory(doc=rfc, type="published_rfc")
        e.time = e.time.replace(year=2016)
        e.save()
    r = self.client.get("/feed/rfc/2016")
    self.assertEqual(r.status_code, 200)
    q = PyQuery(r.content[39:])  # Strip off the xml declaration
    self.assertEqual(len(q("item")), 3)
    item = q("item")[0]
    media_content = item.findall(media_content_tag)
    self.assertEqual(len(media_content), 3)
    types = {m.attrib["type"] for m in media_content}
    self.assertEqual(types, {"text/plain", "text/html", "application/pdf"})
def test_state_help(self): def test_state_help(self):
url = urlreverse('ietf.doc.views_help.state_help', kwargs=dict(type="draft-iesg")) url = urlreverse('ietf.doc.views_help.state_help', kwargs=dict(type="draft-iesg"))