From 3c016cc5c86dcae72fcefc2e09840c52d62cf124 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 16 Jun 2023 15:52:42 -0500 Subject: [PATCH] feat: v3-era aware feed for rfcs (#5828) * feat: v3-era aware feed for rfcs * chore: remove obviated comment * test: improve rfc feed tests --- ietf/doc/feeds.py | 210 +++++++++++++++++++++++++++++++++------------- ietf/doc/tests.py | 22 ++++- 2 files changed, 172 insertions(+), 60 deletions(-) diff --git a/ietf/doc/feeds.py b/ietf/doc/feeds.py index 92871efc3..c5bb467e9 100644 --- a/ietf/doc/feeds.py +++ b/ietf/doc/feeds.py @@ -1,6 +1,7 @@ # Copyright The IETF Trust 2007-2020, All Rights Reserved # -*- coding: utf-8 -*- +import debug # pyflakes:ignore import datetime import unicodedata @@ -8,8 +9,12 @@ import unicodedata from django.contrib.syndication.views import Feed, FeedDoesNotExist from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed from django.urls import reverse as urlreverse -from django.template.defaultfilters import truncatewords, truncatewords_html, date as datefilter -from django.template.defaultfilters import linebreaks # type: ignore +from django.template.defaultfilters import ( + truncatewords, + truncatewords_html, + date as datefilter, +) +from django.template.defaultfilters import linebreaks # type: ignore from django.utils import timezone from django.utils.html import strip_tags @@ -21,12 +26,12 @@ from ietf.utils.timezone import RPC_TZINFO def strip_control_characters(s): """Remove Unicode control / non-printing characters from a string""" - replacement_char = unicodedata.lookup('REPLACEMENT CHARACTER') - return ''.join( - replacement_char if unicodedata.category(c)[0] == 'C' else c - for c in s + replacement_char = unicodedata.lookup("REPLACEMENT CHARACTER") + return "".join( + replacement_char if unicodedata.category(c)[0] == "C" else c for c in s ) + class DocumentChangesFeed(Feed): feed_type = Atom1Feed @@ -39,25 +44,37 @@ class DocumentChangesFeed(Feed): def link(self, obj): if obj is None: raise FeedDoesNotExist - return urlreverse('ietf.doc.views_doc.document_history', kwargs=dict(name=obj.canonical_name())) + return urlreverse( + "ietf.doc.views_doc.document_history", + kwargs=dict(name=obj.canonical_name()), + ) def subtitle(self, obj): return "History of change entries for %s." % obj.display_name() def items(self, obj): - events = obj.docevent_set.all().order_by("-time","-id").select_related("by", "newrevisiondocevent", "submissiondocevent") + events = ( + obj.docevent_set.all() + .order_by("-time", "-id") + .select_related("by", "newrevisiondocevent", "submissiondocevent") + ) augment_events_with_revision(obj, events) return events def item_title(self, item): - return strip_control_characters("[%s] %s [rev. %s]" % ( - item.by, - truncatewords(strip_tags(item.desc), 15), - item.rev, - )) + return strip_control_characters( + "[%s] %s [rev. %s]" + % ( + item.by, + truncatewords(strip_tags(item.desc), 15), + item.rev, + ) + ) def item_description(self, item): - return strip_control_characters(truncatewords_html(format_textarea(item.desc), 20)) + return strip_control_characters( + truncatewords_html(format_textarea(item.desc), 20) + ) def item_pubdate(self, item): return item.time @@ -66,17 +83,28 @@ class DocumentChangesFeed(Feed): return str(item.by) def item_link(self, item): - return urlreverse('ietf.doc.views_doc.document_history', kwargs=dict(name=item.doc.canonical_name())) + "#history-%s" % item.pk + return ( + urlreverse( + "ietf.doc.views_doc.document_history", + kwargs=dict(name=item.doc.canonical_name()), + ) + + "#history-%s" % item.pk + ) + class InLastCallFeed(Feed): title = "Documents in Last Call" subtitle = "Announcements for documents in last call." feed_type = Atom1Feed - author_name = 'IESG Secretary' + author_name = "IESG Secretary" link = "/doc/iesg/last-call/" def items(self): - docs = list(Document.objects.filter(type="draft", states=State.objects.get(type="draft-iesg", slug="lc"))) + docs = list( + Document.objects.filter( + type="draft", states=State.objects.get(type="draft-iesg", slug="lc") + ) + ) for d in docs: d.lc_event = d.latest_event(LastCallDocEvent, type="sent_last_call") @@ -86,9 +114,11 @@ class InLastCallFeed(Feed): return docs def item_title(self, item): - return "%s (%s - %s)" % (item.name, - datefilter(item.lc_event.time, "F j"), - datefilter(item.lc_event.expires, "F j, Y")) + return "%s (%s - %s)" % ( + item.name, + datefilter(item.lc_event.time, "F j"), + datefilter(item.lc_event.expires, "F j, Y"), + ) def item_description(self, item): return strip_control_characters(linebreaks(item.lc_event.desc)) @@ -96,33 +126,55 @@ class InLastCallFeed(Feed): def item_pubdate(self, item): return item.lc_event.time + class Rss201WithNamespacesFeed(Rss201rev2Feed): def root_attributes(self): attrs = super(Rss201WithNamespacesFeed, self).root_attributes() - attrs['xmlns:dcterms'] = 'http://purl.org/dc/terms/' - attrs['xmlns:media'] = 'http://search.yahoo.com/mrss/' - attrs['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance' + attrs["xmlns:dcterms"] = "http://purl.org/dc/terms/" + attrs["xmlns:media"] = "http://search.yahoo.com/mrss/" + attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance" return attrs def add_item_elements(self, handler, item): super(Rss201WithNamespacesFeed, self).add_item_elements(handler, item) - for element_name in ['abstract','accessRights', 'format', 'publisher',]: - dc_item_name = 'dcterms_%s' % element_name - dc_element_name = 'dcterms:%s' % element_name - attrs= {'xsi:type':'dcterms:local'} if element_name == 'publisher' else {} + for element_name in [ + "abstract", + "accessRights", + "format", + "publisher", + ]: + dc_item_name = "dcterms_%s" % element_name + dc_element_name = "dcterms:%s" % element_name + attrs = {"xsi:type": "dcterms:local"} if element_name == "publisher" else {} if dc_item_name in item and item[dc_item_name] is not None: - handler.addQuickElement(dc_element_name,item[dc_item_name],attrs) + handler.addQuickElement(dc_element_name, item[dc_item_name], attrs) - if 'doi' in item and item['doi'] is not None: - handler.addQuickElement('dcterms:identifier',item['doi'],{'xsi:type':'dcterms:doi'}) - if 'doiuri' in item and item['doiuri'] is not None: - handler.addQuickElement('dcterms:identifier',item['doiuri'],{'xsi:type':'dcterms:uri'}) + if "doi" in item and item["doi"] is not None: + handler.addQuickElement( + "dcterms:identifier", item["doi"], {"xsi:type": "dcterms:doi"} + ) + if "doiuri" in item and item["doiuri"] is not None: + handler.addQuickElement( + "dcterms:identifier", item["doiuri"], {"xsi:type": "dcterms:uri"} + ) + + # TODO: consider using media:group + if "media_contents" in item and item["media_contents"] is not None: + for media_content in item["media_contents"]: + handler.startElement( + "media:content", + { + "url": media_content["url"], + "type": media_content["media_type"], + }, + ) + if "is_format_of" in media_content: + handler.addQuickElement( + "dcterms:isFormatOf", media_content["is_format_of"] + ) + handler.endElement("media:content") - if 'media_content' in item and item['media_content'] is not None: - handler.startElement('media:content',{'url':item['media_content']['url'],'type':'text/plain'}) - handler.addQuickElement('dcterms:isFormatOf',item['media_content']['link_url']) - handler.endElement('media:content') class RfcFeed(Feed): feed_type = Rss201WithNamespacesFeed @@ -130,55 +182,96 @@ class RfcFeed(Feed): author_name = "RFC Editor" link = "https://www.rfc-editor.org/rfc-index2.html" - def get_object(self,request,year=None): + def get_object(self, request, year=None): self.year = year - + def items(self): if self.year: # Find published RFCs based on their official publication year start_of_year = datetime.datetime(int(self.year), 1, 1, tzinfo=RPC_TZINFO) - start_of_next_year = datetime.datetime(int(self.year) + 1, 1, 1, tzinfo=RPC_TZINFO) + start_of_next_year = datetime.datetime( + int(self.year) + 1, 1, 1, tzinfo=RPC_TZINFO + ) rfc_events = DocEvent.objects.filter( - type='published_rfc', + type="published_rfc", time__gte=start_of_year, time__lt=start_of_next_year, - ).order_by('-time') + ).order_by("-time") else: cutoff = timezone.now() - datetime.timedelta(days=8) - rfc_events = DocEvent.objects.filter(type='published_rfc',time__gte=cutoff).order_by('-time') + rfc_events = DocEvent.objects.filter( + type="published_rfc", time__gte=cutoff + ).order_by("-time") results = [(e.doc, e.time) for e in rfc_events] - for doc,time in results: + for doc, time in results: doc.publication_time = time - return [doc for doc,time in results] - + return [doc for doc, time in results] + def item_title(self, item): - return "%s : %s" % (item.canonical_name(),item.title) + return "%s : %s" % (item.canonical_name(), item.title) def item_description(self, item): return item.abstract def item_link(self, item): - return "https://rfc-editor.org/info/%s"%item.canonical_name() + return "https://rfc-editor.org/info/%s" % item.canonical_name() def item_pubdate(self, item): return item.publication_time def item_extra_kwargs(self, item): extra = super(RfcFeed, self).item_extra_kwargs(item) - extra.update({'dcterms_accessRights': 'gratis'}) - extra.update({'dcterms_format': 'text/html'}) - extra.update({'media_content': {'url': 'https://rfc-editor.org/rfc/%s.txt' % item.canonical_name(), - 'link_url': self.item_link(item) - } - }) - extra.update({'doi':'10.17487/%s' % item.canonical_name().upper()}) - extra.update({'doiuri':'http://dx.doi.org/10.17487/%s' % item.canonical_name().upper()}) + extra.update({"dcterms_accessRights": "gratis"}) + extra.update({"dcterms_format": "text/html"}) + media_contents = [] + if int(item.rfc_number()) < 8650: + if int(item.rfc_number()) not in [8, 9, 51, 418, 500, 530, 589]: + for fmt, media_type in [("txt", "text/plain"), ("html", "text/html")]: + media_contents.append( + { + "url": f"https://rfc-editor.org/rfc/{item.canonical_name()}.{fmt}", + "media_type": media_type, + "is_format_of": self.item_link(item), + } + ) + if int(item.rfc_number()) not in [571, 587]: + media_contents.append( + { + "url": f"https://www.rfc-editor.org/rfc/pdfrfc/{item.canonical_name()}.txt.pdf", + "media_type": "application/pdf", + "is_format_of": self.item_link(item), + } + ) + else: + media_contents.append( + { + "url": f"https://www.rfc-editor.org/rfc/{item.canonical_name()}.xml", + "media_type": "application/rfc+xml", + } + ) + for fmt, media_type in [ + ("txt", "text/plain"), + ("html", "text/html"), + ("pdf", "application/pdf"), + ]: + media_contents.append( + { + "url": f"https://rfc-editor.org/rfc/{item.canonical_name()}.{fmt}", + "media_type": media_type, + "is_format_of": f"https://www.rfc-editor.org/rfc/{item.canonical_name()}.xml", + } + ) + extra.update({"media_contents": media_contents}) + + extra.update({"doi": "10.17487/%s" % item.canonical_name().upper()}) + extra.update( + {"doiuri": "http://dx.doi.org/10.17487/%s" % item.canonical_name().upper()} + ) - #TODO # R104 Publisher (Mandatory - but we need a string from them first) - extra.update({'dcterms_publisher':'rfc-editor.org'}) + extra.update({"dcterms_publisher": "rfc-editor.org"}) - #TODO MAYBE (Optional stuff) + # TODO MAYBE (Optional stuff) # R108 License # R115 Creator/Contributor (which would we use?) # F305 Checksum (do they use it?) (or should we put the our digital signature in here somewhere?) @@ -188,4 +281,3 @@ class RfcFeed(Feed): # R118 Keyword return extra - diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index 47c4e146c..5a949b091 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -1911,11 +1911,31 @@ class DocTestCase(TestCase): self.assertContains(r, doc.name) def test_rfc_feed(self): - WgRfcFactory() + rfc = WgRfcFactory(alias2__name="rfc9000") + DocEventFactory(doc=rfc, type="published_rfc") r = self.client.get("/feed/rfc/") self.assertTrue(r.status_code, 200) + q = PyQuery(r.content[39:]) # Strip off the xml declaration + self.assertEqual(len(q("item")), 1) + item = q("item")[0] + media_content = item.findall("{http://search.yahoo.com/mrss/}content") + self.assertEqual(len(media_content),4) + types = set([m.attrib["type"] for m in media_content]) + self.assertEqual(types, set(["application/rfc+xml", "text/plain", "text/html", "application/pdf"])) + rfcs_2016 = WgRfcFactory.create_batch(3) # rfc numbers will be well below v3 + for rfc in rfcs_2016: + e = DocEventFactory(doc=rfc, type="published_rfc") + e.time = e.time.replace(year=2016) + e.save() r = self.client.get("/feed/rfc/2016") self.assertTrue(r.status_code, 200) + q = PyQuery(r.content[39:]) + self.assertEqual(len(q("item")), 3) + item = q("item")[0] + media_content = item.findall("{http://search.yahoo.com/mrss/}content") + self.assertEqual(len(media_content), 3) + types = set([m.attrib["type"] for m in media_content]) + self.assertEqual(types, set(["text/plain", "text/html", "application/pdf"])) def test_state_help(self): url = urlreverse('ietf.doc.views_help.state_help', kwargs=dict(type="draft-iesg"))