From 9a7ef90e52ed13dad549afd238072aca5cfa8ba0 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 4 Jan 2022 15:44:25 +0000 Subject: [PATCH] Merged in [19786] from jennifer@painless-security.com: Strip Unicode control characters out of feed content. Fixes #3398. - Legacy-Id: 19796 Note: SVN reference [19786] has been migrated to Git commit 91c46eddae31ad298f62ccfe6a3684f1a0778f14 --- ietf/doc/feeds.py | 20 +++++++++++++++++--- ietf/doc/tests.py | 16 +++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/ietf/doc/feeds.py b/ietf/doc/feeds.py index 4b8d2789a..1169db105 100644 --- a/ietf/doc/feeds.py +++ b/ietf/doc/feeds.py @@ -3,6 +3,7 @@ import datetime +import unicodedata from django.contrib.syndication.views import Feed, FeedDoesNotExist from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed @@ -15,6 +16,15 @@ from ietf.doc.models import Document, State, LastCallDocEvent, DocEvent from ietf.doc.utils import augment_events_with_revision from ietf.doc.templatetags.ietf_filters import format_textarea + +def strip_control_characters(s): + """Remove Unicode control / non-printing characters from a string""" + replacement_char = unicodedata.lookup('REPLACEMENT CHARACTER') + return ''.join( + replacement_char if unicodedata.category(c)[0] == 'C' else c + for c in s + ) + class DocumentChangesFeed(Feed): feed_type = Atom1Feed @@ -38,10 +48,14 @@ class DocumentChangesFeed(Feed): return events def item_title(self, item): - return "[%s] %s [rev. %s]" % (item.by, truncatewords(strip_tags(item.desc), 15), item.rev) + return strip_control_characters("[%s] %s [rev. %s]" % ( + item.by, + truncatewords(strip_tags(item.desc), 15), + item.rev, + )) def item_description(self, item): - return truncatewords_html(format_textarea(item.desc), 20) + return strip_control_characters(truncatewords_html(format_textarea(item.desc), 20)) def item_pubdate(self, item): return item.time @@ -75,7 +89,7 @@ class InLastCallFeed(Feed): datefilter(item.lc_event.expires, "F j, Y")) def item_description(self, item): - return linebreaks(item.lc_event.desc) + return strip_control_characters(linebreaks(item.lc_event.desc)) def item_pubdate(self, item): return item.lc_event.time diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index ec99f1035..249b6ae39 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -1704,6 +1704,20 @@ class DocTestCase(TestCase): self.assertEqual(r.status_code, 200) self.assertContains(r, e.desc) + def test_document_feed_with_control_character(self): + doc = IndividualDraftFactory() + + DocEvent.objects.create( + doc=doc, + rev=doc.rev, + desc="Something happened involving the \x0b character.", + type="added_comment", + by=Person.objects.get(name="(System)")) + + r = self.client.get("/feed/document-changes/%s/" % doc.name) + self.assertEqual(r.status_code, 200) + self.assertContains(r, 'Something happened involving the') + def test_last_call_feed(self): doc = IndividualDraftFactory() @@ -1712,7 +1726,7 @@ class DocTestCase(TestCase): LastCallDocEvent.objects.create( doc=doc, rev=doc.rev, - desc="Last call", + desc="Last call\x0b", # include a control character to be sure it does not break anything type="sent_last_call", by=Person.objects.get(user__username="secretary"), expires=datetime.date.today() + datetime.timedelta(days=7))