Merged in [19786] from jennifer@painless-security.com:

Strip Unicode control characters out of feed content. Fixes #3398.
 - Legacy-Id: 19796
Note: SVN reference [19786] has been migrated to Git commit 91c46eddae31ad298f62ccfe6a3684f1a0778f14
This commit is contained in:
Robert Sparks 2022-01-04 15:44:25 +00:00
parent 54fc0364c9
commit 9a7ef90e52
2 changed files with 32 additions and 4 deletions

View file

@ -3,6 +3,7 @@
import datetime
import unicodedata
from django.contrib.syndication.views import Feed, FeedDoesNotExist
from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed
@ -15,6 +16,15 @@ from ietf.doc.models import Document, State, LastCallDocEvent, DocEvent
from ietf.doc.utils import augment_events_with_revision
from ietf.doc.templatetags.ietf_filters import format_textarea
def strip_control_characters(s):
    """Replace characters that are unsafe in feed XML with U+FFFD.

    Despite the name, nothing is removed: each character whose Unicode
    general category starts with ``C`` (control, format, surrogate,
    private-use, unassigned) is swapped one-for-one for the REPLACEMENT
    CHARACTER.  Tab, line feed and carriage return are passed through
    unchanged because XML 1.0 permits them and they carry the line
    structure of the text.

    :param s: text to sanitise
    :returns: a new string with the same number of characters as ``s``
    """
    replacement_char = unicodedata.lookup('REPLACEMENT CHARACTER')
    # Whitespace controls that XML 1.0 explicitly allows in content.
    xml_safe_whitespace = '\t\n\r'
    return ''.join(
        c if c in xml_safe_whitespace or unicodedata.category(c)[0] != 'C'
        else replacement_char
        for c in s
    )
class DocumentChangesFeed(Feed):
feed_type = Atom1Feed
@ -38,10 +48,14 @@ class DocumentChangesFeed(Feed):
return events
def item_title(self, item):
    """Return the feed entry title for ``item``.

    The title combines ``item.by``, the first 15 words of the tag-stripped
    ``item.desc`` and ``item.rev``.  The finished string is passed through
    strip_control_characters() before it is handed to the feed generator.
    """
    # Only the sanitised form is returned; an earlier unsanitised return
    # would make the control-character handling unreachable.
    return strip_control_characters("[%s] %s [rev. %s]" % (
        item.by,
        truncatewords(strip_tags(item.desc), 15),
        item.rev,
    ))
def item_description(self, item):
    """Return the feed entry body for ``item``.

    ``item.desc`` is rendered with format_textarea(), cut to 20 words by
    truncatewords_html(), and then passed through
    strip_control_characters() before being returned.
    """
    # Sanitise last so that nothing added by the formatting helpers can
    # reintroduce characters the feed generator rejects.
    return strip_control_characters(truncatewords_html(format_textarea(item.desc), 20))
def item_pubdate(self, item):
    """Return the publication timestamp of ``item`` (its ``time`` attribute)."""
    published = item.time
    return published
@ -75,7 +89,7 @@ class InLastCallFeed(Feed):
datefilter(item.lc_event.expires, "F j, Y"))
def item_description(self, item):
    """Return the feed entry body for a last-call ``item``.

    The last-call event description (``item.lc_event.desc``) is converted
    by linebreaks() and then passed through strip_control_characters()
    before being returned.
    """
    # Only the sanitised form is returned; an earlier unsanitised return
    # would make the control-character handling unreachable.
    return strip_control_characters(linebreaks(item.lc_event.desc))
def item_pubdate(self, item):
    """Return the publication timestamp: the ``time`` of the item's last-call event."""
    last_call_event = item.lc_event
    return last_call_event.time

View file

@ -1704,6 +1704,20 @@ class DocTestCase(TestCase):
self.assertEqual(r.status_code, 200)
self.assertContains(r, e.desc)
def test_document_feed_with_control_character(self):
    """The document-changes feed renders when an event description contains a control character."""
    doc = IndividualDraftFactory()

    # \x0b (vertical tab) is a control character embedded in the description.
    DocEvent.objects.create(
        doc=doc,
        rev=doc.rev,
        desc="Something happened involving the \x0b character.",
        type="added_comment",
        by=Person.objects.get(name="(System)"))

    r = self.client.get("/feed/document-changes/%s/" % doc.name)
    self.assertEqual(r.status_code, 200)
    self.assertContains(r, 'Something happened involving the')
    # A 200 alone does not show the character was dealt with; make sure the
    # raw control character never reaches the serialised feed.
    self.assertNotContains(r, '\x0b')
def test_last_call_feed(self):
doc = IndividualDraftFactory()
@ -1712,7 +1726,7 @@ class DocTestCase(TestCase):
LastCallDocEvent.objects.create(
doc=doc,
rev=doc.rev,
desc="Last call",
desc="Last call\x0b", # include a control character to be sure it does not break anything
type="sent_last_call",
by=Person.objects.get(user__username="secretary"),
expires=datetime.date.today() + datetime.timedelta(days=7))