From 310ea57bc5d898af227c5fa329eb55b385802491 Mon Sep 17 00:00:00 2001
From: Jennifer Richards <jennifer@painless-security.com>
Date: Thu, 9 Dec 2021 19:29:20 +0000
Subject: [PATCH] Accept/replace invalid Unicode bytes when processing ipr
 response emails. Fixes #3489. Commit ready for merge.  - Legacy-Id: 19766

---
 ietf/ipr/mail.py                              |  4 +-
 ietf/ipr/management/commands/process_email.py |  4 +-
 ietf/ipr/management/tests.py                  | 25 ++++---
 ietf/ipr/tests.py                             | 65 ++++++++++++++-----
 4 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/ietf/ipr/mail.py b/ietf/ipr/mail.py
index 3bf842975..ea3551d56 100644
--- a/ietf/ipr/mail.py
+++ b/ietf/ipr/mail.py
@@ -11,7 +11,7 @@ import pytz
 import re
 
 from django.template.loader import render_to_string
-from django.utils.encoding import force_text, force_str
+from django.utils.encoding import force_text, force_bytes
 
 import debug                            # pyflakes:ignore
 
@@ -174,7 +174,7 @@ def process_response_email(msg):
     a matching value in the reply_to field, associated to an IPR disclosure through
     IprEvent.  Create a Message object for the incoming message and associate it to
     the original message via new IprEvent"""
-    message = email.message_from_string(force_str(msg))
+    message = email.message_from_bytes(force_bytes(msg))
     to = message.get('To', '')
 
     # exit if this isn't a response we're interested in (with plus addressing)
diff --git a/ietf/ipr/management/commands/process_email.py b/ietf/ipr/management/commands/process_email.py
index b8ea0a0b5..d04b54b21 100644
--- a/ietf/ipr/management/commands/process_email.py
+++ b/ietf/ipr/management/commands/process_email.py
@@ -25,10 +25,8 @@ class Command(EmailOnFailureCommand):
         email = options.get('email', None)
         binary_input = io.open(email, 'rb') if email else sys.stdin.buffer
         self.msg_bytes = binary_input.read()
-        msg = self.msg_bytes.decode()
-
         try:
-            process_response_email(msg)
+            process_response_email(self.msg_bytes)
         except ValueError as e:
             raise CommandError(e)
 
diff --git a/ietf/ipr/management/tests.py b/ietf/ipr/management/tests.py
index de7e09874..d452f0c4a 100644
--- a/ietf/ipr/management/tests.py
+++ b/ietf/ipr/management/tests.py
@@ -18,9 +18,10 @@ class ProcessEmailTests(TestCase):
         with name_of_file_containing('contents') as filename:
             call_command('process_email', email_file=filename)
         self.assertEqual(process_mock.call_count, 1, 'process_response_email should be called once')
+        (msg,) = process_mock.call_args.args
         self.assertEqual(
-            process_mock.call_args.args,
-            ('contents',),
+            msg.decode(),
+            'contents',
             'process_response_email should receive the correct contents'
         )
 
@@ -52,16 +53,15 @@ class ProcessEmailTests(TestCase):
     @mock.patch('ietf.utils.management.base.send_smtp')
     @mock.patch('ietf.ipr.management.commands.process_email.process_response_email')
     def test_invalid_character_encodings(self, process_mock, send_smtp_mock):
-        """The process_email command should attach messages with invalid encoding when using a file input"""
+        """The process_email command should accept messages with invalid encoding when using a file input"""
         invalid_characters = b'\xfe\xff'
         with name_of_file_containing(invalid_characters, mode='wb') as filename:
             call_command('process_email', email_file=filename)
 
-        self.assertFalse(process_mock.called)  # should not even try to process illegally encoded messages
-        self.assertTrue(send_smtp_mock.called)
-        (msg,) = send_smtp_mock.call_args.args
-        parts = msg.get_payload()
-        self.assertEqual(len(parts), 3, 'Error email should contain message, traceback, and original message')
+        self.assertFalse(send_smtp_mock.called)  # should not send an error email
+        self.assertTrue(process_mock.called)
+        (msg,) = process_mock.call_args.args
+        self.assertEqual(msg, invalid_characters, 'Invalid unicode should be passed to process_email()')
 
     @mock.patch.object(sys.stdin.buffer, 'read')
     @mock.patch('ietf.utils.management.base.send_smtp')
@@ -72,8 +72,7 @@ class ProcessEmailTests(TestCase):
         stdin_read_mock.return_value = invalid_characters
         call_command('process_email')
 
-        self.assertFalse(process_mock.called)  # should not even try to process illegally encoded messages
-        self.assertTrue(send_smtp_mock.called)
-        (msg,) = send_smtp_mock.call_args.args
-        parts = msg.get_payload()
-        self.assertEqual(len(parts), 3, 'Error email should contain message, traceback, and original message')
+        self.assertFalse(send_smtp_mock.called)  # should not send an error email
+        self.assertTrue(process_mock.called)
+        (msg,) = process_mock.call_args.args
+        self.assertEqual(msg, invalid_characters, 'Invalid unicode should be passed to process_email()')
diff --git a/ietf/ipr/tests.py b/ietf/ipr/tests.py
index d89970874..818daa112 100644
--- a/ietf/ipr/tests.py
+++ b/ietf/ipr/tests.py
@@ -592,8 +592,7 @@ I would like to revoke this declaration.
         self.assertEqual(len(outbox),2)
         self.assertIn('Secretariat on '+ipr.get_latest_event_submitted().time.strftime("%Y-%m-%d"), get_payload_text(outbox[1]).replace('\n',' '))
 
-    def test_process_response_email(self):
-        # first send a mail
+    def send_ipr_email_helper(self):
         ipr = HolderIprDisclosureFactory()
         url = urlreverse('ietf.ipr.views.email',kwargs={ "id": ipr.id })
         self.client.login(username="secretary", password="secretary+password")
@@ -607,7 +606,6 @@ I would like to revoke this declaration.
             response_due=yesterday.isoformat())
         empty_outbox()
         r = self.client.post(url,data,follow=True)
-        #print r.content
         self.assertEqual(r.status_code,200)
         q = Message.objects.filter(reply_to=data['reply_to'])
         self.assertEqual(q.count(),1)
@@ -615,19 +613,25 @@ I would like to revoke this declaration.
         self.assertTrue(event.response_past_due())
         self.assertEqual(len(outbox), 1)
         self.assertTrue('joe@test.com' in outbox[0]['To'])
-        
+        return data['reply_to'], event
+
+    uninteresting_ipr_message_strings = [
+        ("To: {to}\nCc: {cc}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),
+        ("Cc: {cc}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),  # no To
+        ("To: {to}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),  # no Cc
+        ("From: joe@test.com\nDate: {date}\nSubject: test\n"),  # no To or Cc
+        ("Cc: {cc}\nDate: {date}\nSubject: test\n"),  # no To
+        ("To: {to}\nDate: {date}\nSubject: test\n"),  # no Cc
+        ("Date: {date}\nSubject: test\n"),  # no To or Cc
+    ]
+
+    def test_process_response_email(self):
+        # first send a mail
+        reply_to, event = self.send_ipr_email_helper()
+
         # test process response uninteresting messages
         addrs = gather_address_lists('ipr_disclosure_submitted').as_strings()
-        uninteresting_message_strings = [
-            ("To: {to}\nCc: {cc}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),
-            ("Cc: {cc}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),  # no To
-            ("To: {to}\nFrom: joe@test.com\nDate: {date}\nSubject: test\n"),  # no Cc
-            ("From: joe@test.com\nDate: {date}\nSubject: test\n"),  # no To or Cc
-            ("Cc: {cc}\nDate: {date}\nSubject: test\n"),  # no To
-            ("To: {to}\nDate: {date}\nSubject: test\n"),  # no Cc
-            ("Date: {date}\nSubject: test\n"),  # no To or Cc
-        ]
-        for message_string in uninteresting_message_strings:
+        for message_string in self.uninteresting_ipr_message_strings:
             result = process_response_email(
                 message_string.format(
                     to=addrs.to,
@@ -642,12 +646,41 @@ I would like to revoke this declaration.
 From: joe@test.com
 Date: {}
 Subject: test
-""".format(data['reply_to'],datetime.datetime.now().ctime())
+""".format(reply_to, datetime.datetime.now().ctime())
         result = process_response_email(message_string)
 
-        self.assertIsInstance(result,Message)
+        self.assertIsInstance(result, Message)
         self.assertFalse(event.response_past_due())
 
+    def test_process_response_email_with_invalid_encoding(self):
+        """Interesting emails with invalid encoding should be handled"""
+        reply_to, _ = self.send_ipr_email_helper()
+        # test process response
+        message_string = """To: {}
+From: joe@test.com
+Date: {}
+Subject: test
+""".format(reply_to, datetime.datetime.now().ctime())
+        message_bytes = message_string.encode('utf8') + b'\nInvalid stuff: \xfe\xff\n'
+        result = process_response_email(message_bytes)
+        self.assertIsInstance(result, Message)
+        # \ufffd is a rhombus character with an inverse ?, used to replace invalid characters
+        self.assertEqual(result.body, 'Invalid stuff: \ufffd\ufffd\n\n',  # not sure where the extra \n is from
+                         'Invalid characters should be replaced with \ufffd characters')
+
+    def test_process_response_email_uninteresting_with_invalid_encoding(self):
+        """Uninteresting emails with invalid encoding should be quietly dropped"""
+        self.send_ipr_email_helper()
+        addrs = gather_address_lists('ipr_disclosure_submitted').as_strings()
+        for message_string in self.uninteresting_ipr_message_strings:
+            message_bytes = message_string.format(
+                                to=addrs.to,
+                                cc=addrs.cc,
+                                date=datetime.datetime.now().ctime(),
+            ).encode('utf8') + b'\nInvalid stuff: \xfe\xff\n'
+            result = process_response_email(message_bytes)
+            self.assertIsNone(result)
+
     def test_ajax_search(self):
         url = urlreverse('ietf.ipr.views.ajax_search')
         response=self.client.get(url+'?q=disclosure')