diff --git a/ietf/submit/models.py b/ietf/submit/models.py index 4a09c0323..6043d4ab7 100644 --- a/ietf/submit/models.py +++ b/ietf/submit/models.py @@ -16,6 +16,7 @@ from ietf.group.models import Group from ietf.message.models import Message from ietf.name.models import DraftSubmissionStateName, FormalLanguageName from ietf.utils.accesstoken import generate_random_key, generate_access_token +from ietf.utils.text import parse_unicode from ietf.utils.models import ForeignKey @@ -25,7 +26,7 @@ def parse_email_line(line): email.utils.parseaddr() but return a dictionary """ name, addr = email.utils.parseaddr(line) if '@' in line else (line, '') - return dict(name=name, email=addr) + return dict(name=parse_unicode(name), email=addr) class Submission(models.Model): state = ForeignKey(DraftSubmissionStateName) @@ -169,4 +170,4 @@ class SubmissionEmailEvent(SubmissionEvent): class SubmissionExtResource(ExtResource): - submission = ForeignKey(Submission, related_name='external_resources') \ No newline at end of file + submission = ForeignKey(Submission, related_name='external_resources') diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 1f16a4e98..4171fcf1c 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -41,6 +41,7 @@ from ietf.utils import log from ietf.utils.accesstoken import generate_random_key from ietf.utils.draft import Draft from ietf.utils.mail import is_valid_email +from ietf.utils.text import parse_unicode from ietf.person.name import unidecode_name @@ -886,7 +887,7 @@ def accept_submission(request, submission, autopost=False): requires_prev_ad_approval = False # Partial message for submission event - sub_event_desc = 'Set submitter to \"%s\", replaces to %s' % (submission.submitter, pretty_replaces) + sub_event_desc = 'Set submitter to \"%s\", replaces to %s' % (parse_unicode(submission.submitter), pretty_replaces) docevent_desc = None address_list = [] if requires_ad_approval or requires_prev_ad_approval: diff --git 
class TestRFC2047Strings(TestCase):
    """Checks that parse_unicode() turns RFC 2047 encoded words into text."""

    def test_parse_unicode(self):
        # Each row pairs a raw header value with the text it must decode to.
        # The last two rows (plain ASCII and the empty string) are expected
        # to pass through unchanged.
        cases = (
            ('=?utf-8?b?4Yuz4YuK4Ym1IOGJoOGJgOGIiA==?=', 'ዳዊት በቀለ'),
            ('=?utf-8?b?5Li9IOmDnA==?=', '丽 郜'),
            ('=?utf-8?b?4KSV4KSu4KWN4KSs4KWL4KScIOCkoeCkvuCksA==?=', 'कम्बोज डार'),
            ('=?utf-8?b?zpfPgc6szrrOu861zrnOsSDOm865z4zOvc+Ezrc=?=', 'Ηράκλεια Λιόντη'),
            ('=?utf-8?b?15nXqdeo15DXnCDXqNeV15bXoNek15zXkw==?=', 'ישראל רוזנפלד'),
            ('=?utf-8?b?5Li95Y2OIOeahw==?=', '丽华 皇'),
            ('=?utf-8?b?77ul77qu766V77qzIO+tlu+7ru+vvu+6ju+7pw==?=', 'ﻥﺮﮕﺳ ﭖﻮﯾﺎﻧ'),
            ('=?utf-8?b?77uh77uu77qz77uu76++IO+6su+7tO+7p++6jSDvurDvu6Pvuo7vu6jvr74=?=', 'ﻡﻮﺳﻮﯾ ﺲﻴﻧﺍ ﺰﻣﺎﻨﯾ'),
            ('=?utf-8?b?ScOxaWdvIFNhbsOnIEliw6HDsWV6IGRlIGxhIFBlw7Fh?=', 'Iñigo Sanç Ibáñez de la Peña'),
            ('Mart van Oostendorp', 'Mart van Oostendorp'),
            ('', ''),
        )
        for raw_value, expected_text in cases:
            decoded = parse_unicode(raw_value)
            self.assertEqual(expected_text, decoded)
def parse_unicode(text):
    """Decode a string encoded according to RFC 2047 into unicode text.

    Every part reported by ``email.header.decode_header`` is decoded and the
    parts are concatenated, so values that mix plain text with encoded
    words, or that contain several encoded words, come back complete and
    always as ``str``.

    Args:
        text: The raw header value. Plain (unencoded) strings are returned
            unchanged; ``None`` and the empty string are returned as-is.

    Returns:
        The decoded text. If the value cannot be parsed or decoded (broken
        encoded word, unknown charset, or bytes that are invalid for the
        declared charset) the original ``text`` is returned untouched, as a
        best-effort fallback.
    """
    # Imported here because a bare ``import email`` does not guarantee that
    # the ``email.header`` / ``email.errors`` submodules have been loaded.
    from email.errors import HeaderParseError
    from email.header import decode_header

    if not text:
        return text

    try:
        parts = decode_header(text)
    except HeaderParseError:
        # Malformed encoded word: keep the raw value rather than fail.
        return text

    decoded_parts = []
    for chunk, charset in parts:
        if isinstance(chunk, bytes):
            # When any encoded word is present, decode_header returns the
            # unencoded runs as bytes too (charset None), produced with the
            # 'raw-unicode-escape' codec; reverse that with the same codec.
            try:
                chunk = chunk.decode(charset or 'raw-unicode-escape')
            except (UnicodeDecodeError, LookupError):
                # Invalid bytes for the charset, or unknown charset name.
                return text
        decoded_parts.append(chunk)
    return ''.join(decoded_parts)