# Listing metadata: 291 lines, 12 KiB, Python
import doctest
import sys
import unittest

import pyzmail
from pyzmail.parse import *


class Msg:
    """Minimal stand-in for an ``email.message.Message``.

    Only ``get_all()`` is provided, and every header lookup answers with the
    single value given at construction time.
    """

    def __init__(self, value):
        """Remember *value*, the raw header value returned by every lookup."""
        self.value = value

    def get_all(self, header_name, default=None):
        """Return ``[value]`` whatever *header_name* is, or *default* if unset.

        Mirrors the ``failobj`` contract of ``email.message.Message.get_all``:
        when there is no (truthy) value, the caller's *default* is handed back
        instead of a hard-coded empty list.
        """
        if not self.value:
            return default
        return [self.value]


class TestParse(unittest.TestCase):
    """Tests for the header, address, filename and mail-part helpers
    star-imported from ``pyzmail.parse``, plus the PyzMessage factories."""

    def setUp(self):
        """No per-test fixture is needed."""
        pass

    def test_decode_mail_header(self):
        """test decode_mail_header()"""
        # (raw header value, expected decoded text)
        cases = [
            ('', ''),
            ('hello', 'hello'),
            ('hello ', 'hello '),
            ('=?iso-8859-1?q?Courrier_=E8lectronique_Fran=E7ais?=',
             'Courrier \xe8lectronique Fran\xe7ais'),
            ('=?utf8?q?Courrier_=C3=A8lectronique_Fran=C3=A7ais?=',
             'Courrier \xe8lectronique Fran\xe7ais'),
            ('=?utf-8?b?RnJhbsOnYWlz?=', 'Fran\xe7ais'),
            ('=?iso-8859-1?q?Courrier_=E8lectronique_?= =?utf8?q?Fran=C3=A7ais?=',
             'Courrier \xe8lectronique Fran\xe7ais'),
            ('=?iso-8859-1?q?Courrier_=E8lectronique_?= =?utf-8?b?RnJhbsOnYWlz?=',
             'Courrier \xe8lectronique Fran\xe7ais'),
            ('h_subject_q_iso_8858_1 : =?ISO-8859-1?Q?Fran=E7ais=E20accentu=E9?= !',
             'h_subject_q_iso_8858_1 :Fran\xe7ais\xe20accentu\xe9!'),
        ]
        for raw_header, expected in cases:
            self.assertEqual(decode_mail_header(raw_header), expected)

    def test_get_mail_addresses(self):
        """test get_mail_addresses()"""
        # Imported locally, like the email.mime imports of the sibling tests,
        # so this test does not depend on ``email`` leaking through the
        # wildcard import of pyzmail.parse.
        import email.charset
        import email.header

        # (raw "to" header, expected list of (name, address) pairs)
        cases = [
            ('foo@example.com',
             [('foo@example.com', 'foo@example.com')]),
            ('Foo <foo@example.com>',
             [('Foo', 'foo@example.com'), ]),
            # notice the space around the comma
            ('foo@example.com , bar@example.com',
             [('foo@example.com', 'foo@example.com'), ('bar@example.com', 'bar@example.com')]),
            ('Foo <foo@example.com> , Bar <bar@example.com>',
             [('Foo', 'foo@example.com'), ('Bar', 'bar@example.com')]),
            ('Foo <foo@example.com> , bar@example.com',
             [('Foo', 'foo@example.com'), ('bar@example.com', 'bar@example.com')]),
            ('Mr\nFoo <foo@example.com> , bar@example.com',
             [('Mr Foo', 'foo@example.com'), ('bar@example.com', 'bar@example.com')]),
            ('=?utf-8?q?Beno=C3=AEt?= <benoit@example.com>',
             [('Beno\xeet', 'benoit@example.com')]),
        ]
        for raw_header, expected in cases:
            self.assertEqual(expected, get_mail_addresses(Msg(raw_header), 'to'))

        # address already encoded into utf8 (bad)
        address = 'Ant\xf3nio Foo <a.foo@example.com>'.encode('utf8')
        if sys.version_info < (3, 0):
            self.assertEqual(
                [('Ant\ufffd\ufffdnio Foo', 'a.foo@example.com')],
                get_mail_addresses(Msg(address), 'to'))
        else:
            # Python 3.2 returns a Header object when surrogate characters are
            # used in a header, so the raw bytes are wrapped in one here.
            header = email.header.Header(
                address, charset=email.charset.UNKNOWN8BIT, header_name='to')
            self.assertEqual(
                [('Ant??nio Foo', 'a.foo@example.com'), ],
                get_mail_addresses(Msg(header), 'to'))

    def test_get_filename(self):
        """test get_filename()"""
        import email.mime.image

        filename = 'Fran\xe7ais.png'
        if sys.version_info < (3, 0):
            encoded_filename = filename.encode('iso-8859-1')
        else:
            encoded_filename = filename

        payload = b'data'

        # plain filename carried by Content-Disposition
        attach = email.mime.image.MIMEImage(payload, 'png')
        attach.add_header('Content-Disposition', 'attachment', filename='image.png')
        self.assertEqual('image.png', get_filename(attach))

        # (charset, language, value) filename carried by Content-Disposition
        attach = email.mime.image.MIMEImage(payload, 'png')
        attach.add_header('Content-Disposition', 'attachment',
                          filename=('iso-8859-1', 'fr', encoded_filename))
        self.assertEqual('Fran\xe7ais.png', get_filename(attach))

        # plain "name" parameter
        attach = email.mime.image.MIMEImage(payload, 'png')
        attach.set_param('name', 'image.png')
        self.assertEqual('image.png', get_filename(attach))

        # (charset, language, value) "name" parameter
        attach = email.mime.image.MIMEImage(payload, 'png')
        attach.set_param('name', ('iso-8859-1', 'fr', encoded_filename))
        self.assertEqual('Fran\xe7ais.png', get_filename(attach))

        # both present: the Content-Disposition filename is the one expected
        attach = email.mime.image.MIMEImage(payload, 'png')
        attach.add_header('Content-Disposition', 'attachment', filename='image.png')
        attach.set_param('name', 'image_wrong.png')
        self.assertEqual('image.png', get_filename(attach))

    def test_get_mailparts(self):
        """test get_mailparts()"""
        import email.mime.multipart
        import email.mime.text
        import email.mime.image

        msg = email.mime.multipart.MIMEMultipart(boundary='===limit1==')
        txt = email.mime.text.MIMEText('The text.', 'plain', 'us-ascii')
        msg.attach(txt)
        image = email.mime.image.MIMEImage(b'data', 'png')
        image.add_header('Content-Disposition', 'attachment', filename='image.png')
        image.add_header('Content-Description', 'the description')
        image.add_header('Content-ID', '<this.is.the.normaly.unique.contentid>')
        msg.attach(image)

        raw = msg.as_string(unixfrom=False)
        expected_raw = """Content-Type: multipart/mixed; boundary="===limit1=="
MIME-Version: 1.0

--===limit1==
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

The text.
--===limit1==
Content-Type: image/png
MIME-Version: 1.0
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="image.png"
Content-Description: the description
Content-ID: <this.is.the.normaly.unique.contentid>

ZGF0YQ==<HERE1>
--===limit1==--"""

        # The <HERE1> marker stands for the spot where the Python 3 output
        # has one extra newline after the base64 payload.
        if sys.version_info < (3, 0):
            expected_raw = expected_raw.replace('<HERE1>', '')
        else:
            expected_raw = expected_raw.replace('<HERE1>', '\n')

        self.assertEqual(raw, expected_raw)

        parts = get_mail_parts(msg)
        # [MailPart<*text/plain charset=us-ascii len=9>, MailPart<image/png filename=image.png len=4>]

        self.assertEqual(len(parts), 2)

        text_part, image_part = parts[0], parts[1]

        self.assertEqual(text_part.type, 'text/plain')
        # not an error: for a body part, is_body is expected to be the type
        self.assertEqual(text_part.is_body, 'text/plain')
        self.assertEqual(text_part.charset, 'us-ascii')
        self.assertEqual(text_part.get_payload().decode(text_part.charset), 'The text.')

        self.assertEqual(image_part.type, 'image/png')
        self.assertEqual(image_part.is_body, False)
        self.assertEqual(image_part.charset, None)
        self.assertEqual(image_part.filename, 'image.png')
        self.assertEqual(image_part.description, 'the description')
        self.assertEqual(image_part.content_id, 'this.is.the.normaly.unique.contentid')
        self.assertEqual(image_part.get_payload(), b'data')

    # Sample 1: plain us-ascii text message, given as text.
    raw_1 = '''Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: simple test
From: Me <me@foo.com>
To: A <a@foo.com>, B <b@foo.com>
Cc: C <c@foo.com>, d@foo.com
User-Agent: pyzmail

The text.
'''

    def check_message_1(self, msg):
        """Check a message parsed from ``raw_1``."""
        self.assertEqual(msg.get_subject(), 'simple test')
        self.assertEqual(msg.get_decoded_header('subject'), 'simple test')
        self.assertEqual(msg.get_decoded_header('User-Agent'), 'pyzmail')
        self.assertEqual(msg.get('User-Agent'), 'pyzmail')
        self.assertEqual(msg.get_address('from'), ('Me', 'me@foo.com'))
        self.assertEqual(msg.get_addresses('to'), [('A', 'a@foo.com'), ('B', 'b@foo.com')])
        self.assertEqual(msg.get_addresses('cc'), [('C', 'c@foo.com'), ('d@foo.com', 'd@foo.com')])
        self.assertEqual(len(msg.mailparts), 1)
        self.assertEqual(msg.text_part, msg.mailparts[0])
        self.assertEqual(msg.html_part, None)

    # use 8bits encoding and 2 different charsets ! python 3.0 & 3.1 are not able to parse this sample
    raw_2 = b"""From: sender@domain.com
To: recipient@domain.com
Date: Tue, 7 Jun 2011 16:32:17 +0200
Subject: contains 8bits attachments using different encoding
Content-Type: multipart/mixed; boundary=mixed

--mixed
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

body
--mixed
Content-Type: text/plain; charset="windows-1252"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="file1.txt"

bo\xeete mail = mailbox
--mixed
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="file2.txt"

bo\xc3\xaete mail = mailbox
--mixed--
"""

    def check_message_2(self, msg):
        """Check a message parsed from ``raw_2``."""
        self.assertEqual(msg.get_subject(), 'contains 8bits attachments using different encoding')

        # Unpacking also requires exactly three parts: the body and two files.
        _, file1, file2 = msg.mailparts

        self.assertEqual('file1.txt', file1.filename)
        self.assertEqual('file2.txt', file2.filename)
        self.assertEqual('windows-1252', file1.charset)
        self.assertEqual('utf-8', file2.charset)

        # Both attachments hold the same text, each in its own charset.
        content = b'bo\xeete mail = mailbox'.decode("windows-1252")
        content1 = file1.get_payload().decode(file1.charset)
        content2 = file2.get_payload().decode(file2.charset)
        self.assertEqual(content, content1)
        self.assertEqual(content, content2)

    # this one contain non us-ascii chars in the header
    # py 2x and py3k return different value here
    raw_3 = (
        b'Content-Type: text/plain; charset="us-ascii"\n'
        + b'MIME-Version: 1.0\n'
        + b'Content-Transfer-Encoding: 7bit\n'
        + 'Subject: Beno\xeet & Ant\xf3nio\n'.encode('utf8')
        + b'From: =?utf-8?q?Beno=C3=AEt?= <benoit@example.com>\n'
        + 'To: Ant\xf3nio Foo <a.foo@example.com>\n'.encode('utf8')
        + 'Cc: Beno\xeet <benoit@foo.com>, d@foo.com\n'.encode('utf8')
        + b'User-Agent: pyzmail\n'
        + b'\n'
        + b'The text.\n'
    )

    def check_message_3(self, msg):
        """Check a message parsed from ``raw_3``."""
        # The same subject is expected on every Python version; only the
        # address display names below differ between Python 2 and 3.
        subject = 'Beno\ufffd\ufffdt & Ant\ufffd\ufffdnio'
        self.assertEqual(msg.get_subject(), subject)
        self.assertEqual(msg.get_decoded_header('subject'), subject)
        self.assertEqual(msg.get_decoded_header('User-Agent'), 'pyzmail')
        self.assertEqual(msg.get('User-Agent'), 'pyzmail')
        self.assertEqual(msg.get_address('from'), ('Beno\xeet', 'benoit@example.com'))

        to = msg.get_addresses('to')
        self.assertEqual(to[0][1], 'a.foo@example.com')
        self.assertEqual(to[0][0], 'Ant\ufffd\ufffdnio Foo' if sys.version_info<(3, 0) else 'Ant??nio Foo')

        cc = msg.get_addresses('cc')
        self.assertEqual(cc[0][1], 'benoit@foo.com')
        self.assertEqual(cc[0][0], 'Beno\ufffd\ufffdt' if sys.version_info<(3, 0) else 'Beno??t')
        self.assertEqual(cc[1], ('d@foo.com', 'd@foo.com'))

        self.assertEqual(len(msg.mailparts), 1)
        self.assertEqual(msg.text_part, msg.mailparts[0])
        self.assertEqual(msg.html_part, None)

    def check_pyzmessage_factories(self, input, check):
        """test PyzMessage from different sources

        *input* is a raw message (bytes or str) and *check* a callable taking
        the parsed message. Every factory matching the type of *input* is
        exercised: the direct one and its file-object counterpart.
        """
        import io

        # The bytes-based factories are only exercised on Python >= 3.2.
        if isinstance(input, bytes) and sys.version_info >= (3, 2):
            check(PyzMessage.factory(input))
            check(message_from_bytes(input))

            check(PyzMessage.factory(io.BytesIO(input)))
            check(message_from_binary_file(io.BytesIO(input)))

        if isinstance(input, str):
            check(PyzMessage.factory(input))
            check(message_from_string(input))

            check(PyzMessage.factory(io.StringIO(input)))
            check(message_from_file(io.StringIO(input)))

    def test_pyzmessage_factories(self):
        """test PyzMessage class different sources"""
        self.check_pyzmessage_factories(self.raw_1, self.check_message_1)
        self.check_pyzmessage_factories(self.raw_2, self.check_message_2)
        self.check_pyzmessage_factories(self.raw_3, self.check_message_3)


# Hooks that add the pyzmail.parse doctests to the test run
def load_tests(loader, tests, ignore):
    """unittest ``load_tests`` hook: add the pyzmail.parse doctests on Python 2.

    *tests* is the suite already collected for this module; it is returned
    unchanged on Python 3 and with the doctests appended on Python 2.
    *loader* and *ignore* are not used.
    """
    # this works with python 2.7 and 3.x
    # NOTE(review): the doctests are only added on Python 2, presumably
    # because their expected output is Python 2 specific - confirm.
    if sys.version_info < (3, 0):
        tests.addTests(doctest.DocTestSuite(pyzmail.parse))
    return tests


def additional_tests():
    """Return the pyzmail.parse doctests on Python 2.6 and below.

    On any newer interpreter an empty ``unittest.TestSuite`` is returned.
    NOTE(review): presumably this is the hook looked up by the setuptools
    test loader - confirm.
    """
    # Add doctest for python 2.6 and below
    if sys.version_info < (2, 7):
        return doctest.DocTestSuite(pyzmail.parse)
    return unittest.TestSuite()