155 lines
5.1 KiB
Python
155 lines
5.1 KiB
Python
#
|
|
# pyzmail/utils.py
|
|
# (c) Alain Spineux <alain.spineux@gmail.com>
|
|
# http://www.magiksys.net/pyzmail
|
|
# Released under LGPL
|
|
|
|
"""
|
|
Various functions used by other modules
|
|
@var invalid_chars_in_filename: a mix of characters not permitted in most used filesystems
|
|
@var invalid_windows_name: a list of unauthorized filenames under Windows
|
|
"""
|
|
|
|
import sys
|
|
|
|
invalid_chars_in_filename=b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f' \
|
|
b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' \
|
|
b'<>:"/\\|?*%\''
|
|
|
|
invalid_windows_name=[b'CON', b'PRN', b'AUX', b'NUL', b'COM1', b'COM2', b'COM3',
|
|
b'COM4', b'COM5', b'COM6', b'COM7', b'COM8', b'COM9',
|
|
b'LPT1', b'LPT2', b'LPT3', b'LPT4', b'LPT5', b'LPT6', b'LPT7',
|
|
b'LPT8', b'LPT9' ]
|
|
|
|
def sanitize_filename(filename, alt_name, alt_ext):
|
|
"""
|
|
Convert the given filename into a name that should work on all
|
|
platform. Remove non us-ascii characters, and drop invalid filename.
|
|
Use the I{alternative} filename if needed.
|
|
|
|
@type filename: unicode or None
|
|
@param filename: the originale filename or None. Can be unicode.
|
|
@type alt_name: str
|
|
@param alt_name: the alternative filename if filename is None or useless
|
|
@type alt_ext: str
|
|
@param alt_ext: the alternative filename extension (including the '.')
|
|
|
|
@rtype: str
|
|
@returns: a valid filename.
|
|
|
|
>>> sanitize_filename('document.txt', 'file', '.txt')
|
|
'document.txt'
|
|
>>> sanitize_filename('number1.txt', 'file', '.txt')
|
|
'number1.txt'
|
|
>>> sanitize_filename(None, 'file', '.txt')
|
|
'file.txt'
|
|
>>> sanitize_filename(u'R\\xe9pertoir.txt', 'file', '.txt')
|
|
'Rpertoir.txt'
|
|
>>> # the '\\xe9' has been removed
|
|
>>> sanitize_filename(u'\\xe9\\xe6.html', 'file', '.txt')
|
|
'file.html'
|
|
>>> # all non us-ascii characters have been removed, the alternative name
|
|
>>> # has been used the replace empty string. The originale extention
|
|
>>> # is still valid
|
|
>>> sanitize_filename(u'COM1.txt', 'file', '.txt')
|
|
'COM1A.txt'
|
|
>>> # if name match an invalid name or assimilated then a A is added
|
|
"""
|
|
|
|
if not filename:
|
|
return alt_name+alt_ext
|
|
|
|
if ((sys.version_info<(3, 0) and isinstance(filename, str)) or \
|
|
(sys.version_info>=(3, 0) and isinstance(filename, str))):
|
|
filename=filename.encode('ascii', 'ignore')
|
|
|
|
filename=filename.translate(None, invalid_chars_in_filename)
|
|
filename=filename.strip()
|
|
|
|
upper=filename.upper()
|
|
for name in invalid_windows_name:
|
|
if upper==name:
|
|
filename=filename+b'A'
|
|
break
|
|
if upper.startswith(name+b'.'):
|
|
filename=filename[:len(name)]+b'A'+filename[len(name):]
|
|
break
|
|
|
|
if sys.version_info>=(3, 0):
|
|
# back to string
|
|
filename=filename.decode('us-ascii')
|
|
|
|
if filename.rfind('.')==0:
|
|
filename=alt_name+filename
|
|
|
|
return filename
|
|
|
|
def handle_filename_collision(filename, filenames):
|
|
"""
|
|
Avoid filename collision, add a sequence number to the name when required.
|
|
'file.txt' will be renamed into 'file-01.txt' then 'file-02.txt' ...
|
|
until their is no more collision. The file is not added to the list.
|
|
|
|
Windows don't make the difference between lower and upper case. To avoid
|
|
"case" collision, the function compare C{filename.lower()} to the list.
|
|
If you provide a list in lower case only, then any collisions will be avoided.
|
|
|
|
@type filename: str
|
|
@param filename: the filename
|
|
@type filenames: list or set
|
|
@param filenames: a list of filenames.
|
|
|
|
@rtype: str
|
|
@returns: the I{filename} or the appropriately I{indexed} I{filename}
|
|
|
|
>>> handle_filename_collision('file.txt', [ ])
|
|
'file.txt'
|
|
>>> handle_filename_collision('file.txt', [ 'file.txt' ])
|
|
'file-01.txt'
|
|
>>> handle_filename_collision('file.txt', [ 'file.txt', 'file-01.txt',])
|
|
'file-02.txt'
|
|
>>> handle_filename_collision('foo', [ 'foo',])
|
|
'foo-01'
|
|
>>> handle_filename_collision('foo', [ 'foo', 'foo-01',])
|
|
'foo-02'
|
|
>>> handle_filename_collision('FOO', [ 'foo', 'foo-01',])
|
|
'FOO-02'
|
|
"""
|
|
if filename.lower() in filenames:
|
|
try:
|
|
basename, ext=filename.rsplit('.', 1)
|
|
ext='.'+ext
|
|
except ValueError:
|
|
basename, ext=filename, ''
|
|
|
|
i=1
|
|
while True:
|
|
filename='%s-%02d%s' % (basename, i, ext)
|
|
if filename.lower() not in filenames:
|
|
break
|
|
i+=1
|
|
|
|
return filename
|
|
|
|
def is_usascii(value):
|
|
""""
|
|
test if string contains us-ascii characters only
|
|
|
|
>>> is_usascii('foo')
|
|
True
|
|
>>> is_usascii(u'foo')
|
|
True
|
|
>>> is_usascii(u'Fran\xe7ais')
|
|
False
|
|
>>> is_usascii('bad\x81')
|
|
False
|
|
"""
|
|
try:
|
|
# if value is byte string, it will be decoded first using us-ascii
|
|
# and will generate UnicodeEncodeError, this is fine too
|
|
value.encode('us-ascii')
|
|
except UnicodeError:
|
|
return False
|
|
|
|
return True
|
|
|