datatracker/ietf/submit/parsers/plain_parser.py
Henrik Levkowetz 726fcbf27d Removed all __future__ imports.
- Legacy-Id: 17391
2020-03-05 23:53:42 +00:00

73 lines
3.2 KiB
Python

# Copyright The IETF Trust 2011-2020, All Rights Reserved
import re
import debug # pyflakes:ignore
from ietf.submit.parsers.base import FileParser
class PlainParser(FileParser):
    """Parser for plain-text (``.txt``) submissions.

    Validates the character set reported for the file and extracts the
    draft name and revision from its opening lines.  Every finding is
    recorded on ``self.parsed_info``, either as metadata or through
    ``add_error``.
    """

    ext = 'txt'
    mimetypes = ['text/plain', ]

    def __init__(self, fd):
        """Pass the file object *fd* straight through to the base parser."""
        super().__init__(fd)

    def critical_parse(self):
        """Run the critical checks and return ``self.parsed_info``.

        The base-class checks run first, followed by the charset check and
        the draft-name extraction.  If an error is reported by this step,
        no further parsing of the file is recommended.
        """
        super().critical_parse()
        self.check_file_charset()
        self.parse_name()
        return self.parsed_info

    def check_file_charset(self):
        """Report an error for an unexpected or inconsistent charset.

        Two independent checks are made against ``self.parsed_info.charset``:
        it must be ``us-ascii`` or ``utf-8``, and, when ``self.fd.charset``
        is set, the two values must agree.
        """
        detected = self.parsed_info.charset
        if detected not in ('us-ascii', 'utf-8'):
            self.parsed_info.add_error(
                'A plain text ASCII document is required. '
                'Found an unexpected encoding: "%s". '
                'You probably have one or more non-ascii characters in your file.' % detected
            )

        declared = self.fd.charset
        if declared and detected != declared:
            # Judging by the message, `declared` comes with the upload and
            # `detected` from libmagic -- confirm against the base class.
            self.parsed_info.add_error(
                "Unexpected charset mismatch: upload: %s, libmagic: %s" % (declared, detected)
            )

    def parse_name(self):
        """Find the draft name and revision near the top of the file.

        Up to 80 lines are read from the start of the file and searched for
        the first ``draft-...`` token.  When one is found, the name (with
        any trailing ``-NN`` revision removed) is stored in
        ``parsed_info.metadata.name`` and the revision, if present, in
        ``parsed_info.metadata.rev``.  Errors are added for disallowed
        characters in the name, for a missing revision, and for a file in
        which no name is found at all.  A line that cannot be decoded ends
        the search without adding an error.
        """
        self.fd.file.seek(0)
        draft_pattern = re.compile(r'(draft-\S+)')
        revision_pattern = re.compile(r'.*-(\d+)$')
        info = self.parsed_info
        no_name_error = 'The first page of the document does not contain a legitimate name that starts with draft-*'

        # Without a supported charset the lines cannot be decoded, so the
        # name is reported as missing straight away.
        if info.charset not in ('us-ascii', 'utf-8'):
            info.add_error(no_name_error)
            return

        for _ in range(80):
            try:
                text = self.fd.readline().decode(info.charset)
            except UnicodeDecodeError:
                return

            found = draft_pattern.search(text)
            if found is None:
                continue

            # Trim surrounding punctuation, then a trailing ".txt".
            name = found.group(1)
            for trim in (r'^[^\w]+', r'[^\w]+$', r'\.txt$'):
                name = re.sub(trim, '', name)

            # Whatever survives removal of the allowed characters is illegal.
            disallowed = re.sub(r'[0-9a-z\-]', '', name)
            if disallowed:
                codes = ', '.join(str(ord(ch)) for ch in disallowed)
                if len(disallowed) == 1:
                    template = 'The document name on the first page, "%s", contains a disallowed character with byte code: %s '
                else:
                    template = 'The document name on the first page, "%s", contains disallowed characters with byte codes: %s '
                info.add_error(
                    (template % (name, codes)) +
                    '(see https://www.ietf.org/id-info/guidelines.html#naming for details).'
                )

            rev_match = revision_pattern.match(name)
            if rev_match is None:
                info.add_error('The name found on the first page of the document does not contain a revision: "%s"' % (name,))
            else:
                info.metadata.rev = rev_match.group(1)

            info.metadata.name = re.sub(r'-\d+$', '', name)
            return

        info.add_error(no_name_error)