From 6c253ee48b86912efc1069adacf7a3c55f0aa63b Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Fri, 7 Aug 2015 12:10:26 +0000 Subject: [PATCH] Improved the verification of submitted file extensions and mimetype. - Legacy-Id: 9986 --- bin/.gitignore | 1 + ietf/submit/forms.py | 2 +- ietf/submit/parsers/base.py | 22 ++++++----- ietf/submit/parsers/pdf_parser.py | 4 +- ietf/submit/parsers/plain_parser.py | 4 +- ietf/submit/parsers/ps_parser.py | 4 +- ietf/submit/parsers/xml_parser.py | 4 +- ietf/submit/test_submission.bad | Bin 0 -> 11 bytes ietf/submit/tests.py | 55 ++++++++++++++++++++++++++++ static/.gitignore | 1 + 10 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 ietf/submit/test_submission.bad diff --git a/bin/.gitignore b/bin/.gitignore index 912f4ac81..062f6f163 100644 --- a/bin/.gitignore +++ b/bin/.gitignore @@ -21,3 +21,4 @@ /ipcontroller* /ipengine* /iptest* +/xml2rfc diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index dfb41a2e3..0d795b15a 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -130,7 +130,7 @@ class SubmissionUploadForm(forms.Form): continue self.file_types.append('.%s' % ext) if not ('.txt' in self.file_types or '.xml' in self.file_types): - raise forms.ValidationError('You must submit either a .txt or an .xml file; didn\'t find either.') + raise forms.ValidationError('You must submit at least a valid .txt or a valid .xml file; didn\'t find either.') #debug.show('self.cleaned_data["xml"]') if self.cleaned_data.get('xml'): diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 8dc405507..27d67aed2 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -1,4 +1,4 @@ -import os + import re import magic import datetime @@ -40,6 +40,8 @@ class ParseInfo(object): class FileParser(object): + ext = None + mimetype = None def __init__(self, fd): self.fd = fd @@ -50,6 +52,8 @@ class FileParser(object): def critical_parse(self): self.parse_invalid_chars_in_filename() self.parse_max_size(); + self.parse_filename_extension() + self.parse_file_type() self.parsed_info.metadata.submission_date = datetime.date.today() return self.parsed_info @@ -61,20 +65,18 @@ class FileParser(object): self.parsed_info.add_error('Invalid characters were found in the name of the file which was just submitted: %s' % ', '.join(set(chars))) def parse_max_size(self): - __, ext = os.path.splitext(self.fd.name) - ext = ext.lstrip('.') - max_size = settings.IDSUBMIT_MAX_DRAFT_SIZE[ext] + max_size = settings.IDSUBMIT_MAX_DRAFT_SIZE[self.ext] if self.fd.size > max_size: self.parsed_info.add_error('File size is larger than the permitted maximum of %s' % filesizeformat(max_size)) self.parsed_info.metadata.file_size = self.fd.size - def parse_filename_extension(self, ext): - if not self.fd.name.lower().endswith('.'+ext): - self.parsed_info.add_error('Expected the %s file to have extension ".%s", found "%s"' % (ext.upper(), ext, self.fd.name)) + def parse_filename_extension(self): + if not self.fd.name.lower().endswith('.'+self.ext): + self.parsed_info.add_error('Expected the %s file to have extension ".%s", found the name "%s"' % (self.ext.upper(), self.ext, self.fd.name)) - def parse_file_type(self, ext, expected): + def parse_file_type(self): self.fd.file.seek(0) content = self.fd.file.read(4096) mimetype = magic.from_buffer(content, mime=True) - if not mimetype == expected: - self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (ext.upper(), expected, mimetype)) + if not mimetype == self.mimetype: + self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), self.mimetype, mimetype)) diff --git a/ietf/submit/parsers/pdf_parser.py b/ietf/submit/parsers/pdf_parser.py index 9a5213d6e..6062920bd 100644 --- a/ietf/submit/parsers/pdf_parser.py +++ b/ietf/submit/parsers/pdf_parser.py @@ -2,11 +2,11 @@ from ietf.submit.parsers.base import FileParser class PDFParser(FileParser): + ext = 'pdf' + mimetype = 'application/pdf' # If some error is found after this method invocation # no other file parsing is recommended def critical_parse(self): super(PDFParser, self).critical_parse() - self.parse_filename_extension('pdf') - self.parse_file_type('pdf', 'application/pdf') return self.parsed_info diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 042d38df1..62a64163f 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -4,6 +4,8 @@ from ietf.submit.parsers.base import FileParser class PlainParser(FileParser): + ext = 'txt' + mimetype = 'text/plain' def __init__(self, fd): super(PlainParser, self).__init__(fd) @@ -12,8 +14,6 @@ class PlainParser(FileParser): # no other file parsing is recommended def critical_parse(self): super(PlainParser, self).critical_parse() - self.parse_filename_extension('txt') - self.parse_file_type('txt', 'text/plain') self.parse_file_charset() self.parse_name() return self.parsed_info diff --git a/ietf/submit/parsers/ps_parser.py b/ietf/submit/parsers/ps_parser.py index aff71da2d..3597de05b 100644 --- a/ietf/submit/parsers/ps_parser.py +++ b/ietf/submit/parsers/ps_parser.py @@ -2,11 +2,11 @@ from ietf.submit.parsers.base import FileParser class PSParser(FileParser): + ext = 'ps' + mimetype = 'application/postscript' # If some error is found after this method invocation # no other file parsing is recommended def critical_parse(self): super(PSParser, self).critical_parse() - self.parse_filename_extension('ps') - self.parse_file_type('ps', 'application/postscript') return self.parsed_info diff --git a/ietf/submit/parsers/xml_parser.py b/ietf/submit/parsers/xml_parser.py index b584b1b46..a4beaecd2 100644 --- a/ietf/submit/parsers/xml_parser.py +++ b/ietf/submit/parsers/xml_parser.py @@ -2,12 +2,12 @@ from ietf.submit.parsers.base import FileParser class XMLParser(FileParser): + ext = 'xml' + mimetype = 'application/xml' # If some error is found after this method invocation # no other file parsing is recommended def critical_parse(self): super(XMLParser, self).critical_parse() - self.parse_filename_extension('xml') - self.parse_file_type('xml', 'application/xml') return self.parsed_info diff --git a/ietf/submit/test_submission.bad b/ietf/submit/test_submission.bad new file mode 100644 index 0000000000000000000000000000000000000000..33261ae14abaaafa514d9311ae2f4091f1e24968 GIT binary patch literal 11 Scmb<-^>JfjWMp7q-~s>*F#%-& literal 0 HcmV?d00001 diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py index d06c40ff0..85e8386a3 100644 --- a/ietf/submit/tests.py +++ b/ietf/submit/tests.py @@ -726,6 +726,61 @@ class SubmitTests(TestCase): q = PyQuery(r.content) self.assertEqual(len(q('input[type=file][name=txt]')), 1) + def submit_bad_file(self, name, formats): + + make_test_data() + + rev = "" + group = None + + # break early in case of missing configuration + self.assertTrue(os.path.exists(settings.IDSUBMIT_IDNITS_BINARY)) + + # get + url = urlreverse('submit_upload_submission') + r = self.client.get(url) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + + # submit + files = {} + for format in formats: + files[format] = self.submission_file(name, rev, group, "bad", "test_submission.bad") + + r = self.client.post(url, files) + + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertTrue(len(q("form .has-error")) > 0) + m = q('div.has-error span.help-block').text() + + return r, q, m + + def test_submit_bad_file_txt(self): + r, q, m = self.submit_bad_file("some name", ["txt"]) + self.assertIn('Invalid characters were found in the name', m) + self.assertIn('Expected the TXT file to have extension ".txt"', m) + self.assertIn('Expected an TXT file of type "text/plain"', m) + self.assertIn('document does not contain a legitimate name', m) + + def test_submit_bad_file_xml(self): + r, q, m = self.submit_bad_file("some name", ["xml"]) + self.assertIn('Invalid characters were found in the name', m) + self.assertIn('Expected the XML file to have extension ".xml"', m) + self.assertIn('Expected an XML file of type "application/xml"', m) + + def test_submit_bad_file_pdf(self): + r, q, m = self.submit_bad_file("some name", ["pdf"]) + self.assertIn('Invalid characters were found in the name', m) + self.assertIn('Expected the PDF file to have extension ".pdf"', m) + self.assertIn('Expected an PDF file of type "application/pdf"', m) + + def test_submit_bad_file_ps(self): + r, q, m = self.submit_bad_file("some name", ["ps"]) + self.assertIn('Invalid characters were found in the name', m) + self.assertIn('Expected the PS file to have extension ".ps"', m) + self.assertIn('Expected an PS file of type "application/postscript"', m) + class ApprovalsTestCase(TestCase): def test_approvals(self): make_test_data() diff --git a/static/.gitignore b/static/.gitignore index a74b07aee..79ea68485 100644 --- a/static/.gitignore +++ b/static/.gitignore @@ -1 +1,2 @@ +/* /*.pyc