From c0f0d2c2378824ba14c54c88fff975fc7d82b905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 08:26:12 +0000 Subject: [PATCH] Two levels of parsing. Fixes #584 - Legacy-Id: 2819 --- ietf/submit/forms.py | 21 +++---------- ietf/submit/parsers/base.py | 23 ++++++++++++-- ietf/submit/parsers/pdf_parser.py | 3 +- ietf/submit/parsers/plain_parser.py | 48 ++++++++++++++++++++++++----- ietf/submit/parsers/ps_parser.py | 3 +- ietf/submit/parsers/xml_parser.py | 3 +- ietf/submit/urls.py | 2 -- ietf/submit/views.py | 12 +------- 8 files changed, 72 insertions(+), 43 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index dc806fb2e..3bee4b43a 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -1,21 +1,8 @@ import datetime -from email.utils import parseaddr from django import forms -from django.conf import settings -from django.db.models import Q -from django.forms.util import ErrorList -from django.forms.fields import email_re from django.template.loader import render_to_string -from ietf.liaisons.accounts import (can_add_outgoing_liaison, can_add_incoming_liaison, - get_person_for_user, is_ietf_liaison_manager) -from ietf.liaisons.models import LiaisonDetail, Uploads, OutgoingLiaisonApproval, SDOs -from ietf.liaisons.utils import IETFHM -from ietf.liaisons.widgets import (FromWidget, ReadOnlyWidget, ButtonWidget, - ShowAttachmentsWidget, RelatedLiaisonWidget) - - from ietf.proceedings.models import Meeting from ietf.submit.parsers.plain_parser import PlainParser from ietf.submit.parsers.pdf_parser import PDFParser @@ -85,22 +72,22 @@ class UploadForm(forms.Form): yield fieldset_dict def clean_txt(self): - parsed_info = PlainParser(self.cleaned_data['txt']).parse_critical() + parsed_info = PlainParser(self.cleaned_data['txt']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_pdf(self): - parsed_info = PDFParser(self.cleaned_data['pdf']).parse_critical() + parsed_info = PDFParser(self.cleaned_data['pdf']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_ps(self): - parsed_info = PSParser(self.cleaned_data['ps']).parse_critical() + parsed_info = PSParser(self.cleaned_data['ps']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_xml(self): - parsed_info = XMLParser(self.cleaned_data['xml']).parse_critical() + parsed_info = XMLParser(self.cleaned_data['xml']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 92578ded2..6571fdfa1 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -5,11 +5,18 @@ import re CUTOFF_HOUR = 17 +class MetaDataDraft(object): + revision = None + filename = None + group = None + + class ParseInfo(object): def __init__(self): self.errors = [] self.warnings = {} + self.metadraft = MetaDataDraft() def add_error(self, error_str): self.errors.append(error_str) @@ -25,7 +32,7 @@ class FileParser(object): self.fd = fd self.parsed_info = ParseInfo() - def parse_critical(self): + def parse(self): if not self.fd: return self.parsed_info for attr in dir(self): @@ -33,9 +40,19 @@ class FileParser(object): method = getattr(self, attr, None) if callable(method): method() - return self.parsed_info + # If some critical parsing has returned an error do not continue + if self.parsed_info.errors: + return self.parsed_info + # Continue with non critical parsing, note that they also can return errors + for attr in dir(self): + if attr.startswith('parse_normal_'): + method = getattr(self, attr, None) + if callable(method): + method() + if self.parsed_info.errors: + return self.parsed_info - def parse_critical_invalid_chars_in_filename(self): + def parse_critical_000_invalid_chars_in_filename(self): name = self.fd.name regexp = re.compile(r'&|\|\/|;|\*|\s|\$') chars = regexp.findall(name) diff --git a/ietf/submit/parsers/pdf_parser.py b/ietf/submit/parsers/pdf_parser.py index b7b7659ca..31e22ffa9 100644 --- a/ietf/submit/parsers/pdf_parser.py +++ b/ietf/submit/parsers/pdf_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class PDFParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.pdf'): self.parsed_info.add_error('Format of this document must be PDF') diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 3979f9ed4..6f560feaa 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -1,36 +1,70 @@ import re +from ietf.idtracker.models import InternetDraft from ietf.submit.error_manager import MainErrorManager from ietf.submit.parsers.base import FileParser MAX_PLAIN_FILE_SIZE = 6000000 +NONE_WG_PK = 1027 + class PlainParser(FileParser): - + def parse_critical_max_size(self): if self.fd.size > MAX_PLAIN_FILE_SIZE: self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) - def parse_critical_file_charset(self): + def parse_critical_001_file_charset(self): import magic self.fd.file.seek(0) m = magic.open(magic.MAGIC_MIME) m.load() - filetype=m.buffer(self.fd.file.read()) + filetype = m.buffer(self.fd.file.read()) if not 'ascii' in filetype: - self.parsed_info.add_error('A plain text document must be submitted.'); + self.parsed_info.add_error('A plain text document must be submitted.') - def parse_filename(self): + def parse_critical_002_filename(self): self.fd.file.seek(0) draftre = re.compile('(draft-\S+)') + revisionre = re.compile('.*-(\d+)$') limit = 80 while limit: + limit -= 1 line = self.fd.readline() - match = draftre.match(line) + match = draftre.search(line) if not match: continue filename = match.group(0) filename = re.sub('^[^\w]+', '', filename) filename = re.sub('[^\w]+$', '', filename) filename = re.sub('\.txt$', '', filename) - line = re.sub('^[^\w]+', '') + extra_chars = re.sub('[0-9a-z\-]', '', filename) + if extra_chars: + self.parsed_info.add_error('Filename contains non alpha-numeric character: %s' % ', '.join(set(extra_chars))) + match_revision = revisionre.match(filename) + if match_revision: + self.parsed_info.metadraft.revision = match_revision.group(0) + filename = re.sub('-\d+$', '', filename) + self.parsed_info.metadraft.filename = filename + return + self.parsed_info.add_error(MainErrorManager.get_error_str('INVALID_FILENAME')) + + def parse_critical_003_wg(self): + filename = self.parsed_info.metadraft.filename + try: + existing_draft = InternetDraft.objects.get(filename=filename) + self.parsed_info.metadraft.wg = existing_draft.group + except InternetDraft.DoesNotExist: + if filename.startswith('draft-ietf-'): + # Extra check for WG that contains dashes + for group in IETFWG.objects.filter(group_acronym__acronym__contains='-'): + if filename.startswith('draft-ietf-%s-' % group.group_acronym.acronym): + self.parsed_info.metadraft.wg = group + return + group_acronym = filename.split('-')[2] + try: + self.parsed_info.metadraft.wg = IETFWG.objects.get(group_acronym__acronym=group_acronym) + except IETFWG.DoesNotExist: + self.parsed_info.add_error('Invalid WG ID: %s' % group_acronym) + else: + self.parsed_info.metadraft.wg = IETFWG.objects.get(pk=NONE_WG_PK) diff --git a/ietf/submit/parsers/ps_parser.py b/ietf/submit/parsers/ps_parser.py index e3d0cea74..e8655bd6c 100644 --- a/ietf/submit/parsers/ps_parser.py +++ b/ietf/submit/parsers/ps_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class PSParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.ps'): self.parsed_info.add_error('Format of this document must be PS') diff --git a/ietf/submit/parsers/xml_parser.py b/ietf/submit/parsers/xml_parser.py index 2edd9e08c..93327e211 100644 --- a/ietf/submit/parsers/xml_parser.py +++ b/ietf/submit/parsers/xml_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class XMLParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.xml'): self.parsed_info.add_error('Format of this document must be XML') diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index d28d5e308..440f0a4de 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py @@ -1,6 +1,4 @@ from django.conf.urls.defaults import patterns, url -from django.db.models import Q -from ietf.liaisons.models import LiaisonDetail urlpatterns = patterns('ietf.submit.views', diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 40a5b4c8b..a9c3fb103 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -1,16 +1,6 @@ # Copyright The IETF Trust 2007, All Rights Reserved -import datetime -from email.utils import parseaddr - -from django.conf import settings -from django.core.urlresolvers import reverse -from django.db.models import Q -from django.forms.fields import email_re -from django.http import HttpResponse, HttpResponseRedirect -from django.shortcuts import render_to_response, get_object_or_404 +from django.shortcuts import render_to_response from django.template import RequestContext -from django.utils import simplejson -from django.views.generic.list_detail import object_list, object_detail from ietf.submit.forms import UploadForm