From 8c5ffce3eb8cfe96612578c00a047928c91b96b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Thu, 27 Jan 2011 19:35:59 +0000 Subject: [PATCH 01/61] Submit application skel, some direct templates and a very basic form. Fixes #575 - Legacy-Id: 2766 --- ietf/submit/__init__.py | 0 ietf/submit/forms.py | 49 ++++++++ ietf/submit/urls.py | 22 ++++ ietf/submit/views.py | 30 +++++ ietf/templates/base_leftmenu.html | 2 +- ietf/templates/submit/note_well.html | 27 +++++ ietf/templates/submit/submit_base.html | 26 ++++ ietf/templates/submit/submit_index.html | 20 ++++ ietf/templates/submit/submitform.html | 50 ++++++++ ietf/templates/submit/tool_instructions.html | 118 +++++++++++++++++++ ietf/urls.py | 1 + 11 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 ietf/submit/__init__.py create mode 100644 ietf/submit/forms.py create mode 100644 ietf/submit/urls.py create mode 100644 ietf/submit/views.py create mode 100644 ietf/templates/submit/note_well.html create mode 100644 ietf/templates/submit/submit_base.html create mode 100644 ietf/templates/submit/submit_index.html create mode 100644 ietf/templates/submit/submitform.html create mode 100644 ietf/templates/submit/tool_instructions.html diff --git a/ietf/submit/__init__.py b/ietf/submit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py new file mode 100644 index 000000000..cee907c41 --- /dev/null +++ b/ietf/submit/forms.py @@ -0,0 +1,49 @@ +import datetime +from email.utils import parseaddr + +from django import forms +from django.conf import settings +from django.db.models import Q +from django.forms.util import ErrorList +from django.forms.fields import email_re +from django.template.loader import render_to_string + +from ietf.liaisons.accounts import (can_add_outgoing_liaison, can_add_incoming_liaison, + get_person_for_user, is_ietf_liaison_manager) +from ietf.liaisons.models import 
LiaisonDetail, Uploads, OutgoingLiaisonApproval, SDOs +from ietf.liaisons.utils import IETFHM +from ietf.liaisons.widgets import (FromWidget, ReadOnlyWidget, ButtonWidget, + ShowAttachmentsWidget, RelatedLiaisonWidget) + + +class UploadForm(forms.Form): + + txt = forms.FileField(label=u'.txt format', required=True) + xml = forms.FileField(label=u'.xml format', required=False) + pdf = forms.FileField(label=u'.pdf format', required=False) + ps = forms.FileField(label=u'.ps format', required=False) + + fieldsets = [('Upload a draft', ('txt', 'xml', 'pdf', 'ps'))] + + class Media: + css = {'all': ("/css/liaisons.css", )} + + def __unicode__(self): + return self.as_div() + + def as_div(self): + return render_to_string('submit/submitform.html', {'form': self}) + + def get_fieldsets(self): + if not self.fieldsets: + yield dict(name=None, fields=self) + else: + for fieldset, fields in self.fieldsets: + fieldset_dict = dict(name=fieldset, fields=[]) + for field_name in fields: + if field_name in self.fields.keyOrder: + fieldset_dict['fields'].append(self[field_name]) + if not fieldset_dict['fields']: + # if there is no fields in this fieldset, we continue to next fieldset + continue + yield fieldset_dict diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py new file mode 100644 index 000000000..d28d5e308 --- /dev/null +++ b/ietf/submit/urls.py @@ -0,0 +1,22 @@ +from django.conf.urls.defaults import patterns, url +from django.db.models import Q +from ietf.liaisons.models import LiaisonDetail + + +urlpatterns = patterns('ietf.submit.views', + url(r'^$', 'submit_index', name='submit_index'), + url(r'^status/$', 'submit_status', name='submit_status'), +) + +urlpatterns += patterns('django.views.generic.simple', + url(r'^note-well/$', 'direct_to_template', + {'template': 'submit/note_well.html', + 'extra_context': {'selected': 'notewell'} + }, + name='submit_note_well'), + url(r'^tool-instructions/$', 'direct_to_template', + {'template': 'submit/tool_instructions.html', + 
'extra_context': {'selected': 'instructions'} + }, + name='submit_tool_instructions'), +) diff --git a/ietf/submit/views.py b/ietf/submit/views.py new file mode 100644 index 000000000..458b73835 --- /dev/null +++ b/ietf/submit/views.py @@ -0,0 +1,30 @@ +# Copyright The IETF Trust 2007, All Rights Reserved +import datetime +from email.utils import parseaddr + +from django.conf import settings +from django.core.urlresolvers import reverse +from django.db.models import Q +from django.forms.fields import email_re +from django.http import HttpResponse, HttpResponseRedirect +from django.shortcuts import render_to_response, get_object_or_404 +from django.template import RequestContext +from django.utils import simplejson +from django.views.generic.list_detail import object_list, object_detail + +from ietf.submit.forms import UploadForm + + +def submit_index(request): + if request.method == 'POST': + form = UploadForm(data=request.POST, files=request.FILES) + else: + form = UploadForm() + return render_to_response('submit/submit_index.html', + {'selected': 'index', + 'form': form}, + context_instance=RequestContext(request)) + + +def submit_status(request): + pass diff --git a/ietf/templates/base_leftmenu.html b/ietf/templates/base_leftmenu.html index a54a89c8d..d5de51256 100644 --- a/ietf/templates/base_leftmenu.html +++ b/ietf/templates/base_leftmenu.html @@ -64,7 +64,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  • Drafts & RFCs
  • Search
  • -
  • Submit a draft
  • +
  • Submit a draft
  • Meetings
  • diff --git a/ietf/templates/submit/note_well.html b/ietf/templates/submit/note_well.html new file mode 100644 index 000000000..48ef1f50e --- /dev/null +++ b/ietf/templates/submit/note_well.html @@ -0,0 +1,27 @@ +{% extends "submit/submit_base.html" %} +{% block title %}Note Well{% endblock %} + +{% block submit_content %} +

    Note Well

    +

    Any submission to the IETF intended by the Contributor for publication as all or part of an IETF Internet-Draft or RFC and any statement made within the context of an IETF activity is considered an "IETF Contribution". Such statements include oral statements in IETF sessions, as well as written and electronic communications made at any time or place, which are addressed to:

    + + + +

    All IETF Contributions are subject to the rules of RFC 5378 and RFC 3979 (updated by RFC 4879).

    + +

    Statements made outside of an IETF session, mailing list or other function, that are clearly not intended to be input to an IETF activity, group or function, are not IETF Contributions in the context of this notice.

    + +

    Please consult RFC 5378 and RFC 3979 for details.

    + +

    A participant in any IETF activity is deemed to accept all IETF rules of process, as documented in Best Current Practices RFCs and IESG Statements.

    + +

    A participant in any IETF activity acknowledges that written, audio and video records of meetings may be made and may be available to the public.

    +{% endblock %} diff --git a/ietf/templates/submit/submit_base.html b/ietf/templates/submit/submit_base.html new file mode 100644 index 000000000..4ffc1561e --- /dev/null +++ b/ietf/templates/submit/submit_base.html @@ -0,0 +1,26 @@ +{% extends "base.html" %} + +{% block morecss %} +.ietf-navset { + background:#214197 url(/images/yui/sprite.png) repeat-x left -1400px; + color:white; + border:1px solid black; + padding:4px; +} +.ietf-navset .selected { font-weight:bold; padding: 0 3px; } +.ietf-navset a, .ietf-navset a:visited { color: white; padding:0 3px; } +{% endblock %} + +{% block content %} +

    IETF Internet-Draft Submission

    + +
    +{% ifequal selected "index" %}Upload{% else %}Upload{% endifequal %} | +{% ifequal selected "status" %}Status{% else %}Status{% endifequal %} | +{% ifequal selected "instructions" %}Tool Instructions{% else %}Tool Instructions{% endifequal %} | +{% ifequal selected "notewell" %}NOTE WELL{% else %}NOTE WELL{% endifequal %} +
    + +{% block submit_content %} +{% endblock %} +{% endblock %} diff --git a/ietf/templates/submit/submit_index.html b/ietf/templates/submit/submit_index.html new file mode 100644 index 000000000..604d079ad --- /dev/null +++ b/ietf/templates/submit/submit_index.html @@ -0,0 +1,20 @@ +{% extends "submit/submit_base.html" %} +{% block title %}Upload{% endblock %} + +{% block pagehead %} +{{ form.media }} +{% endblock %} + +{% block submit_content %} +

    This page is used to submit IETF Internet-Drafts to the Internet-Draft repository. The list of current Internet-Drafts can be accessed at http://www.ietf.org/ietf/1id-abstracts.txt

    +

    Internet-Drafts are working documents of the Internet Engineering Task Force (IETF), its areas, and its working groups. Note that other groups may also distribute working documents as Internet-Drafts.

    +

    Internet-Drafts are draft documents, and are valid for a maximum of six months. They may be updated, replaced, or obsoleted by other documents at any time.

    +

    If you run into problems when submitting an Internet-Draft using this and the following pages, you may alternatively submit your draft by email to internet-drafts@ietf.org. However, be advised that manual processing always takes additional time.

    + +{{ form }} + +

    +The IETF is an organized activity of the Internet Society +
    Please send problem reports to ietf-action@ietf.org. +

    +{% endblock %} diff --git a/ietf/templates/submit/submitform.html b/ietf/templates/submit/submitform.html new file mode 100644 index 000000000..9c43ab0a9 --- /dev/null +++ b/ietf/templates/submit/submitform.html @@ -0,0 +1,50 @@ +{% load i18n %} + +
    + + + +
    + {% if form.errors %} +
    + Please correct the errors below. +
    + {% endif %} +{% for fieldset in form.get_fieldsets %} + {% if fieldset.name %} +
    +

    {{ fieldset.name }}

    + {% endif %} + + {% for field in fieldset.fields %} +
    + +
    +
    {{ field.help_text }}
    + {{ field }} + {{ field.errors }} +
    +
    +
    + {% endfor %} + + {% if fieldset.name %} +
    + {% endif %} +{% endfor %} + +
    + +
    +
    + +
    diff --git a/ietf/templates/submit/tool_instructions.html b/ietf/templates/submit/tool_instructions.html new file mode 100644 index 000000000..dfec6c555 --- /dev/null +++ b/ietf/templates/submit/tool_instructions.html @@ -0,0 +1,118 @@ +{% extends "submit/submit_base.html" %} +{% block title %}Note Well{% endblock %} + +{% block submit_content %} +

    I-D Submission Tool Instructions

    +

    Tool URL: http://datatracker.ietf.org/{% url submit_index %}

    +This page will explain the purpose and content of each screen in the I-D Submission Tool, and the actions that result from clicking the form buttons on each screen.
    + +The specification for this tool can be found in RFC 4228. +
      +
    1. +Upload Screen +

      +The Upload screen is the first screen that a user will see when he or she starts the I-D submission process. A user can submit four different formats of an I-D, plain text, XML, PDF, and postscript, at the same time. Failure to submit a plain-text version will cause an error, and an error screen will be displayed. +

      +

      +Form buttons and resulting actions: + + + + + + + + + + + + + + + + + + +
      .txt format: Button to select a plain-text file of an I-D from a user's local file system. A plain-text version is mandatory and leaving the field blank will cause an error.
      .xml format: Button to select an XML file of an I-D from a user's local file system.
      .pdf format: Button to select a PDF file of an I-D from a user's local file system.
      .ps format: Button to select a postscript file of an I-D from a user's local file system.
      Upload: Button to upload the document(s). The tool will begin parsing the plain-text document and validate the document. The parsed meta-data will be displayed for user confirmation along with the validation results.
      +

    2. +
    3. +Validation Screen +

      +After a user uploads a plain-text version, or multiple versions of an I-D, the tool will parse the plain-text version, validate the I-D, and display the validation results with option(s) for next steps. The validation includes: checking for all IPR-related notices and I-D boilerplate described in Guidelines to Authors of Internet-Drafts; the required sections described in the I-D Check List; the version number; and the creation date. +

      +

      + +If the submission does not have any validation errors, then the user will be allowed to proceed with the automated posting process. This process will begin with submitter authentication, which will be done by e-mail. +

      +

      +A user must carefully examine the meta-data that are displayed on this screen, and make sure that these data were extracted correctly. If the data were not extracted correctly, then the user can correct the errors via the Adjust page. In such a case, the user will pass the draft to the Secretariat for manual posting. +

      +Form buttons and resulting actions:
      + + + + + + + + + + + + + + + + + + + + +
      Adjust Meta-Data: Button to proceed to a screen with editable form fields for correcting the meta-data. A user can use this button to request manual posting by the Secretariat.
      Cancel: Button to cancel the current submission. A user will be prompted for a confirmation before the submission is canceled. Once confirmed, the current submission will be canceled, the uploaded document(s) will be deleted permanently from the server, and a notification message will be sent to all authors with the IP address of the user who just canceled the submission.

      When no meta-data error is detected:

      Button(s) with Author's name(s): Button(s) to automatically fill out the given name, family name, and email address of the authors. If the submitter is one of the authors, then the submitter's information will be automatically inserted in the appropriate fields. If the submitter is not one of the authors, then the submitter will need to manually fill out these fields.
      Post Now: Button to start the automated posting process with submitter authentication. Once clicked, an email message will be sent to the submitter whose email address was provided within the form. The submitter will need to open the email message via his or her email application, and click the link provided in the message body. +

      + +Once a link in the email body is clicked, the document gets pushed to the IETF Web and FTP sites, a notification is sent to the authors of the document, and an I-D Action announcement will be sent out within the next 15 minutes. +

      +If the document requires additional approval from a chair of a working group, i.e., for submission of a 00 version of a working group document, then a message will be sent to the chairs of the working group for approval. Once approved, the document will be immediately announced and available via the IETF Web and FTP sites.
      +
    4. +
    5. +Adjust Screen +

      +This is the screen where a user can adjust any meta-data that could have been incorrectly parsed from the submitted document. The document with adjusted meta-data will be submitted to the Secretariat for manual posting. +

      +Form buttons and resulting actions:
      + + + + + + + + + + +
      Button(s) with Author's name(s): Button(s) to automatically fill out the given name, family name, and email address of the authors. If the submitter is one of the authors, then the submitter's information will be automatically inserted in the appropriate fields. If the submitter is not one of the authors, then the submitter will need to manually fill out these fields.
      Submit for manual posting: Button to send a manual posting request to the Secretariat including any corrected meta-data and comments for the Secretariat. Once clicked, a notification message will be sent to the Secretariat, and a receipt page will be displayed. +
      Cancel: Button to cancel the current submission. A user will be prompted for a confirmation before the submission is canceled. Once confirmed, the current submission will be canceled, the uploaded document(s) will be deleted permanently from the server, and a notification message will be sent to all authors with the IP address of the user who just canceled the submission.
      +
    6. +
    7. + +Status Screen +

      +The Status screen is the screen where a user can view the current status of a document that has just been submitted by the user, or a document that was submitted previously via the tool. If the 'Status' link is clicked from the tool's first page, then a form field will be provided for a user to look up a document by filename.

      +Form buttons and resulting actions:
      + + + + +
      Cancel: Button to cancel the current submission. This button will be displayed only when the document is in the process of being submitted. A user will be prompted for a confirmation before the submission is canceled. Once confirmed, the current submission will be canceled, the uploaded document(s) will be deleted permanently from the server, and a notification message will be sent to all authors with the IP address of the user who just canceled the submission.
      +
    8. +
    9. +Problem Report + +

      +Please send problem reports to ietf-action@ietf.org. +

      +
    10. +
    +{% endblock %} diff --git a/ietf/urls.py b/ietf/urls.py index 41ce897bc..413604ecc 100644 --- a/ietf/urls.py +++ b/ietf/urls.py @@ -55,6 +55,7 @@ urlpatterns = patterns('', (r'^accounts/', include('ietf.ietfauth.urls')), (r'^doc/', include('ietf.idrfc.urls')), (r'^wg/', include('ietf.wginfo.urls')), + (r'^submit/', include('ietf.submit.urls')), (r'^$', 'ietf.idrfc.views.main'), ('^admin/', include(admin.site.urls)), From 4bc95f643628d783c730f719727164986d76d2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Thu, 3 Feb 2011 11:46:04 +0000 Subject: [PATCH 02/61] Cutoff dates and basic checkings over the plain file. See #580 - Legacy-Id: 2806 --- ietf/settings.py | 1 + ietf/submit/error_manager.py | 19 ++++++++ ietf/submit/forms.py | 64 ++++++++++++++++++++++++++ ietf/submit/models.py | 42 +++++++++++++++++ ietf/submit/parsers/__init__.py | 0 ietf/submit/parsers/base.py | 43 +++++++++++++++++ ietf/submit/parsers/pdf_parser.py | 7 +++ ietf/submit/parsers/plain_parser.py | 36 +++++++++++++++ ietf/submit/parsers/ps_parser.py | 7 +++ ietf/submit/parsers/xml_parser.py | 7 +++ ietf/submit/views.py | 2 + ietf/templates/submit/submit_base.html | 7 +++ ietf/templates/submit/submitform.html | 2 + 13 files changed, 237 insertions(+) create mode 100644 ietf/submit/error_manager.py create mode 100644 ietf/submit/models.py create mode 100644 ietf/submit/parsers/__init__.py create mode 100644 ietf/submit/parsers/base.py create mode 100644 ietf/submit/parsers/pdf_parser.py create mode 100644 ietf/submit/parsers/plain_parser.py create mode 100644 ietf/submit/parsers/ps_parser.py create mode 100644 ietf/submit/parsers/xml_parser.py diff --git a/ietf/settings.py b/ietf/settings.py index aecc2fb73..be418e92d 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -131,6 +131,7 @@ INSTALLED_APPS = ( 'ietf.redirects', 'ietf.idrfc', 'ietf.wginfo', + 'ietf.submit', ) INTERNAL_IPS = ( diff --git a/ietf/submit/error_manager.py 
b/ietf/submit/error_manager.py new file mode 100644 index 000000000..b4dcefd8f --- /dev/null +++ b/ietf/submit/error_manager.py @@ -0,0 +1,19 @@ +from ietf.submit.models import IdSubmissionStatus + +class ErrorManager(object): + ERROR_CODES = { + 'DEFAULT': 'Unknow error', + 'INVALID_FILENAME': 111, + 'EXCEEDED_SIZE': 102, + } + + def get_error_str(self, key): + error_code = self.ERROR_CODES.get(key, self.ERROR_CODES['DEFAULT']) + if isinstance(error_code, basestring): + return '%s (%s)' % (key, error_code) + try: + return IdSubmissionStatus.objects.get(status_id=error_code).status_value + except IdSubmissionStatus.DoesNotExist: + return '%s (%s)' % (self.ERROR_CODES['DEFAULT'], key) + +MainErrorManager=ErrorManager() diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index cee907c41..69acd0573 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -16,6 +16,16 @@ from ietf.liaisons.widgets import (FromWidget, ReadOnlyWidget, ButtonWidget, ShowAttachmentsWidget, RelatedLiaisonWidget) +from ietf.submit.models import IdSubmitDateConfig +from ietf.submit.parsers.plain_parser import PlainParser +from ietf.submit.parsers.pdf_parser import PDFParser +from ietf.submit.parsers.ps_parser import PSParser +from ietf.submit.parsers.xml_parser import XMLParser + + +CUTOFF_HOUR = 17 + + class UploadForm(forms.Form): txt = forms.FileField(label=u'.txt format', required=True) @@ -28,6 +38,35 @@ class UploadForm(forms.Form): class Media: css = {'all': ("/css/liaisons.css", )} + def __init__(self, *args, **kwargs): + super(UploadForm, self).__init__(*args, **kwargs) + self.in_first_cut_off = False + self.shutdown = False + self.read_dates() + + def read_dates(self): + now = datetime.datetime.now() + first_cut_off = IdSubmitDateConfig.get_first_cut_off() + second_cut_off = IdSubmitDateConfig.get_second_cut_off() + ietf_monday = IdSubmitDateConfig.get_ietf_monday() + processed_ids_date = IdSubmitDateConfig.get_processed_ids_date() + monday_after_ietf = 
IdSubmitDateConfig.get_monday_after_ietf() + list_aproved_date = IdSubmitDateConfig.get_list_aproved_date() + + if now.date() >= first_cut_off and now.date() < second_cut_off: # We are in the first_cut_off + if now.date() == first_cut_off and now.hour < CUTOFF_HOUR: + self.cutoff_warning = 'The pre-meeting cutoff date for new documents (i.e., version -00 Internet-Drafts) is %s at 5 PM (PT). You will not be able to submit a new document after this time until %s, at midnight' % (first_cut_off, ietf_monday) + else: # No 00 version allowed + self.cutoff_warning = 'The pre-meeting cutoff date for new documents (i.e., version -00 Internet-Drafts) was %s at 5 PM (PT). You will not be able to submit a new document until %s, at midnight.
    You can still submit a version -01 or higher Internet-Draft until 5 PM (PT), %s' % (first_cut_off, ietf_monday, second_cut_off) + self.in_first_cut_off = True + elif now.date() >= second_cut_off and now.date() < ietf_monday: + if now.date() == second_cut_off and now.hour < CUTOFF_HOUR: # We are in the first_cut_off yet + self.cutoff_warning = 'The pre-meeting cutoff date for new documents (i.e., version -00 Internet-Drafts) was %s at 5 PM (PT). You will not be able to submit a new document until %s, at midnight.
    The I-D submission tool will be shut down at 5 PM (PT) today, and reopened at midnight (PT), %s' % (first_cut_off, ietf_monday, ietf_monday) + self.in_first_cut_off = True + else: # Completely shut down of the tool + self.cutoff_warning = 'The cut off time for the I-D submission was 5 PM (PT), %s.
    The I-D submission tool will be reopened at midnight, %s' % (second_cut_off, ietf_monday) + self.shutdown = True + def __unicode__(self): return self.as_div() @@ -47,3 +86,28 @@ class UploadForm(forms.Form): # if there is no fields in this fieldset, we continue to next fieldset continue yield fieldset_dict + + def clean_txt(self): + parsed_info = PlainParser(self.cleaned_data['txt']).parse_critical() + if parsed_info.errors: + raise forms.ValidationError(parsed_info.errors) + + def clean_pdf(self): + parsed_info = PDFParser(self.cleaned_data['pdf']).parse_critical() + if parsed_info.errors: + raise forms.ValidationError(parsed_info.errors) + + def clean_ps(self): + parsed_info = PSParser(self.cleaned_data['ps']).parse_critical() + if parsed_info.errors: + raise forms.ValidationError(parsed_info.errors) + + def clean_xml(self): + parsed_info = XMLParser(self.cleaned_data['xml']).parse_critical() + if parsed_info.errors: + raise forms.ValidationError(parsed_info.errors) + + def clean(self): + if self.shutdown: + raise forms.ValidationError('The tool is shut down') + return super(UploadForm, self).clean() diff --git a/ietf/submit/models.py b/ietf/submit/models.py new file mode 100644 index 000000000..22be5e1f7 --- /dev/null +++ b/ietf/submit/models.py @@ -0,0 +1,42 @@ +from django.db import models + +class IdSubmissionStatus(models.Model): + status_id = models.IntegerField(primary_key=True) + status_value = models.CharField(blank=True, max_length=255) + + class Meta: + db_table = 'id_submission_status' + + +class IdSubmitDateConfig(models.Model): + id = models.IntegerField(primary_key=True) + id_date = models.DateField(null=True, blank=True) + date_name = models.CharField(blank=True, max_length=255) + f_name = models.CharField(blank=True, max_length=255) + + class Meta: + db_table = 'id_dates' + + @classmethod + def get_first_cut_off(cls): + return cls.objects.get(id=1).id_date + + @classmethod + def get_second_cut_off(cls): + return cls.objects.get(id=2).id_date 
+ + @classmethod + def get_ietf_monday(cls): + return cls.objects.get(id=3).id_date + + @classmethod + def get_processed_ids_date(cls): + return cls.objects.get(id=4).id_date + + @classmethod + def get_monday_after_ietf(cls): + return cls.objects.get(id=5).id_date + + @classmethod + def get_list_aproved_date(cls): + return cls.objects.get(id=6).id_date diff --git a/ietf/submit/parsers/__init__.py b/ietf/submit/parsers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py new file mode 100644 index 000000000..92578ded2 --- /dev/null +++ b/ietf/submit/parsers/base.py @@ -0,0 +1,43 @@ +import datetime +import re + + +CUTOFF_HOUR = 17 + + +class ParseInfo(object): + + def __init__(self): + self.errors = [] + self.warnings = {} + + def add_error(self, error_str): + self.errors.append(error_str) + + def add_warning(self, warning_type, warning_str): + warn_list = self.warnings.get(warning_type, []) + self.warnings[warning_type] = warn_list + [warning_str] + + +class FileParser(object): + + def __init__(self, fd): + self.fd = fd + self.parsed_info = ParseInfo() + + def parse_critical(self): + if not self.fd: + return self.parsed_info + for attr in dir(self): + if attr.startswith('parse_critical_'): + method = getattr(self, attr, None) + if callable(method): + method() + return self.parsed_info + + def parse_critical_invalid_chars_in_filename(self): + name = self.fd.name + regexp = re.compile(r'&|\|\/|;|\*|\s|\$') + chars = regexp.findall(name) + if chars: + self.parsed_info.add_error('Invalid characters were found in the name of the file which was just submitted: %s' % ', '.join(set(chars))) diff --git a/ietf/submit/parsers/pdf_parser.py b/ietf/submit/parsers/pdf_parser.py new file mode 100644 index 000000000..b7b7659ca --- /dev/null +++ b/ietf/submit/parsers/pdf_parser.py @@ -0,0 +1,7 @@ +from ietf.submit.parsers.base import FileParser + +class PDFParser(FileParser): + + def 
parse_critical_filename_extension(self): + if not self.fd.name.endswith('.pdf'): + self.parsed_info.add_error('Format of this document must be PDF') diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py new file mode 100644 index 000000000..3979f9ed4 --- /dev/null +++ b/ietf/submit/parsers/plain_parser.py @@ -0,0 +1,36 @@ +import re + +from ietf.submit.error_manager import MainErrorManager +from ietf.submit.parsers.base import FileParser + +MAX_PLAIN_FILE_SIZE = 6000000 + +class PlainParser(FileParser): + + def parse_critical_max_size(self): + if self.fd.size > MAX_PLAIN_FILE_SIZE: + self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) + + def parse_critical_file_charset(self): + import magic + self.fd.file.seek(0) + m = magic.open(magic.MAGIC_MIME) + m.load() + filetype=m.buffer(self.fd.file.read()) + if not 'ascii' in filetype: + self.parsed_info.add_error('A plain text document must be submitted.'); + + def parse_filename(self): + self.fd.file.seek(0) + draftre = re.compile('(draft-\S+)') + limit = 80 + while limit: + line = self.fd.readline() + match = draftre.match(line) + if not match: + continue + filename = match.group(0) + filename = re.sub('^[^\w]+', '', filename) + filename = re.sub('[^\w]+$', '', filename) + filename = re.sub('\.txt$', '', filename) + line = re.sub('^[^\w]+', '') diff --git a/ietf/submit/parsers/ps_parser.py b/ietf/submit/parsers/ps_parser.py new file mode 100644 index 000000000..e3d0cea74 --- /dev/null +++ b/ietf/submit/parsers/ps_parser.py @@ -0,0 +1,7 @@ +from ietf.submit.parsers.base import FileParser + +class PSParser(FileParser): + + def parse_critical_filename_extension(self): + if not self.fd.name.endswith('.ps'): + self.parsed_info.add_error('Format of this document must be PS') diff --git a/ietf/submit/parsers/xml_parser.py b/ietf/submit/parsers/xml_parser.py new file mode 100644 index 000000000..2edd9e08c --- /dev/null +++ b/ietf/submit/parsers/xml_parser.py @@ -0,0 +1,7 
@@ +from ietf.submit.parsers.base import FileParser + +class XMLParser(FileParser): + + def parse_critical_filename_extension(self): + if not self.fd.name.endswith('.xml'): + self.parsed_info.add_error('Format of this document must be XML') diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 458b73835..40a5b4c8b 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -18,6 +18,8 @@ from ietf.submit.forms import UploadForm def submit_index(request): if request.method == 'POST': form = UploadForm(data=request.POST, files=request.FILES) + if form.is_valid(): + pass else: form = UploadForm() return render_to_response('submit/submit_index.html', diff --git a/ietf/templates/submit/submit_base.html b/ietf/templates/submit/submit_base.html index 4ffc1561e..e89d599a2 100644 --- a/ietf/templates/submit/submit_base.html +++ b/ietf/templates/submit/submit_base.html @@ -9,6 +9,7 @@ } .ietf-navset .selected { font-weight:bold; padding: 0 3px; } .ietf-navset a, .ietf-navset a:visited { color: white; padding:0 3px; } +.cutoff-warning { border: 1px dashed red; background-color: #ffeeaa; padding: 1em 2em; margin: 1em 0px; } {% endblock %} {% block content %} @@ -21,6 +22,12 @@ {% ifequal selected "notewell" %}NOTE WELL{% else %}NOTE WELL{% endifequal %} +{% if form.cutoff_warning %} +
    +{{ form.cutoff_warning|safe }} +
    +{% endif %} + {% block submit_content %} {% endblock %} {% endblock %} diff --git a/ietf/templates/submit/submitform.html b/ietf/templates/submit/submitform.html index 9c43ab0a9..5e889674b 100644 --- a/ietf/templates/submit/submitform.html +++ b/ietf/templates/submit/submitform.html @@ -42,9 +42,11 @@ {% endif %} {% endfor %} +{% if not form.shutdown %}
    +{% endif %} From c0e1084a2066a8666ac305cab524c8e07ce2f3a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Tarrag=C3=B3n?= Date: Mon, 7 Feb 2011 17:08:33 +0000 Subject: [PATCH 03/61] fixing cutoff logic on submission. Closes #583. - Legacy-Id: 2817 --- ietf/proceedings/models.py | 20 +++++++++++++++++++- ietf/settings.py | 4 ++++ ietf/submit/forms.py | 11 ++++------- ietf/submit/models.py | 34 ---------------------------------- 4 files changed, 27 insertions(+), 42 deletions(-) diff --git a/ietf/proceedings/models.py b/ietf/proceedings/models.py index 992b50519..d25d5d9dc 100644 --- a/ietf/proceedings/models.py +++ b/ietf/proceedings/models.py @@ -1,6 +1,7 @@ # Copyright The IETF Trust 2007, All Rights Reserved from django.db import models +from django.conf import settings from ietf.idtracker.models import Acronym, PersonOrOrgInfo, IRTF, AreaGroup, Area, IETFWG import datetime #from ietf.utils import log @@ -141,7 +142,7 @@ class Meeting(models.Model): overview1 = models.TextField(blank=True) overview2 = models.TextField(blank=True) def __str__(self): - return "IETF %s" % (self.meeting_num) + return "IETF %s" % (self.meeting_num) def get_meeting_date (self,offset): return self.start_date + datetime.timedelta(days=offset) def num(self): @@ -149,6 +150,23 @@ class Meeting(models.Model): class Meta: db_table = 'meetings' + @classmethod + def get_first_cut_off(cls): + start_date = cls.objects.all().order_by('-start_date')[0].start_date + offset = datetime.timedelta(days=settings.FIRST_CUTOFF_DAYS) + return start_date - offset + + @classmethod + def get_second_cut_off(cls): + start_date = cls.objects.all().order_by('-start_date')[0].start_date + offset = datetime.timedelta(days=settings.SECOND_CUTOFF_DAYS) + return start_date - offset + + @classmethod + def get_ietf_monday(cls): + start_date = cls.objects.all().order_by('-start_date')[0].start_date + return start_date + datetime.timedelta(days=-start_date.weekday(), weeks=1) + class MeetingVenue(models.Model): 
meeting_num = models.ForeignKey(Meeting, db_column='meeting_num', unique=True) break_area_name = models.CharField(max_length=255) diff --git a/ietf/settings.py b/ietf/settings.py index be418e92d..7552997a5 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -188,6 +188,10 @@ LIAISON_UNIVERSAL_FROM = 'Liaison Statement Management Tool = first_cut_off and now.date() < second_cut_off: # We are in the first_cut_off if now.date() == first_cut_off and now.hour < CUTOFF_HOUR: diff --git a/ietf/submit/models.py b/ietf/submit/models.py index 22be5e1f7..985905665 100644 --- a/ietf/submit/models.py +++ b/ietf/submit/models.py @@ -6,37 +6,3 @@ class IdSubmissionStatus(models.Model): class Meta: db_table = 'id_submission_status' - - -class IdSubmitDateConfig(models.Model): - id = models.IntegerField(primary_key=True) - id_date = models.DateField(null=True, blank=True) - date_name = models.CharField(blank=True, max_length=255) - f_name = models.CharField(blank=True, max_length=255) - - class Meta: - db_table = 'id_dates' - - @classmethod - def get_first_cut_off(cls): - return cls.objects.get(id=1).id_date - - @classmethod - def get_second_cut_off(cls): - return cls.objects.get(id=2).id_date - - @classmethod - def get_ietf_monday(cls): - return cls.objects.get(id=3).id_date - - @classmethod - def get_processed_ids_date(cls): - return cls.objects.get(id=4).id_date - - @classmethod - def get_monday_after_ietf(cls): - return cls.objects.get(id=5).id_date - - @classmethod - def get_list_aproved_date(cls): - return cls.objects.get(id=6).id_date From c0f0d2c2378824ba14c54c88fff975fc7d82b905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 08:26:12 +0000 Subject: [PATCH 04/61] Two levels of parsing. 
Fixes #584 - Legacy-Id: 2819 --- ietf/submit/forms.py | 21 +++---------- ietf/submit/parsers/base.py | 23 ++++++++++++-- ietf/submit/parsers/pdf_parser.py | 3 +- ietf/submit/parsers/plain_parser.py | 48 ++++++++++++++++++++++++----- ietf/submit/parsers/ps_parser.py | 3 +- ietf/submit/parsers/xml_parser.py | 3 +- ietf/submit/urls.py | 2 -- ietf/submit/views.py | 12 +------- 8 files changed, 72 insertions(+), 43 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index dc806fb2e..3bee4b43a 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -1,21 +1,8 @@ import datetime -from email.utils import parseaddr from django import forms -from django.conf import settings -from django.db.models import Q -from django.forms.util import ErrorList -from django.forms.fields import email_re from django.template.loader import render_to_string -from ietf.liaisons.accounts import (can_add_outgoing_liaison, can_add_incoming_liaison, - get_person_for_user, is_ietf_liaison_manager) -from ietf.liaisons.models import LiaisonDetail, Uploads, OutgoingLiaisonApproval, SDOs -from ietf.liaisons.utils import IETFHM -from ietf.liaisons.widgets import (FromWidget, ReadOnlyWidget, ButtonWidget, - ShowAttachmentsWidget, RelatedLiaisonWidget) - - from ietf.proceedings.models import Meeting from ietf.submit.parsers.plain_parser import PlainParser from ietf.submit.parsers.pdf_parser import PDFParser @@ -85,22 +72,22 @@ class UploadForm(forms.Form): yield fieldset_dict def clean_txt(self): - parsed_info = PlainParser(self.cleaned_data['txt']).parse_critical() + parsed_info = PlainParser(self.cleaned_data['txt']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_pdf(self): - parsed_info = PDFParser(self.cleaned_data['pdf']).parse_critical() + parsed_info = PDFParser(self.cleaned_data['pdf']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_ps(self): - parsed_info = 
PSParser(self.cleaned_data['ps']).parse_critical() + parsed_info = PSParser(self.cleaned_data['ps']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_xml(self): - parsed_info = XMLParser(self.cleaned_data['xml']).parse_critical() + parsed_info = XMLParser(self.cleaned_data['xml']).parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 92578ded2..6571fdfa1 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -5,11 +5,18 @@ import re CUTOFF_HOUR = 17 +class MetaDataDraft(object): + revision = None + filename = None + group = None + + class ParseInfo(object): def __init__(self): self.errors = [] self.warnings = {} + self.metadraft = MetaDataDraft() def add_error(self, error_str): self.errors.append(error_str) @@ -25,7 +32,7 @@ class FileParser(object): self.fd = fd self.parsed_info = ParseInfo() - def parse_critical(self): + def parse(self): if not self.fd: return self.parsed_info for attr in dir(self): @@ -33,9 +40,19 @@ class FileParser(object): method = getattr(self, attr, None) if callable(method): method() - return self.parsed_info + # If some critical parsing has returned an error do not continue + if self.parsed_info.errors: + return self.parsed_info + # Continue with non critical parsing, note that they also can return errors + for attr in dir(self): + if attr.startswith('parse_normal_'): + method = getattr(self, attr, None) + if callable(method): + method() + if self.parsed_info.errors: + return self.parsed_info - def parse_critical_invalid_chars_in_filename(self): + def parse_critical_000_invalid_chars_in_filename(self): name = self.fd.name regexp = re.compile(r'&|\|\/|;|\*|\s|\$') chars = regexp.findall(name) diff --git a/ietf/submit/parsers/pdf_parser.py b/ietf/submit/parsers/pdf_parser.py index b7b7659ca..31e22ffa9 100644 --- a/ietf/submit/parsers/pdf_parser.py +++ 
b/ietf/submit/parsers/pdf_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class PDFParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.pdf'): self.parsed_info.add_error('Format of this document must be PDF') diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 3979f9ed4..6f560feaa 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -1,36 +1,70 @@ import re +from ietf.idtracker.models import InternetDraft from ietf.submit.error_manager import MainErrorManager from ietf.submit.parsers.base import FileParser MAX_PLAIN_FILE_SIZE = 6000000 +NONE_WG_PK = 1027 + class PlainParser(FileParser): - + def parse_critical_max_size(self): if self.fd.size > MAX_PLAIN_FILE_SIZE: self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) - def parse_critical_file_charset(self): + def parse_critical_001_file_charset(self): import magic self.fd.file.seek(0) m = magic.open(magic.MAGIC_MIME) m.load() - filetype=m.buffer(self.fd.file.read()) + filetype = m.buffer(self.fd.file.read()) if not 'ascii' in filetype: - self.parsed_info.add_error('A plain text document must be submitted.'); + self.parsed_info.add_error('A plain text document must be submitted.') - def parse_filename(self): + def parse_critical_002_filename(self): self.fd.file.seek(0) draftre = re.compile('(draft-\S+)') + revisionre = re.compile('.*-(\d+)$') limit = 80 while limit: + limit -= 1 line = self.fd.readline() - match = draftre.match(line) + match = draftre.search(line) if not match: continue filename = match.group(0) filename = re.sub('^[^\w]+', '', filename) filename = re.sub('[^\w]+$', '', filename) filename = re.sub('\.txt$', '', filename) - line = re.sub('^[^\w]+', '') + extra_chars = re.sub('[0-9a-z\-]', '', filename) + if extra_chars: + self.parsed_info.add_error('Filename contains non alpha-numeric character: %s' % ', 
'.join(set(extra_chars))) + match_revision = revisionre.match(filename) + if match_revision: + self.parsed_info.metadraft.revision = match_revision.group(0) + filename = re.sub('-\d+$', '', filename) + self.parsed_info.metadraft.filename = filename + return + self.parsed_info.add_error(MainErrorManager.get_error_str('INVALID_FILENAME')) + + def parse_critical_003_wg(self): + filename = self.parsed_info.metadraft.filename + try: + existing_draft = InternetDraft.objects.get(filename=filename) + self.parsed_info.metadraft.wg = existing_draft.group + except InternetDraft.DoesNotExist: + if filename.startswith('draft-ietf-'): + # Extra check for WG that contains dashes + for group in IETFWG.objects.filter(group_acronym__acronym__contains='-'): + if filename.startswith('draft-ietf-%s-' % group.group_acronym.acronym): + self.parsed_info.metadraft.wg = group + return + group_acronym = filename.split('-')[2] + try: + self.parsed_info.metadraft.wg = IETFWG.objects.get(group_acronym__acronym=group_acronym) + except IETFWG.DoesNotExist: + self.parsed_info.add_error('Invalid WG ID: %s' % group_acronym) + else: + self.parsed_info.metadraft.wg = IETFWG.objects.get(pk=NONE_WG_PK) diff --git a/ietf/submit/parsers/ps_parser.py b/ietf/submit/parsers/ps_parser.py index e3d0cea74..e8655bd6c 100644 --- a/ietf/submit/parsers/ps_parser.py +++ b/ietf/submit/parsers/ps_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class PSParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.ps'): self.parsed_info.add_error('Format of this document must be PS') diff --git a/ietf/submit/parsers/xml_parser.py b/ietf/submit/parsers/xml_parser.py index 2edd9e08c..93327e211 100644 --- a/ietf/submit/parsers/xml_parser.py +++ b/ietf/submit/parsers/xml_parser.py @@ -1,7 +1,8 @@ from ietf.submit.parsers.base import FileParser + class XMLParser(FileParser): - + def parse_critical_filename_extension(self): if not self.fd.name.endswith('.xml'): 
self.parsed_info.add_error('Format of this document must be XML') diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index d28d5e308..440f0a4de 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py @@ -1,6 +1,4 @@ from django.conf.urls.defaults import patterns, url -from django.db.models import Q -from ietf.liaisons.models import LiaisonDetail urlpatterns = patterns('ietf.submit.views', diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 40a5b4c8b..a9c3fb103 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -1,16 +1,6 @@ # Copyright The IETF Trust 2007, All Rights Reserved -import datetime -from email.utils import parseaddr - -from django.conf import settings -from django.core.urlresolvers import reverse -from django.db.models import Q -from django.forms.fields import email_re -from django.http import HttpResponse, HttpResponseRedirect -from django.shortcuts import render_to_response, get_object_or_404 +from django.shortcuts import render_to_response from django.template import RequestContext -from django.utils import simplejson -from django.views.generic.list_detail import object_list, object_detail from ietf.submit.forms import UploadForm From 020e7f89ff7c0ce2455cbb753310719625ac6650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Tarrag=C3=B3n?= Date: Tue, 8 Feb 2011 10:42:09 +0000 Subject: [PATCH 05/61] merging author parsing in plain parser. Closes #585. 
- Legacy-Id: 2820 --- ietf/submit/parsers/base.py | 2 + ietf/submit/parsers/plain_parser.py | 265 ++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 6571fdfa1..6891bfd76 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -9,6 +9,7 @@ class MetaDataDraft(object): revision = None filename = None group = None + authors = None class ParseInfo(object): @@ -51,6 +52,7 @@ class FileParser(object): method() if self.parsed_info.errors: return self.parsed_info + return self.parsed_info def parse_critical_000_invalid_chars_in_filename(self): name = self.fd.name diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 6f560feaa..6f88c4d83 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -68,3 +68,268 @@ class PlainParser(FileParser): self.parsed_info.add_error('Invalid WG ID: %s' % group_acronym) else: self.parsed_info.metadraft.wg = IETFWG.objects.get(pk=NONE_WG_PK) + + def parse_critical_authors(self): + """ + comes from http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py + """ + + def _stripheaders(rawlines): + stripped = [] + pages = [] + page = [] + line = "" + debug = False + newpage = False + sentence = False + haveblank = False + + def endpage(pages, page, line): + if line: + page += [ line ] + return begpage(pages, page) + def begpage(pages, page, line=None): + if page and len(page) > 5: + pages += [ "\n".join(page) ] + page = [] + newpage = True + if line: + page += [ line ] + return pages, page + + for line in rawlines: + line = line.rstrip() + if re.search("\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$", line, re.I): + pages, page = endpage(pages, page, line) + continue + if re.search("\f", line, re.I): + pages, page = begpage(pages, page) + continue + if re.search("^ *Internet.Draft.+[12][0-9][0-9][0-9] *$", line, re.I): + pages, page = begpage(pages, 
page, line) + continue + if re.search("^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if re.search("^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if re.search("^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I): + pages, page = endpage(pages, page, line) + continue + if re.search(".{60,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if newpage and re.search("^ *draft-[-a-z0-9_.]+ *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if re.search("^[^ \t]+", line): + sentence = True + if re.search("[^ \t]", line): + if newpage: + if sentence: + stripped += [""] + else: + if haveblank: + stripped += [""] + haveblank = False + sentence = False + newpage = False + if re.search("[.:]$", line): + sentence = True + if re.search("^[ \t]*$", line): + haveblank = True + page += [ line ] + continue + page += [ line ] + stripped += [ line ] + pages, page = begpage(pages, page) + return stripped, pages + + self.fd.file.seek(0) + raw_lines = self.fd.file.read().split("\n") + draft_lines, draft_pages = _stripheaders(raw_lines) + + longform = { + "Beth": "Elizabeth", + "Bill": "William", + "Bob": "Robert", + "Dick": "Richard", + "Fred": "Alfred", + "Jerry": "Gerald", + "Liz": "Elizabeth", + "Lynn": "Carolyn", + "Ned": "Edward" , + "Ted":"Edward", + } + aux = { + "honor" : r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)", + "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)", + "suffix": r"(jr|II|2nd|III|3rd|IV|4th)", + "first" : r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)", + "last" : r"([-A-Za-z']{2,})", + } + authformats = [ + r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux, + r" {6}(((%(prefix)s )?%(last)s)( %(suffix)s)?, %(first)s)([, 
]([Ee]d\.?|\([Ee]d\.?\)))?$" % aux, + r" {6}(%(last)s)$" % aux, + ] + + authors = [] + companies = [] + + # Collect first-page author information first + have_blankline = False + have_draftline = False + prev_blankline = False + for line in draft_lines[:15]: + leading_space = len(re.findall("^ *", line)[0]) + line_len = len(line.rstrip()) + trailing_space = line_len <= 72 and 72 - line_len or 0 + # Truncate long lines at the first space past column 80: + trunc_space = line.find(" ", 80) + if line_len > 80 and trunc_space > -1: + line = line[:trunc_space] + if line_len > 60: + # Look for centered title, break if found: + if (leading_space > 5 and abs(leading_space - trailing_space) < 5): + break + for authformat in authformats: + match = re.search(authformat, line) + if match: + author = match.group(1) + authors += [ author ] + if line.strip() == "": + if prev_blankline: + break + have_blankline = True + prev_blankline = True + else: + prev_blankline = False + if "draft-" in line: + have_draftline = True + if have_blankline and have_draftline: + break + + found_pos = [] + for i in range(len(authors)): + author = authors[i] + if author == None: + continue + if "," in author: + last, first = author.split(",",1) + author = "%s %s" % (first.strip(), last.strip()) + if not " " in author: + if "." in author: + first, last = author.rsplit(".", 1) + first += "." + else: + author = "[A-Z].+ " + author + first, last = author.rsplit(" ", 1) + else: + first, last = author.rsplit(" ", 1) + + for author in [ "%s %s"%(first,last), "%s %s"%(last,first), ]: + # Pattern for full author information search, based on first page author name: + authpat = author + # Permit expansion of first name + authpat = re.sub("\. 
", ".* ", authpat) + authpat = re.sub("\.$", ".*", authpat) + # Permit insertsion of middle name or initial + authpat = re.sub(" ", "\S*( +[^ ]+)* +", authpat) + # Permit expansion of double-name initials + authpat = re.sub("-", ".*?-", authpat) + # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than + # a single-letter(latin) abbreviation: + authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) + authpat = "^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" % (aux["honor"], authpat) + start = 0 + col = None + + # Find start of author info for this author (if any). + # Scan from the end of the file, looking for a match to authpath + try: + for j in range(len(draft_lines)-1, 15, -1): + line = draft_lines[j].strip() + forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ] + for line in forms: + if re.search(authpat, line): + start = j + columns = re.split("( +)", line) + # Find which column: + cols = [ c for c in range(len(columns)) if re.search(authpat+r"$", columns[c].strip()) ] + if cols: + col = cols[0] + if not (start, col) in found_pos: + found_pos += [ (start, col) ] + beg = len("".join(columns[:col])) + if col == len(columns) or col == len(columns)-1: + end = None + else: + end = beg + len("".join(columns[col:col+2])) + author = re.search(authpat, columns[col].strip()).group(1) + if author in companies: + authors[i] = None + else: + authors[i] = author + + raise StopIteration("Found Author") + except StopIteration: + pass + if start and col != None: + break + if not authors[i]: + continue + + if start and col != None: + done = False + count = 0 + keyword = False + blanklines = 0 + for line in draft_lines[start+1:]: + # Break on the second blank line + if not line: + blanklines += 1 + if blanklines >= 3: + break + else: + continue + else: + count += 1 + authmatch = [ a for a in authors[i+1:] if a and not a in companies and re.search((r"(^|\W)"+re.sub("\.? 
", ".* ", a)+"(\W|$)"), line.strip()) ] + if authmatch: + if count == 1 or (count == 2 and not blanklines): + # First line after an author -- this is a company + companies += authmatch + companies += [ line.strip() ] # XXX fix this for columnized author list + companies = list(set(companies)) + for k in range(i+1, len(authors)): + if authors[k] in companies: + authors[k] = None + elif not "@" in line: + break + else: + pass + + try: + column = line[beg:end].strip() + except: + column = line + column = re.sub(" *\(at\) *", "@", column) + column = re.sub(" *\(dot\) *", ".", column) + + emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column) + if emailmatch and not "@" in authors[i]: + email = emailmatch.group(0).lower() + authors[i] = "%s <%s>" % (authors[i], email) + else: + authors[i] = None + + authors = [ re.sub(r" +"," ", a) for a in authors if a != None ] + if authors: + authors.sort() + self.parsed_info.metadraft.authors = authors + else: + self.parsed_info.errors.append("Draft authors could not be found.") + + return authors From da7808208046acd0772011b72ac31e1ba594ecdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 10:55:37 +0000 Subject: [PATCH 06/61] Extract metadata from plain ascii file. 
Fixes #586 - Legacy-Id: 2821 --- ietf/submit/parsers/base.py | 9 +-- ietf/submit/parsers/plain_parser.py | 88 +++++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 6891bfd76..7efe9904f 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -1,4 +1,3 @@ -import datetime import re @@ -9,9 +8,13 @@ class MetaDataDraft(object): revision = None filename = None group = None + filesize = None + first_two_pages = None + page_count = None + submission_date = None + creation_date = None authors = None - class ParseInfo(object): def __init__(self): @@ -50,8 +53,6 @@ class FileParser(object): method = getattr(self, attr, None) if callable(method): method() - if self.parsed_info.errors: - return self.parsed_info return self.parsed_info def parse_critical_000_invalid_chars_in_filename(self): diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 6f88c4d83..38f6d75da 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -1,6 +1,7 @@ +import datetime import re -from ietf.idtracker.models import InternetDraft +from ietf.idtracker.models import InternetDraft, IETFWG from ietf.submit.error_manager import MainErrorManager from ietf.submit.parsers.base import FileParser @@ -10,9 +11,24 @@ NONE_WG_PK = 1027 class PlainParser(FileParser): - def parse_critical_max_size(self): + def __init__(self, fd): + super(PlainParser, self).__init__(fd) + self.lines = fd.file.readlines() + fd.file.seek(0) + self.full_text= self.normalize_text(''.join(self.lines)) + + def normalize_text(self, text): + text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis + text = text.replace("\r\n", "\n") # Convert DOS to unix + text = text.replace("\r", "\n") # Convert MAC to unix + text = text.strip() + return text + + def parse_critical_000_max_size(self): if self.fd.size > MAX_PLAIN_FILE_SIZE: 
self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) + self.parsed_info.metadraft.filesize = self.fd.size + self.parsed_info.metadraft.submission_date = datetime.date.today() def parse_critical_001_file_charset(self): import magic @@ -34,7 +50,7 @@ class PlainParser(FileParser): match = draftre.search(line) if not match: continue - filename = match.group(0) + filename = match.group(1) filename = re.sub('^[^\w]+', '', filename) filename = re.sub('[^\w]+$', '', filename) filename = re.sub('\.txt$', '', filename) @@ -43,7 +59,7 @@ class PlainParser(FileParser): self.parsed_info.add_error('Filename contains non alpha-numeric character: %s' % ', '.join(set(extra_chars))) match_revision = revisionre.match(filename) if match_revision: - self.parsed_info.metadraft.revision = match_revision.group(0) + self.parsed_info.metadraft.revision = match_revision.group(1) filename = re.sub('-\d+$', '', filename) self.parsed_info.metadraft.filename = filename return @@ -69,7 +85,65 @@ class PlainParser(FileParser): else: self.parsed_info.metadraft.wg = IETFWG.objects.get(pk=NONE_WG_PK) - def parse_critical_authors(self): + def parse_normal_000_first_two_pages(self): + first_pages = '' + for line in self.lines: + first_pages += line + if re.search('\[[Pp]age 2', line): + break + self.parsed_info.metadraft.first_two_pages = self.normalize_text(first_pages) + + def parse_normal_001_title(self): + pages = self.parsed_info.metadraft.first_two_pages or self.full_text + title_re = re.compile('(.+\n){1,3}(\s+\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', + r'\s{3,}(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', + r'\s{3,}(?P\d{1,2})-(?P\w+)-(?P\d{4})', + # 'October 2008' - default day to today's. 
+ r'\s{3,}(?P\w+)\s+(?P\d{4})', + ] + + first = self.parsed_info.metadraft.first_two_pages or self.full_text + for regex in date_regexes: + match = re.search(regex, first) + if match: + md = match.groupdict() + mon = md['month'][0:3].lower() + day = int( md.get( 'day', datetime.date.today().day ) ) + year = int( md['year'] ) + try: + month = month_names.index( mon ) + 1 + self.parsed_info.metadraft.creation_date = datetime.date(year, month, day) + return + except ValueError: + # mon abbreviation not in _MONTH_NAMES + # or month or day out of range + continue + self.parsed_info.add_warning('creation_date', 'Creation Date field is empty or the creation date is not in a proper format.') + + + def parse_normal_004_authors(self): """ comes from http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py """ @@ -333,3 +407,7 @@ class PlainParser(FileParser): self.parsed_info.errors.append("Draft authors could not be found.") return authors + + + def parse_normal_005_abstract(self): + pass From ea57dcbcf6da8e8736f3acb4076648ba3de4c376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 12:53:16 +0000 Subject: [PATCH 07/61] Initial draft.py. See #588 - Legacy-Id: 2822 --- ietf/utils/draft.py | 642 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 642 insertions(+) create mode 100644 ietf/utils/draft.py diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py new file mode 100644 index 000000000..af1d79d07 --- /dev/null +++ b/ietf/utils/draft.py @@ -0,0 +1,642 @@ +#!/usr/bin/python +# -*- python -*- + +""" +NAME + %(program)s - Extract meta-information from an IETF draft. + +SYNOPSIS + %(program)s [OPTIONS] DRAFTLIST_FILE + +DESCRIPTION + Extract information about authors' names and email addresses, + intended status and number of pages from Internet Drafts. + The information is emitted in the form of a line containing + xml-style attributes, prefixed with the name of the draft. 
+ +%(options)s + +AUTHOR + Written by Henrik Levkowetz, + +COPYRIGHT + Copyright 2008 Henrik Levkowetz + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at + your option) any later version. There is NO WARRANTY; not even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the GNU General Public License for more details. + +""" + +import getopt +import os +import os.path +import re +import stat +import sys +import time + +version = "0.13" +program = os.path.basename(sys.argv[0]) +progdir = os.path.dirname(sys.argv[0]) + +# ---------------------------------------------------------------------- +# Data +# ---------------------------------------------------------------------- + + +opt_debug = False +opt_timestamp = False +opt_trace = False + +# The following is an alias list for short forms which starts with a +# different letter than the long form. 
+ +longform = { + "Beth": "Elizabeth", + "Bill": "William", + "Bob": "Robert", + "Dick": "Richard", + "Fred": "Alfred", + "Jerry": "Gerald", + "Liz": "Elizabeth", + "Lynn": "Carolyn", + "Ned": "Edward" , + "Ted":"Edward", +} +longform = dict([ (short+" ", longform[short]+" ") for short in longform ]) + + +# ---------------------------------------------------------------------- +# Functions +# ---------------------------------------------------------------------- +def _debug(string): + if opt_debug: + sys.stderr.write("%s\n" % (string)) + +# ---------------------------------------------------------------------- +def _note(string): + sys.stdout.write("%s: %s\n" % (program, string)) + +# ---------------------------------------------------------------------- +def _warn(string): + sys.stderr.write("%s: Warning: %s\n" % (program, string)) + +# ---------------------------------------------------------------------- +def _err(string): + sys.stderr.write("%s: Error: %s\n" % (program, string)) + sys.exit(1) + +# ---------------------------------------------------------------------- +def _gettext(file): + file = open(file) + text = file.read() + file.close() + + text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis + text = text.replace("\r\n", "\n") # Convert DOS to unix + text = text.replace("\r", "\n") # Convert MAC to unix + text = text.strip() + + return text + +# ---------------------------------------------------------------------- + +class Draft(): + + def __init__(self, text): + self.rawtext = text + + text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis + text = text.replace("\r\n", "\n") # Convert DOS to unix + text = text.replace("\r", "\n") # Convert MAC to unix + text = text.strip() + self.text = text + self.errors = {} + + self.rawlines = self.text.split("\n") + self.lines, self.pages = self._stripheaders() + if not self.pages: + self.pages = [ self.text ] + self.filename, self.revision = self._parse_draftname() + + 
self._authors = None + self._pagecount = None + self._status = None + self._creation_date = None + + # ------------------------------------------------------------------ + def _parse_draftname(self): + draftname_regex = r"(draft-[a-z0-9-]*)-(\d\d)(\w|\.txt|\n|$)" + draftname_match = re.search(draftname_regex, self.pages[0]) + if draftname_match: + return (draftname_match.group(1), draftname_match.group(2) ) + else: + self.errors["draftname"] = "Could not find the draft name and revision on the first page." + return ("", "") + + # ---------------------------------------------------------------------- + def _stripheaders(self): + stripped = [] + pages = [] + page = [] + line = "" + debug = False + newpage = False + sentence = False + haveblank = False + # two functions with side effects + def endpage(pages, page, line): + if line: + page += [ line ] + return begpage(pages, page) + def begpage(pages, page, line=None): + if page and len(page) > 5: + pages += [ "\n".join(page) ] + page = [] + newpage = True + if line: + page += [ line ] + return pages, page + for line in self.rawlines: + # if re.search("^ *Curtis King", line): + # debug = True + # if re.search("^Intellectual", line): + # debug = False + # if debug: + # _debug( "* newpage: %s; sentence: %s; haveblank: %s" % (newpage, sentence, haveblank)) + # _debug( " " + line) + line = line.rstrip() + if re.search("\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$", line, re.I): + pages, page = endpage(pages, page, line) + continue + if re.search("\f", line, re.I): + pages, page = begpage(pages, page) + continue + if re.search("^ *Internet.Draft.+[12][0-9][0-9][0-9] *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + # if re.search("^ *Internet.Draft +", line, re.I): + # newpage = True + # continue + if re.search("^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if re.search("^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I): + pages, page = 
begpage(pages, page, line) + continue + if re.search("^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I): + pages, page = endpage(pages, page, line) + continue + if re.search(".{60,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if newpage and re.search("^ *draft-[-a-z0-9_.]+ *$", line, re.I): + pages, page = begpage(pages, page, line) + continue + if re.search("^[^ \t]+", line): + sentence = True + if re.search("[^ \t]", line): + if newpage: + if sentence: + stripped += [""] + else: + if haveblank: + stripped += [""] + haveblank = False + sentence = False + newpage = False + if re.search("[.:]$", line): + sentence = True + if re.search("^[ \t]*$", line): + haveblank = True + page += [ line ] + continue + page += [ line ] + stripped += [ line ] + pages, page = begpage(pages, page) + return stripped, pages + + # ---------------------------------------------------------------------- + def get_pagecount(self): + if self._pagecount == None: + self._pagecount = len(re.findall("\[[Pp]age [0-9ixldv]+\]", self.text)) or len(self.lines)/58 + return self._pagecount + + # ---------------------------------------------------------------------- + def get_status(self): + if self._status == None: + for line in self.lines[:10]: + status_match = re.search("^\s*Intended [Ss]tatus:\s*(.*?) ", line) + if status_match: + self._status = status_match.group(1) + break + return self._status + + # ------------------------------------------------------------------ + def get_creation_date(self): + if self._creation_date: + return self._creation_date + month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ] + date_regexes = [ + r'\s{3,}(?P\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', + r'\s{3,}(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', + r'\s{3,}(?P\d{1,2})-(?P\w+)-(?P\d{4})', + # 'October 2008' - default day to today's. 
+ r'\s{3,}(?P\w+)\s+(?P\d{4})', + ] + + for regex in date_regexes: + match = re.search(regex, self.pages[0]) + if match: + md = match.groupdict() + mon = md['month'][0:3].lower() + day = int( md.get( 'day', date.today().day ) ) + year = int( md['year'] ) + try: + month = month_names.index( mon ) + 1 + self._creation_date = date(year, month, day) + return self._creation_date + except ValueError: + # mon abbreviation not in _MONTH_NAMES + # or month or day out of range + pass + self.errors['creation_date'] = 'Creation Date field is empty or the creation date is not in a proper format.' + return self._creation_date + + + # ------------------------------------------------------------------ + def get_authors(self): + """Extract author information from draft text. + + """ + if self._authors == None: + aux = { + "honor" : r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)", + "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)", + "suffix": r"(jr|II|2nd|III|3rd|IV|4th)", + "first" : r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)", + "last" : r"([-A-Za-z']{2,})", + } + authformats = [ + r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux, + r" {6}(((%(prefix)s )?%(last)s)( %(suffix)s)?, %(first)s)([, ]([Ee]d\.?|\([Ee]d\.?\)))?$" % aux, + r" {6}(%(last)s)$" % aux, + ] + + ignore = [ + "Standards Track", "Current Practice", "Internet Draft", "Working Group", + "No Affiliation", + ] + # group 12 34 5 6 + authors = [] + companies = [] + + # Collect first-page author information first + have_blankline = False + have_draftline = False + prev_blankline = False + for line in self.lines[:15]: + #_debug( "**" + line) + leading_space = len(re.findall("^ *", line)[0]) + line_len = len(line.rstrip()) + trailing_space = line_len <= 72 and 72 - line_len or 0 + # Truncate long lines at the first space past column 80: + trunc_space = line.find(" ", 80) + if line_len > 80 and trunc_space > -1: + line = line[:trunc_space] + if line_len > 60: + # Look 
for centered title, break if found: + if (leading_space > 5 and abs(leading_space - trailing_space) < 5): + break + for authformat in authformats: + match = re.search(authformat, line) + if match: + author = match.group(1) + authors += [ author ] + #_debug("\nLine: " + line) + #_debug("Format: " + authformat) + _debug("Author: '%s'" % author) + if line.strip() == "": + if prev_blankline: + break + have_blankline = True + prev_blankline = True + else: + prev_blankline = False + if "draft-" in line: + have_draftline = True + if have_blankline and have_draftline: + break + + found_pos = [] + for i in range(len(authors)): + _debug("1: authors[%s]: %s" % (i, authors[i])) + author = authors[i] + if author == None: + continue + if "," in author: + last, first = author.split(",",1) + author = "%s %s" % (first.strip(), last.strip()) + if not " " in author: + if "." in author: + first, last = author.rsplit(".", 1) + first += "." + else: + author = "[A-Z].+ " + author + first, last = author.rsplit(" ", 1) + else: + first, last = author.rsplit(" ", 1) + _debug("First, Last: '%s' '%s'" % (first, last)) + for author in [ "%s %s"%(first,last), "%s %s"%(last,first), ]: + _debug("\nAuthors: "+str(authors)) + _debug("Author: "+author) + # Pattern for full author information search, based on first page author name: + authpat = author + # Permit expansion of first name + authpat = re.sub("\. ", ".* ", authpat) + authpat = re.sub("\.$", ".*", authpat) + # Permit insertsion of middle name or initial + authpat = re.sub(" ", "\S*( +[^ ]+)* +", authpat) + # Permit expansion of double-name initials + authpat = re.sub("-", ".*?-", authpat) + # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than + # a single-letter(latin) abbreviation: + authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) + authpat = "^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" 
% (aux["honor"], authpat) + _debug("Authpat: " + authpat) + start = 0 + col = None + # Find start of author info for this author (if any). + # Scan from the end of the file, looking for a match to authpath + try: + for j in range(len(self.lines)-1, 15, -1): + line = self.lines[j].strip() + forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ] + for line in forms: + try: + if re.search(authpat, line): + start = j + _debug( " ==> " + line.strip()) + # The author info could be formatted in multiple columns... + columns = re.split("( +)", line) + # _debug( "Columns:" + columns; sys.stdout.flush()) + # Find which column: + #_debug( "Col range:" + range(len(columns)); sys.stdout.flush()) + + cols = [ c for c in range(len(columns)) if re.search(authpat+r"$", columns[c].strip()) ] + if cols: + col = cols[0] + if not (start, col) in found_pos: + found_pos += [ (start, col) ] + _debug( "Col: %d" % col) + beg = len("".join(columns[:col])) + _debug( "Beg: %d '%s'" % (beg, "".join(columns[:col]))) + _debug( "Len: %d" % len(columns)) + if col == len(columns) or col == len(columns)-1: + end = None + _debug( "End1: %s" % end) + else: + end = beg + len("".join(columns[col:col+2])) + _debug( "End2: %d '%s'" % (end, "".join(columns[col:col+2]))) + _debug( "Cut: '%s'" % line[beg:end]) + author = re.search(authpat, columns[col].strip()).group(1) + if author in companies: + authors[i] = None + else: + authors[i] = author + #_debug( "Author: %s: %s" % (author, authors[author])) + # We need to exit 2 for loops -- a break isn't sufficient: + raise StopIteration("Found Author") + except AssertionError, e: + sys.stderr.write("filename: "+self.filename+"\n") + sys.stderr.write("authpat: "+authpat+"\n") + raise + except StopIteration: + pass + if start and col != None: + break + if not authors[i]: + continue + _debug("2: authors[%s]: %s" % (i, authors[i])) + if start and col != None: + _debug("\n *" + authors[i]) + done = False + count = 0 + keyword 
= False + blanklines = 0 + for line in self.lines[start+1:]: + _debug( " " + line.strip()) + # Break on the second blank line + if not line: + blanklines += 1 + if blanklines >= 3: + _debug( " - Break on blanklines") + break + else: + continue + else: + count += 1 + + # Maybe break on author name + # _debug("Line: %s"%line.strip()) + # for a in authors: + # if a and a not in companies: + # _debug("Search for: %s"%(r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)")) + authmatch = [ a for a in authors[i+1:] if a and not a in companies and re.search((r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) ] + if authmatch: + _debug(" ? Other author or company ? : %s" % authmatch) + _debug(" Line: "+line.strip()) + if count == 1 or (count == 2 and not blanklines): + # First line after an author -- this is a company + companies += authmatch + companies += [ line.strip() ] # XXX fix this for columnized author list + companies = list(set(companies)) + _debug(" -- Companies: " + ", ".join(companies)) + for k in range(i+1, len(authors)): + if authors[k] in companies: + authors[k] = None + elif not "@" in line: + # Break on an author name + _debug( " - Break on other author name") + break + else: + pass + + try: + column = line[beg:end].strip() + except: + column = line + column = re.sub(" *\(at\) *", "@", column) + column = re.sub(" *\(dot\) *", ".", column) + + + # if re.search("^\w+: \w+", column): + # keyword = True + # else: + # if keyword: + # # Break on transition from keyword line to something else + # _debug( " - Break on end of keywords") + # break + + #_debug( " Column text :: " + column) + _debug("3: authors[%s]: %s" % (i, authors[i])) + + emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column) + if emailmatch and not "@" in authors[i]: + email = emailmatch.group(0).lower() + authors[i] = "%s <%s>" % (authors[i], email) + else: + authors[i] = None + if not author in ignore: + _debug("Not an author? 
'%s'" % (author)) + + authors = [ re.sub(r" +"," ", a) for a in authors if a != None ] + authors.sort() + _debug(" * Final author list: " + ", ".join(authors)) + _debug("-"*72) + self._authors = authors + + return self._authors + +# ---------------------------------------------------------------------- +def _output(fields): + if opt_timestamp: + sys.stdout.write("%s " % (fields["eventdate"])) + sys.stdout.write("%s" % (fields["doctag"].strip())) + + def outputkey(key, fields): + sys.stdout.write(" %s='%s'" % ( key.lower(), fields[key].strip().replace("\\", "\\\\" ).replace("'", "\\x27" ).replace("\n", "\\n"))) + + keys = fields.keys() + keys.sort() + for key in keys: + if fields[key] and not key in ["doctag", "eventdate"]: + outputkey(key, fields) + sys.stdout.write("\n") + +# ---------------------------------------------------------------------- +def _printmeta(timestamp, fn): + # Initial values + fields = {} + fields["eventdate"] = timestamp + fields["eventsource"] = "draft" + + if " " in fn or not fn.endswith(".txt"): + _warn("Skipping unexpected draft name: '%s'" % (fn)) + return + + filename = os.path.join("/www/tools.ietf.org/id", fn) + if not os.path.exists(filename): + _warn("Could not find file: '%s'" % (filename)) + return + + if opt_trace: + t = time.time() + sys.stderr.write("%-58s" % fn[:-4]) + + timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(filename)[stat.ST_MTIME])) + text = _gettext(filename) + draft = Draft(text) + + fields["eventdate"] = timestamp + fields["doctag"] = draft.filename or fn[:-7] + fields["docrev"] = draft.revision + + fields["docpages"] = str(draft.get_pagecount()) + fields["docauthors"] = ", ".join(draft.get_authors()) + deststatus = draft.get_status() + if deststatus: + fields["docdeststatus"] = deststatus + + _output(fields) + + if opt_trace: + sys.stderr.write("%5.1f\n" % ((time.time() - t))) + +# ---------------------------------------------------------------------- +# Main +# 
---------------------------------------------------------------------- + +def _main(): + global opt_debug, opt_timestamp, opt_trace, files + # set default values, if any + # ---------------------------------------------------------------------- + # Option processing + # ---------------------------------------------------------------------- + options = "" + for line in re.findall("\n +(if|elif) +opt in \[(.+)\]:\s+#(.+)\n", open(sys.argv[0]).read()): + if not options: + options += "OPTIONS\n" + options += " %-16s %s\n" % (line[1].replace('"', ''), line[2]) + options = options.strip() + + # with ' < 1:' on the next line, this is a no-op: + if len(sys.argv) < 1: + vars = globals() + vars.update(locals()) + print __doc__ % vars + sys.exit(1) + + try: + opts, files = getopt.gnu_getopt(sys.argv[1:], "dhtTv", ["debug", "help", "timestamp", "trace", "version",]) + except Exception, e: + print "%s: %s" % (program, e) + sys.exit(1) + + # parse options + for opt, value in opts: + if opt in ["-d", "--debug"]: # Output debug information + opt_debug = True + elif opt in ["-h", "--help"]: # Output this help text, then exit + vars = globals() + vars.update(locals()) + print __doc__ % vars + sys.exit(1) + elif opt in ["-v", "--version"]: # Output version information, then exit + print program, version + sys.exit(0) + elif opt in ["-t", "--timestamp"]: # Emit leading timestamp information + opt_timestamp = True + elif opt in ["-T", "--trace"]: # Emit trace information while working + opt_trace = True + + if not files: + files = [ "-" ] + + for file in files: + _debug( "Reading drafts from '%s'" % file) + if file == "-": + file = sys.stdin + elif file.endswith(".gz"): + file = gzip.open(file) + else: + file = open(file) + + if os.path.exists(file.name): + timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(file.name)[stat.ST_MTIME])) + else: + timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime()) + + basename = os.path.basename(file.name) + if 
basename.startswith("draft-"): + draft = basename + _debug( "** Processing '%s'" % draft) + _printmeta(timestamp, draft) + else: + for line in file: + draft = line.strip() + if draft.startswith("#"): + continue + _debug( "** Processing '%s'" % draft) + _printmeta(timestamp, draft) + +if __name__ == "__main__": + try: + _main() + except KeyboardInterrupt: + raise + pass From 8635333293bc538687bbdf1921bc1951b025d76e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 15:17:22 +0000 Subject: [PATCH 08/61] Retrieve title of the draft. See #588 - Legacy-Id: 2826 --- ietf/utils/draft.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index af1d79d07..443130dc0 100644 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -127,6 +127,7 @@ class Draft(): self._pagecount = None self._status = None self._creation_date = None + self._title = None # ------------------------------------------------------------------ def _parse_draftname(self): @@ -502,6 +503,28 @@ class Draft(): return self._authors + # ------------------------------------------------------------------ + def get_title(self): + if self._title: + return self._title + title_re = re.compile('(.+\n){1,3}(\s+ Date: Tue, 8 Feb 2011 15:25:59 +0000 Subject: [PATCH 09/61] make plain parser pep8 compliant. See #585. 
- Legacy-Id: 2827 --- ietf/submit/parsers/plain_parser.py | 73 ++++++++++++++--------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 38f6d75da..2ed1481cf 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -15,7 +15,7 @@ class PlainParser(FileParser): super(PlainParser, self).__init__(fd) self.lines = fd.file.readlines() fd.file.seek(0) - self.full_text= self.normalize_text(''.join(self.lines)) + self.full_text = self.normalize_text(''.join(self.lines)) def normalize_text(self, text): text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis @@ -111,18 +111,18 @@ class PlainParser(FileParser): self.parsed_info.metadraft.title = title def parse_normal_002_num_pages(self): - pagecount = len(re.findall("\[[Pp]age [0-9ixldv]+\]", self.full_text)) or len(self.lines)/58 + pagecount = len(re.findall("\[[Pp]age [0-9ixldv]+\]", self.full_text)) or len(self.lines) / 58 self.parsed_info.metadraft.pagecount = pagecount def parse_normal_003_creation_date(self): - month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ] + month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] date_regexes = [ r'\s{3,}(?P\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', r'\s{3,}(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', r'\s{3,}(?P\d{1,2})-(?P\w+)-(?P\d{4})', # 'October 2008' - default day to today's. 
r'\s{3,}(?P\w+)\s+(?P\d{4})', - ] + ] first = self.parsed_info.metadraft.first_two_pages or self.full_text for regex in date_regexes: @@ -130,10 +130,10 @@ class PlainParser(FileParser): if match: md = match.groupdict() mon = md['month'][0:3].lower() - day = int( md.get( 'day', datetime.date.today().day ) ) - year = int( md['year'] ) + day = int(md.get('day', datetime.date.today().day)) + year = int(md['year']) try: - month = month_names.index( mon ) + 1 + month = month_names.index(mon) + 1 self.parsed_info.metadraft.creation_date = datetime.date(year, month, day) return except ValueError: @@ -142,12 +142,11 @@ class PlainParser(FileParser): continue self.parsed_info.add_warning('creation_date', 'Creation Date field is empty or the creation date is not in a proper format.') - def parse_normal_004_authors(self): """ comes from http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py """ - + def _stripheaders(rawlines): stripped = [] pages = [] @@ -160,15 +159,16 @@ class PlainParser(FileParser): def endpage(pages, page, line): if line: - page += [ line ] + page += [line] return begpage(pages, page) + def begpage(pages, page, line=None): if page and len(page) > 5: - pages += [ "\n".join(page) ] + pages += ["\n".join(page)] page = [] newpage = True if line: - page += [ line ] + page += [line] return pages, page for line in rawlines: @@ -213,13 +213,13 @@ class PlainParser(FileParser): sentence = True if re.search("^[ \t]*$", line): haveblank = True - page += [ line ] + page += [line] continue - page += [ line ] - stripped += [ line ] + page += [line] + stripped += [line] pages, page = begpage(pages, page) return stripped, pages - + self.fd.file.seek(0) raw_lines = self.fd.file.read().split("\n") draft_lines, draft_pages = _stripheaders(raw_lines) @@ -233,15 +233,15 @@ class PlainParser(FileParser): "Jerry": "Gerald", "Liz": "Elizabeth", "Lynn": "Carolyn", - "Ned": "Edward" , - "Ted":"Edward", + "Ned": "Edward", + "Ted": "Edward", } aux = { - 
"honor" : r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)", + "honor": r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)", "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)", "suffix": r"(jr|II|2nd|III|3rd|IV|4th)", - "first" : r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)", - "last" : r"([-A-Za-z']{2,})", + "first": r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)", + "last": r"([-A-Za-z']{2,})", } authformats = [ r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux, @@ -272,7 +272,7 @@ class PlainParser(FileParser): match = re.search(authformat, line) if match: author = match.group(1) - authors += [ author ] + authors += [author] if line.strip() == "": if prev_blankline: break @@ -291,7 +291,7 @@ class PlainParser(FileParser): if author == None: continue if "," in author: - last, first = author.split(",",1) + last, first = author.split(",", 1) author = "%s %s" % (first.strip(), last.strip()) if not " " in author: if "." in author: @@ -303,7 +303,7 @@ class PlainParser(FileParser): else: first, last = author.rsplit(" ", 1) - for author in [ "%s %s"%(first,last), "%s %s"%(last,first), ]: + for author in ["%s %s" % (first, last), "%s %s" % (last, first)]: # Pattern for full author information search, based on first page author name: authpat = author # Permit expansion of first name @@ -315,7 +315,7 @@ class PlainParser(FileParser): authpat = re.sub("-", ".*?-", authpat) # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than # a single-letter(latin) abbreviation: - authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) + authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) authpat = "^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" % (aux["honor"], authpat) start = 0 col = None @@ -323,24 +323,24 @@ class PlainParser(FileParser): # Find start of author info for this author (if any). 
# Scan from the end of the file, looking for a match to authpath try: - for j in range(len(draft_lines)-1, 15, -1): + for j in range(len(draft_lines) - 1, 15, -1): line = draft_lines[j].strip() - forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ] + forms = [line] + [line.replace(short, longform[short]) for short in longform if short in line] for line in forms: if re.search(authpat, line): start = j columns = re.split("( +)", line) # Find which column: - cols = [ c for c in range(len(columns)) if re.search(authpat+r"$", columns[c].strip()) ] + cols = [c for c in range(len(columns)) if re.search(authpat + r"$", columns[c].strip())] if cols: col = cols[0] if not (start, col) in found_pos: - found_pos += [ (start, col) ] + found_pos += [(start, col)] beg = len("".join(columns[:col])) - if col == len(columns) or col == len(columns)-1: + if col == len(columns) or col == len(columns) - 1: end = None else: - end = beg + len("".join(columns[col:col+2])) + end = beg + len("".join(columns[col:col + 2])) author = re.search(authpat, columns[col].strip()).group(1) if author in companies: authors[i] = None @@ -360,7 +360,7 @@ class PlainParser(FileParser): count = 0 keyword = False blanklines = 0 - for line in draft_lines[start+1:]: + for line in draft_lines[start + 1:]: # Break on the second blank line if not line: blanklines += 1 @@ -370,14 +370,14 @@ class PlainParser(FileParser): continue else: count += 1 - authmatch = [ a for a in authors[i+1:] if a and not a in companies and re.search((r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) ] + authmatch = [a for a in authors[i + 1:] if a and not a in companies and re.search((r"(^|\W)" + re.sub("\.? 
", ".* ", a) + "(\W|$)"), line.strip())] if authmatch: if count == 1 or (count == 2 and not blanklines): # First line after an author -- this is a company companies += authmatch - companies += [ line.strip() ] # XXX fix this for columnized author list + companies += [line.strip()] # XXX fix this for columnized author list companies = list(set(companies)) - for k in range(i+1, len(authors)): + for k in range(i + 1, len(authors)): if authors[k] in companies: authors[k] = None elif not "@" in line: @@ -399,7 +399,7 @@ class PlainParser(FileParser): else: authors[i] = None - authors = [ re.sub(r" +"," ", a) for a in authors if a != None ] + authors = [re.sub(r" +", " ", a) for a in authors if a != None] if authors: authors.sort() self.parsed_info.metadraft.authors = authors @@ -408,6 +408,5 @@ class PlainParser(FileParser): return authors - def parse_normal_005_abstract(self): pass From 37458917e08076950d66d4a69a26dde374528f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 15:40:45 +0000 Subject: [PATCH 10/61] Make critical parsing explicit. 
See #584 - Legacy-Id: 2828 --- ietf/submit/forms.py | 16 +- ietf/submit/parsers/base.py | 23 +- ietf/submit/parsers/pdf_parser.py | 9 +- ietf/submit/parsers/plain_parser.py | 351 +--------------------------- ietf/submit/parsers/ps_parser.py | 9 +- ietf/submit/parsers/xml_parser.py | 9 +- 6 files changed, 53 insertions(+), 364 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 3bee4b43a..9076c5da7 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -72,22 +72,30 @@ class UploadForm(forms.Form): yield fieldset_dict def clean_txt(self): - parsed_info = PlainParser(self.cleaned_data['txt']).parse() + if not self.cleaned_data['txt']: + return None + parsed_info = PlainParser(self.cleaned_data['txt']).critical_parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_pdf(self): - parsed_info = PDFParser(self.cleaned_data['pdf']).parse() + if not self.cleaned_data['pdf']: + return None + parsed_info = PDFParser(self.cleaned_data['pdf']).critical_parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_ps(self): - parsed_info = PSParser(self.cleaned_data['ps']).parse() + if not self.cleaned_data['ps']: + return None + parsed_info = PSParser(self.cleaned_data['ps']).critical_parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) def clean_xml(self): - parsed_info = XMLParser(self.cleaned_data['xml']).parse() + if not self.cleaned_data['xml']: + return None + parsed_info = XMLParser(self.cleaned_data['xml']).critical_parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) diff --git a/ietf/submit/parsers/base.py b/ietf/submit/parsers/base.py index 7efe9904f..7dd8618ae 100644 --- a/ietf/submit/parsers/base.py +++ b/ietf/submit/parsers/base.py @@ -36,26 +36,13 @@ class FileParser(object): self.fd = fd self.parsed_info = ParseInfo() - def parse(self): - if not self.fd: - return self.parsed_info - for attr in dir(self): - if 
attr.startswith('parse_critical_'): - method = getattr(self, attr, None) - if callable(method): - method() - # If some critical parsing has returned an error do not continue - if self.parsed_info.errors: - return self.parsed_info - # Continue with non critical parsing, note that they also can return errors - for attr in dir(self): - if attr.startswith('parse_normal_'): - method = getattr(self, attr, None) - if callable(method): - method() + # If some error is found after this method invocation + # no other file parsing is recommended + def critical_parse(self): + self.parse_invalid_chars_in_filename() return self.parsed_info - def parse_critical_000_invalid_chars_in_filename(self): + def parse_invalid_chars_in_filename(self): name = self.fd.name regexp = re.compile(r'&|\|\/|;|\*|\s|\$') chars = regexp.findall(name) diff --git a/ietf/submit/parsers/pdf_parser.py b/ietf/submit/parsers/pdf_parser.py index 31e22ffa9..88a58fc25 100644 --- a/ietf/submit/parsers/pdf_parser.py +++ b/ietf/submit/parsers/pdf_parser.py @@ -3,6 +3,13 @@ from ietf.submit.parsers.base import FileParser class PDFParser(FileParser): - def parse_critical_filename_extension(self): + # If some error is found after this method invocation + # no other file parsing is recommended + def critical_parse(self): + super(PDFParser, self).critical_parse() + self.parse_filename_extension() + return self.parsed_info + + def parse_filename_extension(self): if not self.fd.name.endswith('.pdf'): self.parsed_info.add_error('Format of this document must be PDF') diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py index 2ed1481cf..61c4fb6f8 100644 --- a/ietf/submit/parsers/plain_parser.py +++ b/ietf/submit/parsers/plain_parser.py @@ -13,24 +13,23 @@ class PlainParser(FileParser): def __init__(self, fd): super(PlainParser, self).__init__(fd) - self.lines = fd.file.readlines() - fd.file.seek(0) - self.full_text = self.normalize_text(''.join(self.lines)) - def normalize_text(self, 
text): - text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis - text = text.replace("\r\n", "\n") # Convert DOS to unix - text = text.replace("\r", "\n") # Convert MAC to unix - text = text.strip() - return text + # If some error is found after this method invocation + # no other file parsing is recommended + def critical_parse(self): + super(PlainParser, self).critical_parse() + self.parse_max_size() + self.parse_file_charset() + self.parse_filename() + return self.parsed_info - def parse_critical_000_max_size(self): + def parse_max_size(self): if self.fd.size > MAX_PLAIN_FILE_SIZE: self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) self.parsed_info.metadraft.filesize = self.fd.size self.parsed_info.metadraft.submission_date = datetime.date.today() - def parse_critical_001_file_charset(self): + def parse_file_charset(self): import magic self.fd.file.seek(0) m = magic.open(magic.MAGIC_MIME) @@ -39,7 +38,7 @@ class PlainParser(FileParser): if not 'ascii' in filetype: self.parsed_info.add_error('A plain text document must be submitted.') - def parse_critical_002_filename(self): + def parse_filename(self): self.fd.file.seek(0) draftre = re.compile('(draft-\S+)') revisionre = re.compile('.*-(\d+)$') @@ -65,7 +64,7 @@ class PlainParser(FileParser): return self.parsed_info.add_error(MainErrorManager.get_error_str('INVALID_FILENAME')) - def parse_critical_003_wg(self): + def parse_wg(self): filename = self.parsed_info.metadraft.filename try: existing_draft = InternetDraft.objects.get(filename=filename) @@ -84,329 +83,3 @@ class PlainParser(FileParser): self.parsed_info.add_error('Invalid WG ID: %s' % group_acronym) else: self.parsed_info.metadraft.wg = IETFWG.objects.get(pk=NONE_WG_PK) - - def parse_normal_000_first_two_pages(self): - first_pages = '' - for line in self.lines: - first_pages += line - if re.search('\[[Pp]age 2', line): - break - self.parsed_info.metadraft.first_two_pages = self.normalize_text(first_pages) - - 
def parse_normal_001_title(self): - pages = self.parsed_info.metadraft.first_two_pages or self.full_text - title_re = re.compile('(.+\n){1,3}(\s+\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', - r'\s{3,}(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', - r'\s{3,}(?P\d{1,2})-(?P\w+)-(?P\d{4})', - # 'October 2008' - default day to today's. - r'\s{3,}(?P\w+)\s+(?P\d{4})', - ] - - first = self.parsed_info.metadraft.first_two_pages or self.full_text - for regex in date_regexes: - match = re.search(regex, first) - if match: - md = match.groupdict() - mon = md['month'][0:3].lower() - day = int(md.get('day', datetime.date.today().day)) - year = int(md['year']) - try: - month = month_names.index(mon) + 1 - self.parsed_info.metadraft.creation_date = datetime.date(year, month, day) - return - except ValueError: - # mon abbreviation not in _MONTH_NAMES - # or month or day out of range - continue - self.parsed_info.add_warning('creation_date', 'Creation Date field is empty or the creation date is not in a proper format.') - - def parse_normal_004_authors(self): - """ - comes from http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py - """ - - def _stripheaders(rawlines): - stripped = [] - pages = [] - page = [] - line = "" - debug = False - newpage = False - sentence = False - haveblank = False - - def endpage(pages, page, line): - if line: - page += [line] - return begpage(pages, page) - - def begpage(pages, page, line=None): - if page and len(page) > 5: - pages += ["\n".join(page)] - page = [] - newpage = True - if line: - page += [line] - return pages, page - - for line in rawlines: - line = line.rstrip() - if re.search("\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$", line, re.I): - pages, page = endpage(pages, page, line) - continue - if re.search("\f", line, re.I): - pages, page = begpage(pages, page) - continue - if re.search("^ *Internet.Draft.+[12][0-9][0-9][0-9] *$", line, re.I): - pages, page = begpage(pages, page, line) - continue - if re.search("^ *Draft.+[12][0-9][0-9][0-9] 
*$", line, re.I): - pages, page = begpage(pages, page, line) - continue - if re.search("^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I): - pages, page = begpage(pages, page, line) - continue - if re.search("^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I): - pages, page = endpage(pages, page, line) - continue - if re.search(".{60,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I): - pages, page = begpage(pages, page, line) - continue - if newpage and re.search("^ *draft-[-a-z0-9_.]+ *$", line, re.I): - pages, page = begpage(pages, page, line) - continue - if re.search("^[^ \t]+", line): - sentence = True - if re.search("[^ \t]", line): - if newpage: - if sentence: - stripped += [""] - else: - if haveblank: - stripped += [""] - haveblank = False - sentence = False - newpage = False - if re.search("[.:]$", line): - sentence = True - if re.search("^[ \t]*$", line): - haveblank = True - page += [line] - continue - page += [line] - stripped += [line] - pages, page = begpage(pages, page) - return stripped, pages - - self.fd.file.seek(0) - raw_lines = self.fd.file.read().split("\n") - draft_lines, draft_pages = _stripheaders(raw_lines) - - longform = { - "Beth": "Elizabeth", - "Bill": "William", - "Bob": "Robert", - "Dick": "Richard", - "Fred": "Alfred", - "Jerry": "Gerald", - "Liz": "Elizabeth", - "Lynn": "Carolyn", - "Ned": "Edward", - "Ted": "Edward", - } - aux = { - "honor": r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)", - "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)", - "suffix": r"(jr|II|2nd|III|3rd|IV|4th)", - "first": r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)", - "last": r"([-A-Za-z']{2,})", - } - authformats = [ - r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux, - r" {6}(((%(prefix)s )?%(last)s)( %(suffix)s)?, %(first)s)([, ]([Ee]d\.?|\([Ee]d\.?\)))?$" % aux, - r" {6}(%(last)s)$" % aux, - ] - - authors = [] - companies = 
[] - - # Collect first-page author information first - have_blankline = False - have_draftline = False - prev_blankline = False - for line in draft_lines[:15]: - leading_space = len(re.findall("^ *", line)[0]) - line_len = len(line.rstrip()) - trailing_space = line_len <= 72 and 72 - line_len or 0 - # Truncate long lines at the first space past column 80: - trunc_space = line.find(" ", 80) - if line_len > 80 and trunc_space > -1: - line = line[:trunc_space] - if line_len > 60: - # Look for centered title, break if found: - if (leading_space > 5 and abs(leading_space - trailing_space) < 5): - break - for authformat in authformats: - match = re.search(authformat, line) - if match: - author = match.group(1) - authors += [author] - if line.strip() == "": - if prev_blankline: - break - have_blankline = True - prev_blankline = True - else: - prev_blankline = False - if "draft-" in line: - have_draftline = True - if have_blankline and have_draftline: - break - - found_pos = [] - for i in range(len(authors)): - author = authors[i] - if author == None: - continue - if "," in author: - last, first = author.split(",", 1) - author = "%s %s" % (first.strip(), last.strip()) - if not " " in author: - if "." in author: - first, last = author.rsplit(".", 1) - first += "." - else: - author = "[A-Z].+ " + author - first, last = author.rsplit(" ", 1) - else: - first, last = author.rsplit(" ", 1) - - for author in ["%s %s" % (first, last), "%s %s" % (last, first)]: - # Pattern for full author information search, based on first page author name: - authpat = author - # Permit expansion of first name - authpat = re.sub("\. 
", ".* ", authpat) - authpat = re.sub("\.$", ".*", authpat) - # Permit insertsion of middle name or initial - authpat = re.sub(" ", "\S*( +[^ ]+)* +", authpat) - # Permit expansion of double-name initials - authpat = re.sub("-", ".*?-", authpat) - # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than - # a single-letter(latin) abbreviation: - authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) - authpat = "^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" % (aux["honor"], authpat) - start = 0 - col = None - - # Find start of author info for this author (if any). - # Scan from the end of the file, looking for a match to authpath - try: - for j in range(len(draft_lines) - 1, 15, -1): - line = draft_lines[j].strip() - forms = [line] + [line.replace(short, longform[short]) for short in longform if short in line] - for line in forms: - if re.search(authpat, line): - start = j - columns = re.split("( +)", line) - # Find which column: - cols = [c for c in range(len(columns)) if re.search(authpat + r"$", columns[c].strip())] - if cols: - col = cols[0] - if not (start, col) in found_pos: - found_pos += [(start, col)] - beg = len("".join(columns[:col])) - if col == len(columns) or col == len(columns) - 1: - end = None - else: - end = beg + len("".join(columns[col:col + 2])) - author = re.search(authpat, columns[col].strip()).group(1) - if author in companies: - authors[i] = None - else: - authors[i] = author - - raise StopIteration("Found Author") - except StopIteration: - pass - if start and col != None: - break - if not authors[i]: - continue - - if start and col != None: - done = False - count = 0 - keyword = False - blanklines = 0 - for line in draft_lines[start + 1:]: - # Break on the second blank line - if not line: - blanklines += 1 - if blanklines >= 3: - break - else: - continue - else: - count += 1 - authmatch = [a for a in authors[i + 1:] if a and not a in companies and re.search((r"(^|\W)" + re.sub("\.? 
", ".* ", a) + "(\W|$)"), line.strip())] - if authmatch: - if count == 1 or (count == 2 and not blanklines): - # First line after an author -- this is a company - companies += authmatch - companies += [line.strip()] # XXX fix this for columnized author list - companies = list(set(companies)) - for k in range(i + 1, len(authors)): - if authors[k] in companies: - authors[k] = None - elif not "@" in line: - break - else: - pass - - try: - column = line[beg:end].strip() - except: - column = line - column = re.sub(" *\(at\) *", "@", column) - column = re.sub(" *\(dot\) *", ".", column) - - emailmatch = re.search("[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column) - if emailmatch and not "@" in authors[i]: - email = emailmatch.group(0).lower() - authors[i] = "%s <%s>" % (authors[i], email) - else: - authors[i] = None - - authors = [re.sub(r" +", " ", a) for a in authors if a != None] - if authors: - authors.sort() - self.parsed_info.metadraft.authors = authors - else: - self.parsed_info.errors.append("Draft authors could not be found.") - - return authors - - def parse_normal_005_abstract(self): - pass diff --git a/ietf/submit/parsers/ps_parser.py b/ietf/submit/parsers/ps_parser.py index e8655bd6c..084a1329a 100644 --- a/ietf/submit/parsers/ps_parser.py +++ b/ietf/submit/parsers/ps_parser.py @@ -3,6 +3,13 @@ from ietf.submit.parsers.base import FileParser class PSParser(FileParser): - def parse_critical_filename_extension(self): + # If some error is found after this method invocation + # no other file parsing is recommended + def critical_parse(self): + super(PSParser, self).critical_parse() + self.parse_filename_extension() + return self.parsed_info + + def parse_filename_extension(self): if not self.fd.name.endswith('.ps'): self.parsed_info.add_error('Format of this document must be PS') diff --git a/ietf/submit/parsers/xml_parser.py b/ietf/submit/parsers/xml_parser.py index 93327e211..243acb544 100644 --- a/ietf/submit/parsers/xml_parser.py +++ 
b/ietf/submit/parsers/xml_parser.py @@ -3,6 +3,13 @@ from ietf.submit.parsers.base import FileParser class XMLParser(FileParser): - def parse_critical_filename_extension(self): + # If some error is found after this method invocation + # no other file parsing is recommended + def critical_parse(self): + super(XMLParser, self).critical_parse() + self.parse_filename_extension() + return self.parsed_info + + def parse_filename_extension(self): if not self.fd.name.endswith('.xml'): self.parsed_info.add_error('Format of this document must be XML') From 38da2ad5bfc8ed7da3637589f163fa093f9c28a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Tue, 8 Feb 2011 17:04:18 +0000 Subject: [PATCH 11/61] Extend creation_date retrieving with dates at the begining of a line. See #588 - Legacy-Id: 2833 --- ietf/utils/draft.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index 443130dc0..131daa2bc 100644 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -31,6 +31,7 @@ COPYRIGHT """ +import datetime import getopt import os import os.path @@ -242,7 +243,11 @@ class Draft(): if self._creation_date: return self._creation_date month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ] - date_regexes = [ + date_regexes = [ + r'^(?P\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', + r'^(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', + r'^(?P\d{1,2})-(?P\w+)-(?P\d{4})', + r'^(?P\w+)\s+(?P\d{4})', r'\s{3,}(?P\w+)\s+(?P\d{1,2}),?\s+(?P\d{4})', r'\s{3,}(?P\d{1,2}),?\s+(?P\w+)\s+(?P\d{4})', r'\s{3,}(?P\d{1,2})-(?P\w+)-(?P\d{4})', @@ -251,20 +256,20 @@ class Draft(): ] for regex in date_regexes: - match = re.search(regex, self.pages[0]) + match = re.search(regex, self.pages[0], re.MULTILINE) if match: - md = match.groupdict() - mon = md['month'][0:3].lower() - day = int( md.get( 'day', date.today().day ) ) - year = int( md['year'] ) - try: 
- month = month_names.index( mon ) + 1 - self._creation_date = date(year, month, day) + md = match.groupdict() + mon = md['month'][0:3].lower() + day = int( md.get( 'day', datetime.date.today().day ) ) + year = int( md['year'] ) + try: + month = month_names.index( mon ) + 1 + self._creation_date = datetime.date(year, month, day) return self._creation_date - except ValueError: - # mon abbreviation not in _MONTH_NAMES - # or month or day out of range - pass + except ValueError: + # mon abbreviation not in _MONTH_NAMES + # or month or day out of range + pass self.errors['creation_date'] = 'Creation Date field is empty or the creation date is not in a proper format.' return self._creation_date @@ -572,6 +577,7 @@ def _printmeta(timestamp, fn): fields["doctitle"] = draft.get_title() fields["docpages"] = str(draft.get_pagecount()) fields["docauthors"] = ", ".join(draft.get_authors()) + fields["doccreationdate"] = str(draft.get_creation_date()) deststatus = draft.get_status() if deststatus: fields["docdeststatus"] = deststatus From 86229aa478acc26462cd0b6183809512663f34cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Wed, 9 Feb 2011 10:42:37 +0000 Subject: [PATCH 12/61] Saving files in staging area. Saving information of upload in database. Executing idnits. 
Fixes #589 - Legacy-Id: 2835 --- ietf/settings.py | 7 ++ ietf/submit/error_manager.py | 19 ----- ietf/submit/forms.py | 101 ++++++++++++++++++++--- ietf/submit/models.py | 34 ++++++++ ietf/submit/parsers/plain_parser.py | 10 +-- ietf/submit/urls.py | 1 + ietf/submit/utils.py | 7 ++ ietf/submit/views.py | 34 +++++++- ietf/templates/submit/draft_status.html | 70 ++++++++++++++++ ietf/templates/submit/submit_status.html | 27 ++++++ ietf/templates/submit/submitform.html | 1 + 11 files changed, 272 insertions(+), 39 deletions(-) delete mode 100644 ietf/submit/error_manager.py create mode 100644 ietf/submit/utils.py create mode 100644 ietf/templates/submit/draft_status.html create mode 100644 ietf/templates/submit/submit_status.html diff --git a/ietf/settings.py b/ietf/settings.py index 7552997a5..b4a0e1762 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -188,10 +188,17 @@ LIAISON_UNIVERSAL_FROM = 'Liaison Statement Management Tool MAX_PLAIN_FILE_SIZE: - self.parsed_info.add_error(MainErrorManager.get_error_str('EXCEEDED_SIZE')) + if self.fd.size > settings.MAX_PLAIN_DRAFT_SIZE: + self.parsed_info.add_error('File size is larger than %s' % filesizeformat(settings.MAX_PLAIN_DRAFT_SIZE)) self.parsed_info.metadraft.filesize = self.fd.size self.parsed_info.metadraft.submission_date = datetime.date.today() @@ -62,7 +62,7 @@ class PlainParser(FileParser): filename = re.sub('-\d+$', '', filename) self.parsed_info.metadraft.filename = filename return - self.parsed_info.add_error(MainErrorManager.get_error_str('INVALID_FILENAME')) + self.parsed_info.add_error('The document does not contain a legitimate filename that start with draft-*') def parse_wg(self): filename = self.parsed_info.metadraft.filename diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index 440f0a4de..db400366d 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py @@ -4,6 +4,7 @@ from django.conf.urls.defaults import patterns, url urlpatterns = patterns('ietf.submit.views', url(r'^$', 
'submit_index', name='submit_index'), url(r'^status/$', 'submit_status', name='submit_status'), + url(r'^status/(?P\d+)/$', 'draft_status', name='draft_status'), ) urlpatterns += patterns('django.views.generic.simple', diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py new file mode 100644 index 000000000..c7cab1e05 --- /dev/null +++ b/ietf/submit/utils.py @@ -0,0 +1,7 @@ +import re + +def check_idnits_success(idnits_message): + success_re = re.compile('\s+Summary:\s+0\s+|No nits found') + if success_re.search(idnits_message): + return True + return False diff --git a/ietf/submit/views.py b/ietf/submit/views.py index a9c3fb103..8e14a1dc7 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -1,15 +1,21 @@ # Copyright The IETF Trust 2007, All Rights Reserved +from django.core.urlresolvers import reverse +from django.http import HttpResponseRedirect +from django.shortcuts import get_object_or_404 from django.shortcuts import render_to_response from django.template import RequestContext +from ietf.submit.models import IdSubmissionDetail from ietf.submit.forms import UploadForm +from ietf.submit.utils import check_idnits_success def submit_index(request): if request.method == 'POST': form = UploadForm(data=request.POST, files=request.FILES) if form.is_valid(): - pass + submit = form.save() + return HttpResponseRedirect(reverse(draft_status, None, kwargs={'submission_id': submit.submission_id})) else: form = UploadForm() return render_to_response('submit/submit_index.html', @@ -19,4 +25,28 @@ def submit_index(request): def submit_status(request): - pass + error = None + filename = None + if request.method == 'POST': + filename = request.POST.get('filename', '') + detail = IdSubmissionDetail.objects.filter(filename=filename) + if detail: + return HttpResponseRedirect(reverse(draft_status, None, kwargs={'submission_id': detail[0].submission_id})) + error = 'No valid history found for %s' % filename + return render_to_response('submit/submit_status.html', + 
{'selected': 'status', + 'error': error, + 'filename': filename}, + context_instance=RequestContext(request)) + + + +def draft_status(request, submission_id): + detail = get_object_or_404(IdSubmissionDetail, submission_id=submission_id) + idnits_success = check_idnits_success(detail.idnits_message) + return render_to_response('submit/draft_status.html', + {'selected': 'status', + 'detail': detail, + 'idnits_success': idnits_success, + }, + context_instance=RequestContext(request)) diff --git a/ietf/templates/submit/draft_status.html b/ietf/templates/submit/draft_status.html new file mode 100644 index 000000000..730560fd0 --- /dev/null +++ b/ietf/templates/submit/draft_status.html @@ -0,0 +1,70 @@ +{% extends "submit/submit_base.html" %} +{% block title %}Submission status{% endblock %} + +{% block pagehead %} + + +{% endblock %} + +{% block submit_content %} +

    Check Page

    +

    +{% if idnits_success %} +Your draft has been verified to meet IDNITS requirements. +{% else %} +Your draft has NOT been verified to meet IDNITS requirements. +{% endif %} +(View IDNITS Results) +

    + + + +

    Meta-Data from the Draft

    + +

    +The IETF is an organized activity of the Internet Society +
    Please send problem reports to ietf-action@ietf.org. +

    +{% endblock %} diff --git a/ietf/templates/submit/submit_status.html b/ietf/templates/submit/submit_status.html new file mode 100644 index 000000000..4f5b43958 --- /dev/null +++ b/ietf/templates/submit/submit_status.html @@ -0,0 +1,27 @@ +{% extends "submit/submit_base.html" %} +{% block title %}Submission status{% endblock %} + +{% block pagehead %} +{{ form.media }} +{% endblock %} + +{% block submit_content %} +

    +Please enter the filename of the Internet-Draft whose status you wish to view: +

    + +
    +{% if error %}
    {{ error }}
    {% endif %} + + +
    + +

    + +Please note that the Status page only displays the status of an Internet-Draft whose posting is still in progress or an Internet-Draft that has been successfully posted. +

    +

    +The IETF is an organized activity of the Internet Society +
    Please send problem reports to ietf-action@ietf.org. +

    +{% endblock %} diff --git a/ietf/templates/submit/submitform.html b/ietf/templates/submit/submitform.html index 5e889674b..a07d866e5 100644 --- a/ietf/templates/submit/submitform.html +++ b/ietf/templates/submit/submitform.html @@ -13,6 +13,7 @@
    Please correct the errors below.
    + {{ form.non_field_errors }} {% endif %} {% for fieldset in form.get_fieldsets %} {% if fieldset.name %} From 6aa16f27622a82dc4fbc376cd4a1b4b69f341f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20A=2E=20S=C3=A1nchez=20L=C3=B3pez?= Date: Wed, 9 Feb 2011 17:14:50 +0000 Subject: [PATCH 13/61] Show state of a previous submission. Fixes #590 - Legacy-Id: 2836 --- ietf/submit/forms.py | 49 +++++++- ietf/submit/models.py | 17 +++ ietf/submit/urls.py | 1 + ietf/submit/utils.py | 89 ++++++++++++++- ietf/submit/views.py | 22 +++- ietf/templates/submit/draft_status.html | 143 +++++++++++++++++++----- ietf/utils/draft.py | 2 +- 7 files changed, 282 insertions(+), 41 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index b99b863b5..976ec1480 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -6,8 +6,9 @@ from django import forms from django.conf import settings from django.template.loader import render_to_string +from ietf.idtracker.models import InternetDraft from ietf.proceedings.models import Meeting -from ietf.submit.models import IdSubmissionDetail +from ietf.submit.models import IdSubmissionDetail, TempIdAuthors from ietf.submit.parsers.pdf_parser import PDFParser from ietf.submit.parsers.plain_parser import PlainParser from ietf.submit.parsers.ps_parser import PSParser @@ -36,6 +37,7 @@ class UploadForm(forms.Form): self.idnits_message = None self.shutdown = False self.draft = None + self.filesize = None self.read_dates() def read_dates(self): @@ -85,6 +87,7 @@ class UploadForm(forms.Form): parsed_info = PlainParser(txt_file).critical_parse() if parsed_info.errors: raise forms.ValidationError(parsed_info.errors) + self.filesize=txt_file.size return txt_file def clean_pdf(self): @@ -168,13 +171,57 @@ class UploadForm(forms.Form): self.idnits_message = p.stdout.read() def save_draft_info(self, draft): + document_id = 0 + existing_draft = InternetDraft.objects.filter(filename=draft.filename) + if existing_draft: + document_id = 
existing_draft[0].id_document_tag detail = IdSubmissionDetail.objects.create( id_document_name=draft.get_title(), filename=draft.filename, revision=draft.revision, txt_page_count=draft.get_pagecount(), + filesize=self.filesize, creation_date=draft.get_creation_date(), + submission_date=datetime.date.today(), idnits_message=self.idnits_message, + temp_id_document_tag=document_id, + first_two_pages=''.join(draft.pages[:2]), status_id=1, # Status 1 - upload ) + order = 0 + for author in draft.get_authors(): + name, email = author.rsplit(' ', 1) + first_name, last_name = name.split(' ', 1) + email = email.replace('<', '').replace('>', '') + order += 1 + TempIdAuthors.objects.create( + id_document_tag=document_id, + first_name=first_name, + last_name=last_name, + email_address=email, + author_order=order, + submission=detail) return detail + + +class AutoPostForm(forms.Form): + + first_name = forms.CharField(label=u'Given name', required=True) + last_name = forms.CharField(label=u'Last name', required=True) + email = forms.EmailField(label=u'Email address', required=True) + + def __init__(self, *args, **kwargs): + self.draft = kwargs.pop('draft', None) + self.validation = kwargs.pop('validation', None) + super(AutoPostForm, self).__init__(*args, **kwargs) + + def get_author_buttons(self): + button_template = '' + buttons = [] + for i in self.validation.authors: + full_name = '%s. 
%s' % (i.first_name[0], i.last_name) + buttons.append(button_template % {'first_name': i.first_name, + 'last_name': i.last_name, + 'email': i.email()[1], + 'full_name': full_name}) + return ''.join(buttons) diff --git a/ietf/submit/models.py b/ietf/submit/models.py index 11c520e79..a370d667e 100644 --- a/ietf/submit/models.py +++ b/ietf/submit/models.py @@ -40,3 +40,20 @@ class IdSubmissionDetail(models.Model): class Meta: db_table = 'id_submission_detail' + +class TempIdAuthors(models.Model): + id = models.AutoField(primary_key=True) + id_document_tag = models.IntegerField() + first_name = models.CharField(blank=True, max_length=255) + last_name = models.CharField(blank=True, max_length=255) + email_address = models.CharField(blank=True, max_length=255) + last_modified_date = models.DateField(null=True, blank=True) + last_modified_time = models.CharField(blank=True, max_length=100) + author_order = models.IntegerField(null=True, blank=True) + submission = models.ForeignKey(IdSubmissionDetail) + + class Meta: + db_table = 'temp_id_authors' + + def email(self): + return ('%s %s' % (self.first_name, self.last_name), self.email_address) diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index db400366d..9f5ae1ed8 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py @@ -5,6 +5,7 @@ urlpatterns = patterns('ietf.submit.views', url(r'^$', 'submit_index', name='submit_index'), url(r'^status/$', 'submit_status', name='submit_status'), url(r'^status/(?P\d+)/$', 'draft_status', name='draft_status'), + url(r'^status/(?P\d+)/edit/$', 'draft_edit', name='draft_edit'), ) urlpatterns += patterns('django.views.generic.simple', diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index c7cab1e05..7c802ada0 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -1,7 +1,88 @@ import re -def check_idnits_success(idnits_message): - success_re = re.compile('\s+Summary:\s+0\s+|No nits found') - if success_re.search(idnits_message): +from ietf.idtracker.models import 
InternetDraft, EmailAddress + + +class DraftValidation(object): + + def __init__(self, draft): + self.draft = draft + self.warnings = {} + self.passes_idnits = self.passes_idnits() + self.wg = self.get_working_group() + self.authors = self.get_authors() + + def passes_idnits(self): + passes_idnits = self.check_idnits_success(self.draft.idnits_message) + return passes_idnits + + def get_working_group(self): + filename = self.draft.filename + existing_draft = InternetDraft.objects.filter(filename=filename) + if existing_draft: + return existing_draft[0].group and existing_draft[0].group.ietfwg or None + else: + if filename.startswith('draft-ietf-'): + # Extra check for WG that contains dashes + for group in IETFWG.objects.filter(group_acronym__acronym__contains='-'): + if filename.startswith('draft-ietf-%s-' % group.group_acronym.acronym): + return group + group_acronym = filename.split('-')[2] + try: + return IETFWG.objects.get(group_acronym__acronym=group_acronym) + except IETFWG.DoesNotExist: + self.add_warning('group', 'Invalid WG ID: %s' % group_acronym) + return None + else: + return None + + def check_idnits_success(self, idnits_message): + success_re = re.compile('\s+Summary:\s+0\s+|No nits found') + if success_re.search(idnits_message): + return True + return False + + def is_valid_attr(self, key): + if key in self.warnings.keys(): + return False return True - return False + + def is_valid(self): + self.validate_metadata() + return not bool(self.warnings.keys()) and self.passes_idnits + + def validate_metadata(self): + self.validate_revision() + self.validate_authors() + + def add_warning(self, key, value): + self.warnings.update({key: value}) + + def validate_revision(self): + revision = self.draft.revision + existing_revisions = [int(i.revision) for i in InternetDraft.objects.filter(filename=self.draft.filename)] + expected = 0 + if existing_revisions: + expected = max(existing_revisions) + 1 + if int(revision) != expected: + self.add_warning('revision', 
'Invalid Version Number (Version %00d is expected)' % expected) + + def validate_authors(self): + if not self.authors: + self.add_warning('authors', 'No authors found') + + def get_authors(self): + tmpauthors = self.draft.tempidauthors_set.all().order_by('author_order') + authors = [] + for i in tmpauthors: + person = None + for existing in EmailAddress.objects.filter(address=i.email_address): + try: + person = existing.person_or_org + except PersonOrOrgInfo.DoesNotExist: + pass + if not person: + authors.append(i) + else: + authors.append(person) + return authors diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 8e14a1dc7..2d0954c27 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -6,8 +6,8 @@ from django.shortcuts import render_to_response from django.template import RequestContext from ietf.submit.models import IdSubmissionDetail -from ietf.submit.forms import UploadForm -from ietf.submit.utils import check_idnits_success +from ietf.submit.forms import UploadForm, AutoPostForm +from ietf.submit.utils import DraftValidation def submit_index(request): @@ -43,10 +43,24 @@ def submit_status(request): def draft_status(request, submission_id): detail = get_object_or_404(IdSubmissionDetail, submission_id=submission_id) - idnits_success = check_idnits_success(detail.idnits_message) + validation = DraftValidation(detail) + is_valid = validation.is_valid() + if request.method=='POST': + if request.POST.get('autopost', False): + auto_post_form = AutoPostForm(draft=detail, validation=validation, data=request.POST) + else: + return HttpResponseRedirect(reverse(draft_edit, None, kwargs={'submission_id': detail.submission_id})) + else: + auto_post_form = AutoPostForm(draft=detail, validation=validation) return render_to_response('submit/draft_status.html', {'selected': 'status', 'detail': detail, - 'idnits_success': idnits_success, + 'validation': validation, + 'auto_post_form': auto_post_form, + 'is_valid': is_valid, }, 
context_instance=RequestContext(request)) + + +def draft_edit(request, submission_id): + pass diff --git a/ietf/templates/submit/draft_status.html b/ietf/templates/submit/draft_status.html index 730560fd0..e886fe3cc 100644 --- a/ietf/templates/submit/draft_status.html +++ b/ietf/templates/submit/draft_status.html @@ -1,40 +1,58 @@ {% extends "submit/submit_base.html" %} {% block title %}Submission status{% endblock %} +{% block morecss %} +{{ block.super }} +div.metadata-errors { border: 1px solid red; background-color: #ffeebb; padding: 5px 10px; margin: 1em 0px; } +table.metadata-table th { white-space: nowrap; font-weight: bold; } +table.metadata-table #id_first_name, table.metadata-table #id_last_name { width: 200px; } +table.metadata-table #id_email { width: 400px; } +table.metadata-table th, table.metadata-table td { text-align: left; background: #ddddff; padding: 5px 10px; } +table.metadata-table th.author { text-align: right; } +table.metadata-table tr { vertical-align: top; } +table.metadata-table tr.warning td, table.metadata-table tr.warning th { background-color: #ffaaaa; } +table.metadata-table div.warn_message { color: red; } +table.metadata-table ul.errorlist { color: red; padding: 0px; margin: 0px; list-style-type: none; } +{% endblock morecss %} + {% block pagehead %} +{% endblock %} + +{% block submit_content %} +

    Adjust External Meta-Data

    + + + + + + + + + + +

    Adjust data

    +{% if form.errors %} + +{% endif %} +
    + + + + + + + + + + + + + + +

    Authors

    + + + + + +{% for author in form.get_authors %} + + + + + +{% endfor %} + +
    First nameLast nameEmail address
    {{ author.first_name }}{{ author.errors.first_name }}{{ author.last_name }}{{ author.errors.last_name }}{{ author.email.1 }}{{ author.errors.email }}
    +
    + +
    + +
    + +

    +The IETF is an organized activity of the Internet Society +
    Please send problem reports to ietf-action@ietf.org. +

    +{% endblock %} diff --git a/ietf/templates/submit/draft_status.html b/ietf/templates/submit/draft_status.html index 7196a28fd..cb6b567ec 100644 --- a/ietf/templates/submit/draft_status.html +++ b/ietf/templates/submit/draft_status.html @@ -4,6 +4,8 @@ {% block morecss %} {{ block.super }} div.metadata-errors { border: 1px solid red; background-color: #ffeebb; padding: 5px 10px; margin: 1em 0px; } +div.info-message-error { border: 1px solid red; background-color: #ffeebb; padding: 5px 10px; margin: 1em 0px; color: red; } +div.info-message-success { border: 1px solid green; background-color: #eeffbb; padding: 5px 10px; margin: 1em 0px; color: green; } table.metadata-table th { white-space: nowrap; font-weight: bold; } table.metadata-table #id_first_name, table.metadata-table #id_last_name { width: 200px; } table.metadata-table #id_email { width: 400px; } @@ -60,6 +62,14 @@ table.metadata-table ul.errorlist { color: red; padding: 0px; margin: 0px; list- {% endblock %} {% block submit_content %} +{% if status %} +

    Status of the submission: {{ status.status_value }}

    +{% endif %} + +{% if message %} +
    {{ message.1 }}
    +{% endif %} +

    Check Page

    {% if validation.passes_idnits %} @@ -114,9 +124,9 @@ returned to the submitter. Revision{{ detail.revision }}

    {{ validation.warnings.revision }}
    Submission date{{ detail.submission_date }} Title{{ detail.id_document_name }} -WG{{ validation.wg|default:"Individual Submission" }}
    {{ validation.warnings.group }} +WG{{ validation.wg|default:"Individual Submission" }}
    {{ validation.warnings.group }}
    File size{{ detail.filesize|filesizeformat }} -Creation date{{ detail.creation_date }}
    {{ validation.warnings.creation_date }} +Creation date{{ detail.creation_date }}
    {{ validation.warnings.creation_date }}
    Author(s) information {% if not validation.authors %}
    {{ validation.warning.authors }}
    @@ -125,9 +135,20 @@ returned to the submitter. Author {{ forloop.counter }}{{ author.email.0 }} <{{ author.email.1 }}> {% endfor %} {% endif %} -Pages{{ detail.txt_page_count }}
    {{ validation.warnings.pages }} -Abstract{{ detail.abstract|linebreaksbr }} +Pages{{ detail.txt_page_count }}
    {{ validation.warnings.pages }}
    +Abstract{{ detail.abstract|linebreaksbr }}
    {{ validation.warnings.abstract }}
    + +{% if validation.submitter %} +

    Submitter information

    + + + + + +{% endif %} + +{% if allow_edit %}
    (Leads to manual post by the Secretariat)
    @@ -144,6 +165,8 @@ If you are one of the authors of this document, then please click the button wit {% endif %} +{% endif %} +

    The IETF is an organized activity of the Internet Society
    Please send problem reports to ietf-action@ietf.org. diff --git a/ietf/templates/submit/manual_post_mail.txt b/ietf/templates/submit/manual_post_mail.txt new file mode 100644 index 000000000..03c2c11e6 --- /dev/null +++ b/ietf/templates/submit/manual_post_mail.txt @@ -0,0 +1,21 @@ +Manual Posting Requested for following Internet-Draft: + +I-D Submission Tool URL: /submit/status/{{ draft.submission_id }}/ + +File name: {{ draft.filename }} +Submission date: {{ draft.submission_date }} +WG: {{ draft.wg|default:"Individual Submission" }} +File size: {{ draft.filesize }} + +Title: {{ draft.id_document_name }} +Version: {{ draft.revision }} +Creation date: {{ draft.creation_date }} +Pages: {{ draft.txt_page_count }} +Abstract: {{ draft.abstract }} +Submitter: {{ form.cleaned_data.first_name }} {{ form.cleaned_data.last_name }} <{{ form.cleaned_data.email }}> + +Author(s): +{% for author in form.get_authors %}{{ author.first_name }} {{ author.last_name }} <{{ author.email.1 }}> +{% endfor %} + +{{ draft.comment_to_sec }} From 04c4043e2547ff15b1c8f8a509548f13add87abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Tarrag=C3=B3n?= Date: Thu, 10 Feb 2011 19:08:29 +0000 Subject: [PATCH 22/61] allowing submissions to be canceled. Closes #591.
- Legacy-Id: 2846 --- ietf/submit/models.py | 28 +++++------ ietf/submit/urls.py | 2 +- ietf/submit/utils.py | 1 + ietf/submit/views.py | 13 +++-- ietf/templates/submit/draft_status.html | 65 +++++++++++++------------ 5 files changed, 58 insertions(+), 51 deletions(-) diff --git a/ietf/submit/models.py b/ietf/submit/models.py index df3d1372a..d79d635af 100644 --- a/ietf/submit/models.py +++ b/ietf/submit/models.py @@ -16,28 +16,28 @@ class IdSubmissionDetail(models.Model): temp_id_document_tag = models.IntegerField(null=True, blank=True) status = models.ForeignKey(IdSubmissionStatus, db_column='status_id', null=True, blank=True) last_updated_date = models.DateField(null=True, blank=True) - last_updated_time = models.CharField(blank=True, max_length=25) - id_document_name = models.CharField(blank=True, max_length=255) + last_updated_time = models.CharField(null=True, blank=True, max_length=25) + id_document_name = models.CharField(null=True, blank=True, max_length=255) group_acronym = models.ForeignKey(IETFWG, null=True, blank=True) - filename = models.CharField(blank=True, max_length=255) + filename = models.CharField(null=True, blank=True, max_length=255) creation_date = models.DateField(null=True, blank=True) submission_date = models.DateField(null=True, blank=True) - remote_ip = models.CharField(blank=True, max_length=100) - revision = models.CharField(blank=True, max_length=3) + remote_ip = models.CharField(null=True, blank=True, max_length=100) + revision = models.CharField(null=True, blank=True, max_length=3) submitter_tag = models.IntegerField(null=True, blank=True) - auth_key = models.CharField(blank=True, max_length=255) - idnits_message = models.TextField(blank=True) - file_type = models.CharField(blank=True, max_length=50) - comment_to_sec = models.TextField(blank=True) - abstract = models.TextField(blank=True) + auth_key = models.CharField(null=True, blank=True, max_length=255) + idnits_message = models.TextField(null=True, blank=True) + file_type = 
models.CharField(null=True, blank=True, max_length=50) + comment_to_sec = models.TextField(null=True, blank=True) + abstract = models.TextField(null=True, blank=True) txt_page_count = models.IntegerField(null=True, blank=True) - error_message = models.CharField(blank=True, max_length=255) - warning_message = models.TextField(blank=True) + error_message = models.CharField(null=True, blank=True, max_length=255) + warning_message = models.TextField(null=True, blank=True) wg_submission = models.IntegerField(null=True, blank=True) filesize = models.IntegerField(null=True, blank=True) man_posted_date = models.DateField(null=True, blank=True) - man_posted_by = models.CharField(blank=True, max_length=255) - first_two_pages = models.TextField(blank=True) + man_posted_by = models.CharField(null=True, blank=True, max_length=255) + first_two_pages = models.TextField(null=True, blank=True) sub_email_priority = models.IntegerField(null=True, blank=True) invalid_version = models.IntegerField(null=True, blank=True) idnits_failed = models.IntegerField(null=True, blank=True) diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index 5b1e75a2b..b7de99106 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py @@ -6,7 +6,7 @@ urlpatterns = patterns('ietf.submit.views', url(r'^status/$', 'submit_status', name='submit_status'), url(r'^status/(?P\d+)/$', 'draft_status', name='draft_status'), url(r'^status/(?P\d+)/edit/$', 'draft_edit', name='draft_edit'), - url(r'^status/(?P\d+)/confirm/(?P[a-f\d]+)/$', 'draft_confirm', name='draft_confirm'), + url(r'^status/(?P\d+)/cancel/$', 'draft_cancel', name='draft_cancel'), ) urlpatterns += patterns('django.views.generic.simple', diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index c50ba8275..59ff2dc73 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -11,6 +11,7 @@ WAITING_AUTHENTICATION = 4 MANUAL_POST_REQUESTED = 5 POSTED = -1 POSTED_BY_SECRETARIAT = -2 +CANCELED = -4 # Not a real WG diff --git 
a/ietf/submit/views.py b/ietf/submit/views.py index 740b38de7..3e534a67b 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -7,7 +7,7 @@ from django.template import RequestContext from ietf.submit.models import IdSubmissionDetail from ietf.submit.forms import UploadForm, AutoPostForm, MetaDataForm -from ietf.submit.utils import (DraftValidation, UPLOADED, WAITING_AUTHENTICATION, +from ietf.submit.utils import (DraftValidation, UPLOADED, WAITING_AUTHENTICATION, CANCELED, perform_post) @@ -67,13 +67,18 @@ def draft_status(request, submission_id, message=None): 'validation': validation, 'auto_post_form': auto_post_form, 'is_valid': is_valid, - 'status': status, - 'allow_edit': allow_edit, - 'message': message, + 'canceled': detail.status_id == CANCELED }, context_instance=RequestContext(request)) +def draft_cancel(request, submission_id): + detail = get_object_or_404(IdSubmissionDetail, submission_id=submission_id) + detail.status_id = CANCELED + detail.save() + return HttpResponseRedirect(reverse(draft_status, None, kwargs={'submission_id': submission_id})) + + def draft_edit(request, submission_id): detail = get_object_or_404(IdSubmissionDetail, submission_id=submission_id) if detail.status_id != UPLOADED: diff --git a/ietf/templates/submit/draft_status.html b/ietf/templates/submit/draft_status.html index cb6b567ec..fc8c3018f 100644 --- a/ietf/templates/submit/draft_status.html +++ b/ietf/templates/submit/draft_status.html @@ -20,6 +20,11 @@ table.metadata-table ul.errorlist { color: red; padding: 0px; margin: 0px; list- {% block pagehead %} +{% if can_cancel %} +{% endif %} +