Changed regexes to use r strings. Rewrote code to extract From: name and email from iana email text. Fixed charset decode of iana email messages.

- Legacy-Id: 16373
This commit is contained in:
Henrik Levkowetz 2019-07-04 15:38:15 +00:00
parent b7e9fd14b0
commit d460721415

View file

@ -9,10 +9,13 @@ import urllib.request, urllib.error, urllib.parse
from django.utils.http import urlquote
from django.conf import settings
import debug # pyflakes:ignore
from ietf.doc.mails import email_state_changed
from ietf.doc.models import Document, DocEvent, State, StateDocEvent, StateType
from ietf.doc.utils import add_state_change_event
from ietf.person.models import Person
from ietf.utils.mail import parseaddr
from ietf.utils.timezone import local_timezone_to_utc, email_time_to_local_timezone, utc_to_local_timezone
@ -216,11 +219,11 @@ def update_history_with_changes(changes, send_email=True):
def find_document_name(text):
prefixes = ['draft','conflict-review','status-change','charter']
leading_delimiter_re = '(?<![-a-zA-Z0-9])'
prefix_re = '(%s)' % '|'.join(prefixes)
tail_re = '(-[a-z0-9]+)+?(-\d\d\.txt)?'
trailing_delimiter_re = '((?![-a-zA-Z0-9])|$)'
name_re = '%s(%s%s)%s' % (leading_delimiter_re, prefix_re, tail_re, trailing_delimiter_re)
leading_delimiter_re = r'(?<![-a-zA-Z0-9])'
prefix_re = r'(%s)' % '|'.join(prefixes)
tail_re = r'(-[a-z0-9]+)+?(-\d\d\.txt)?'
trailing_delimiter_re = r'((?![-a-zA-Z0-9])|$)'
name_re = r'%s(%s%s)%s' % (leading_delimiter_re, prefix_re, tail_re, trailing_delimiter_re)
m = re.search(name_re,text)
return m and m.group(0).lower()
@ -231,9 +234,8 @@ def strip_version_extension(text):
text = text[:-3]
return text
def parse_review_email(text):
msg = email.message_from_string(text)
def parse_review_email(bytes):
msg = email.message_from_bytes(bytes)
# doc
doc_name = find_document_name(msg["Subject"]) or ""
doc_name = strip_version_extension(doc_name)
@ -245,25 +247,23 @@ def parse_review_email(text):
# by
by = None
m = re.search(r"\"(.*)\"", msg["From"])
if m:
name = m.group(1).strip()
if name.endswith(" via RT"):
name = name[:-len(" via RT")]
try:
by = Person.objects.get(alias__name=name, role__group__acronym="iana")
except Person.DoesNotExist:
pass
name, __ = parseaddr(msg["From"])
if name.endswith(" via RT"):
name = name[:-len(" via RT")]
try:
by = Person.objects.get(alias__name=name, role__group__acronym="iana")
except Person.DoesNotExist:
pass
if not by:
by = Person.objects.get(name="(System)")
# comment
body = msg.get_payload().decode('quoted-printable').replace("\r", "")
charset = msg.get_content_charset()
body = msg.get_payload(decode=True).decode(charset or 'utf-8').replace("\r", "")
begin_search = re.search('\(BEGIN\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?(\s*:\s*[a-zA-Z0-9-\.]*)?\s*\)',body)
end_search = re.search('\(END\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?\)',body)
begin_search = re.search(r'\(BEGIN\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?(\s*:\s*[a-zA-Z0-9-\.]*)?\s*\)',body)
end_search = re.search(r'\(END\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?\)',body)
if begin_search and end_search:
begin_string = begin_search.group(0)
end_string = end_search.group(0)