diff --git a/ietf/doc/mails.py b/ietf/doc/mails.py index b3cd75f1d..fd7c2501c 100644 --- a/ietf/doc/mails.py +++ b/ietf/doc/mails.py @@ -8,6 +8,8 @@ from django.utils.html import strip_tags from django.conf import settings from django.urls import reverse as urlreverse +import debug # pyflakes:ignore + from ietf.utils.mail import send_mail, send_mail_text from ietf.ipr.utils import iprs_from_docs, related_docs from ietf.doc.models import WriteupDocEvent, LastCallDocEvent, DocAlias, ConsensusDocEvent @@ -15,6 +17,7 @@ from ietf.doc.utils import needed_ballot_positions, get_document_content from ietf.group.models import Role from ietf.doc.models import Document from ietf.mailtrigger.utils import gather_address_lists +from ietf.utils import log def email_state_changed(request, doc, text, mailtrigger_id=None): (to,cc) = gather_address_lists(mailtrigger_id or 'doc_state_edited',doc=doc) @@ -515,7 +518,13 @@ def email_charter_internal_review(request, charter): os.path.join(settings.CHARTER_PATH,filename), split=False, markup=False, - ) + ).decode('utf-8') + utext = charter.text_or_error() # pyflakes:ignore + if charter_text and charter_text != utext and not 'Error; cannot read' in charter_text: + debug.show('charter_text[:64]') + debug.show('utext[:64]') + log.assertion('charter_text == utext') + send_mail(request, addrs.to, settings.DEFAULT_FROM_EMAIL, 'Internal %s Review: %s (%s)'%(charter.group.type.name,charter.group.name,charter.group.acronym), 'doc/mail/charter_internal_review.txt', diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 6e254b8f4..6e6f7f100 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -449,6 +449,9 @@ class DocumentInfo(models.Model): # return text + def text_or_error(self): + return self.text() or "Error; cannot read (%s)"%self.get_file_name() + def htmlized(self): name = self.get_base_name() text = self.text() diff --git a/ietf/doc/templatetags/ietf_filters.py b/ietf/doc/templatetags/ietf_filters.py index fad6bb321..b1db0b417 
100644 --- a/ietf/doc/templatetags/ietf_filters.py +++ b/ietf/doc/templatetags/ietf_filters.py @@ -18,7 +18,7 @@ import debug # pyflakes:ignore from ietf.doc.models import ConsensusDocEvent from ietf.doc.utils import get_document_content from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped - +from ietf.utils import log register = template.Library() @@ -509,7 +509,13 @@ def document_content(doc): if doc is None: return None path = os.path.join(doc.get_file_path(),doc.filename_with_rev()) - return get_document_content(doc.name,path,markup=False) + content = get_document_content(doc.name,path,markup=False) + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + return content @register.filter def format_timedelta(timedelta): diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index e8ea4e9e0..64291853d 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -22,7 +22,7 @@ from ietf.doc.models import TelechatDocEvent from ietf.name.models import DocReminderTypeName, DocRelationshipName from ietf.group.models import Role from ietf.ietfauth.utils import has_role -from ietf.utils import draft, markup_txt +from ietf.utils import draft from ietf.utils.mail import send_mail from ietf.mailtrigger.utils import gather_address_lists @@ -299,6 +299,7 @@ def get_unicode_document_content(key, filename, codec='utf-8', errors='ignore'): return raw_content def get_document_content(key, filename, split=True, markup=True): + #log.unreachable("2017-12-05") try: with open(filename, 'rb') as f: raw_content = f.read() @@ -306,10 +307,11 @@ def get_document_content(key, filename, split=True, markup=True): error = "Error; cannot read ("+key+")" return error - if markup: - return markup_txt.markup(raw_content, split) - else: - return raw_content +# if markup: +# return markup_txt.markup(raw_content, split) +# else: +# 
return raw_content + return raw_content def tags_suffix(tags): return (u"::" + u"::".join(t.name for t in tags)) if tags else u"" diff --git a/ietf/doc/views_conflict_review.py b/ietf/doc/views_conflict_review.py index 833deac9a..e7375a20b 100644 --- a/ietf/doc/views_conflict_review.py +++ b/ietf/doc/views_conflict_review.py @@ -254,7 +254,12 @@ def edit_ad(request, name): def default_approval_text(review): filename = "%s-%s.txt" % (review.canonical_name(), review.rev) - current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False) + current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False).decode('utf-8') + utext = review.text_or_error() # pyflakes:ignore + if current_text and current_text != utext and not 'Error; cannot read' in current_text: + debug.show('current_text[:64]') + debug.show('utext[:64]') + log.assertion('current_text == utext') conflictdoc = review.relateddocument_set.get(relationship__slug='conflrev').target.document if conflictdoc.stream_id=='ise': diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index a7063ef2e..4097fd581 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -66,6 +66,8 @@ from ietf.meeting.utils import group_sessions, get_upcoming_manageable_sessions, from ietf.review.models import ReviewRequest from ietf.review.utils import can_request_review_of_doc, review_requests_to_list_for_docs from ietf.review.utils import no_review_from_teams_on_doc +from ietf.utils import markup_txt, log +from ietf.utils.text import maybe_split def render_document_top(request, doc, tab, name): @@ -186,7 +188,13 @@ def document_main(request, name, rev=None): filename = name + ".txt" content = get_document_content(filename, os.path.join(settings.RFC_PATH, filename), - split_content, markup=True) + split_content, markup=True).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if 
content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + content = markup_txt.markup(maybe_split(content, split=split_content)) # file types base_path = os.path.join(settings.RFC_PATH, name + ".") @@ -216,7 +224,13 @@ def document_main(request, name, rev=None): filename = "%s-%s.txt" % (draft_name, doc.rev) content = get_document_content(filename, os.path.join(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR, filename), - split_content, markup=True) + split_content, markup=True).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + content = markup_txt.markup(maybe_split(content, split=split_content)) # file types base_path = os.path.join(settings.INTERNET_DRAFT_PATH, doc.name + "-" + doc.rev + ".") @@ -439,7 +453,13 @@ def document_main(request, name, rev=None): if doc.type_id == "charter": filename = "%s-%s.txt" % (doc.canonical_name(), doc.rev) - content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True) + content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + content = markup_txt.markup(content) ballot_summary = None if doc.get_state_slug() in ("intrev", "iesgrev"): @@ -480,9 +500,15 @@ def document_main(request, name, rev=None): if doc.rev == "00" and not os.path.isfile(pathname): # This could move to a template - content = "A conflict review response has not yet been proposed." + content = u"A conflict review response has not yet been proposed." 
else: - content = get_document_content(filename, pathname, split=False, markup=True) + content = get_document_content(filename, pathname, split=False, markup=True).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + content = markup_txt.markup(content) ballot_summary = None if doc.get_state_slug() in ("iesgeval") and doc.active_ballot(): @@ -507,9 +533,14 @@ def document_main(request, name, rev=None): if doc.rev == "00" and not os.path.isfile(pathname): # This could move to a template - content = "Status change text has not yet been proposed." + content = u"Status change text has not yet been proposed." else: - content = get_document_content(filename, pathname, split=False) + content = get_document_content(filename, pathname, split=False).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') ballot_summary = None if doc.get_state_slug() in ("iesgeval"): @@ -562,7 +593,12 @@ def document_main(request, name, rev=None): url = urlbase + extension if extension == ".txt": - content = get_document_content(basename, pathname + extension, split=False) + content = get_document_content(basename, pathname + extension, split=False).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content != utext: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') t = "plain text" other_types.append((t, url)) diff --git a/ietf/doc/views_status_change.py b/ietf/doc/views_status_change.py index ea47065cc..547906c99 100644 --- a/ietf/doc/views_status_change.py +++ b/ietf/doc/views_status_change.py @@ -282,7 +282,12 @@ def newstatus(relateddoc): def 
default_approval_text(status_change,relateddoc): filename = "%s-%s.txt" % (status_change.canonical_name(), status_change.rev) - current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False) + current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False).decode('utf-8') + utext = status_change.text_or_error() # pyflakes:ignore + if current_text and current_text != utext and not 'Error; cannot read' in current_text: + debug.show('current_text[:64]') + debug.show('utext[:64]') + log.assertion('current_text == utext') if relateddoc.target.document.std_level.slug in ('std','ps','ds','bcp',): action = "Protocol Action" diff --git a/ietf/meeting/forms.py b/ietf/meeting/forms.py index dfa830361..fc01f686d 100644 --- a/ietf/meeting/forms.py +++ b/ietf/meeting/forms.py @@ -18,6 +18,7 @@ from ietf.meeting.helpers import is_meeting_approved, get_next_agenda_name from ietf.message.models import Message from ietf.person.models import Person from ietf.utils.fields import DatepickerDateField, DurationField +from ietf.utils import log # need to insert empty option for use in ChoiceField # countries.insert(0, ('', '-'*9 )) @@ -220,7 +221,14 @@ class InterimSessionModelForm(forms.ModelForm): if self.instance.agenda(): doc = self.instance.agenda() path = os.path.join(doc.get_file_path(), doc.filename_with_rev()) - self.initial['agenda'] = get_document_content(os.path.basename(path), path, markup=False) + content = get_document_content(os.path.basename(path), path, markup=False).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if content and content != utext and not 'Error; cannot read' in content: + debug.show('content[:64]') + debug.show('utext[:64]') + log.assertion('content == utext') + self.initial['agenda'] = content + def clean_date(self): '''Date field validator. 
We can't use required on the input because diff --git a/ietf/secr/telechat/views.py b/ietf/secr/telechat/views.py index 57d0b854f..61334e540 100644 --- a/ietf/secr/telechat/views.py +++ b/ietf/secr/telechat/views.py @@ -6,6 +6,8 @@ from django.forms.formsets import formset_factory from django.shortcuts import render, get_object_or_404, redirect from django.utils.functional import curry +import debug # pyflakes:ignore + from ietf.doc.models import DocEvent, Document, BallotDocEvent, BallotPositionDocEvent, BallotType, WriteupDocEvent from ietf.doc.utils import get_document_content, add_state_change_event from ietf.person.models import Person @@ -15,7 +17,7 @@ from ietf.iesg.models import TelechatDate, TelechatAgendaItem, Telechat from ietf.iesg.agenda import agenda_data, get_doc_section from ietf.ietfauth.utils import role_required from ietf.secr.telechat.forms import BallotForm, ChangeStateForm, DateSelectForm, TELECHAT_TAGS - +from ietf.utils import log ''' @@ -70,7 +72,12 @@ def get_doc_writeup(doc): writeup = latest.text elif doc.type_id == 'conflrev': path = os.path.join(doc.get_file_path(),doc.filename_with_rev()) - writeup = get_document_content(doc.name,path,split=False,markup=False) + writeup = get_document_content(doc.name,path,split=False,markup=False).decode('utf-8') + utext = doc.text_or_error() # pyflakes:ignore + if writeup and writeup != utext and not 'Error; cannot read' in writeup: + debug.show('writeup[:64]') + debug.show('utext[:64]') + log.assertion('writeup == utext') return writeup def get_last_telechat_date(): diff --git a/ietf/utils/markup_txt.py b/ietf/utils/markup_txt.py index 6efbf26a0..fd71a517f 100644 --- a/ietf/utils/markup_txt.py +++ b/ietf/utils/markup_txt.py @@ -30,26 +30,37 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from django.utils.html import escape -import string import re +import six +import string +from django.utils.html import escape + +from ietf.utils import log from ietf.utils.text import wordwrap -def markup(content, split=True, width=None): +def markup_ascii(content, width=None): + log.unreachable('2017-12-08') + if six.PY2: + assert isinstance(content, basestring) + # at this point, "content" is normal string + # fix most common non-ASCII characters + t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee") + # map everything except printable ASCII, TAB, LF, FF to "?" + t2 = string.maketrans('','') + t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129 + t4 = t1.translate(t3) + content = content.translate(t4) + else: + log.assertion('six.PY2') + return markup(content.decode('ascii'), width) + +def markup(content, width=None): + log.assertion('isinstance(content, six.text_type)') # normalize line endings to LF only content = content.replace("\r\n", "\n") content = content.replace("\r", "\n") - # at this point, "content" is normal string - # fix most common non-ASCII characters - t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee") - # map everything except printable ASCII, TAB, LF, FF to "?" - t2 = string.maketrans('','') - t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129 - t4 = t1.translate(t3) - content = content.translate(t4) - # remove leading white space content = content.lstrip() # remove runs of blank lines @@ -69,36 +80,4 @@ def markup(content, split=True, width=None): content = re.sub("\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", """\n\n\g<1>\g<2>""", content) - if split: - n = content.find("\n", 5000) - content1 = "
"+content[:n+1]+"
\n" - return content1 - #content2 = "
"+content[n+1:]+"
\n" - #return (content1, content2) - else: - return "
" + content + "
\n" - -def markup_unicode(content, split=True, width=None, container_classes=None): - # normalize line endings to LF only - content = content.replace("\r\n", "\n") - content = content.replace("\r", "\n") - - # remove leading white space - content = content.lstrip() - # remove runs of blank lines - content = re.sub("\n\n\n+", "\n\n", content) - - # maybe wordwrap. This must be done before the escaping below. - if width: - content = wordwrap(content, width) - - # expand tabs + escape - content_to_show = escape(content.expandtabs()) - - if split: - n = content.find("\n", 5000) - content_to_show = content_to_show[:n+1] - - pre = '
' % container_classes if container_classes else '
'
-
-    return pre+content_to_show+'
\n' + return "
" + content + "
def maybe_split(text, split=True, pos=5000):
    """Optionally truncate *text* at the first line break at or after *pos*.

    Parameters:
      text  -- the string to (possibly) truncate.
      split -- when false, *text* is returned unchanged.
      pos   -- index from which the search for a newline character starts.

    Returns the part of *text* up to and including the first newline found
    at index *pos* or later.  If there is no such newline (for example when
    the text is shorter than *pos*), the text is returned unchanged.
    """
    if split:
        n = text.find("\n", pos)
        # find() returns -1 when nothing matches; slicing with text[:n+1]
        # would then be text[:0] and silently discard the whole text, so
        # only truncate when a newline was actually found.
        if n != -1:
            text = text[:n+1]
    return text