Started refactoring how text is read from document files (drafts, charters, etc.) in order to normalise on one way of doing this, and to make it return unicode rather than undecoded bytes. This is the first of two steps; its purpose is to gauge possible issues and report on discrepancies.
- Legacy-Id: 14406
This commit is contained in:
parent
f2f21c4ef3
commit
967ece7e7d
|
@ -8,6 +8,8 @@ from django.utils.html import strip_tags
|
|||
from django.conf import settings
|
||||
from django.urls import reverse as urlreverse
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.utils.mail import send_mail, send_mail_text
|
||||
from ietf.ipr.utils import iprs_from_docs, related_docs
|
||||
from ietf.doc.models import WriteupDocEvent, LastCallDocEvent, DocAlias, ConsensusDocEvent
|
||||
|
@ -15,6 +17,7 @@ from ietf.doc.utils import needed_ballot_positions, get_document_content
|
|||
from ietf.group.models import Role
|
||||
from ietf.doc.models import Document
|
||||
from ietf.mailtrigger.utils import gather_address_lists
|
||||
from ietf.utils import log
|
||||
|
||||
def email_state_changed(request, doc, text, mailtrigger_id=None):
|
||||
(to,cc) = gather_address_lists(mailtrigger_id or 'doc_state_edited',doc=doc)
|
||||
|
@ -515,7 +518,13 @@ def email_charter_internal_review(request, charter):
|
|||
os.path.join(settings.CHARTER_PATH,filename),
|
||||
split=False,
|
||||
markup=False,
|
||||
)
|
||||
).decode('utf-8')
|
||||
utext = charter.text_or_error() # pyflakes:ignore
|
||||
if charter_text and charter_text != utext and not 'Error; cannot read' in charter_text:
|
||||
debug.show('charter_text[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('charter_text == utext')
|
||||
|
||||
send_mail(request, addrs.to, settings.DEFAULT_FROM_EMAIL,
|
||||
'Internal %s Review: %s (%s)'%(charter.group.type.name,charter.group.name,charter.group.acronym),
|
||||
'doc/mail/charter_internal_review.txt',
|
||||
|
|
|
@ -449,6 +449,9 @@ class DocumentInfo(models.Model):
|
|||
#
|
||||
return text
|
||||
|
||||
def text_or_error(self):
|
||||
return self.text() or "Error; cannot read (%s)"%self.get_file_name()
|
||||
|
||||
def htmlized(self):
|
||||
name = self.get_base_name()
|
||||
text = self.text()
|
||||
|
|
|
@ -18,7 +18,7 @@ import debug # pyflakes:ignore
|
|||
from ietf.doc.models import ConsensusDocEvent
|
||||
from ietf.doc.utils import get_document_content
|
||||
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
|
||||
|
||||
from ietf.utils import log
|
||||
|
||||
register = template.Library()
|
||||
|
||||
|
@ -509,7 +509,13 @@ def document_content(doc):
|
|||
if doc is None:
|
||||
return None
|
||||
path = os.path.join(doc.get_file_path(),doc.filename_with_rev())
|
||||
return get_document_content(doc.name,path,markup=False)
|
||||
content = get_document_content(doc.name,path,markup=False)
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
return content
|
||||
|
||||
@register.filter
|
||||
def format_timedelta(timedelta):
|
||||
|
|
|
@ -22,7 +22,7 @@ from ietf.doc.models import TelechatDocEvent
|
|||
from ietf.name.models import DocReminderTypeName, DocRelationshipName
|
||||
from ietf.group.models import Role
|
||||
from ietf.ietfauth.utils import has_role
|
||||
from ietf.utils import draft, markup_txt
|
||||
from ietf.utils import draft
|
||||
from ietf.utils.mail import send_mail
|
||||
from ietf.mailtrigger.utils import gather_address_lists
|
||||
|
||||
|
@ -299,6 +299,7 @@ def get_unicode_document_content(key, filename, codec='utf-8', errors='ignore'):
|
|||
return raw_content
|
||||
|
||||
def get_document_content(key, filename, split=True, markup=True):
|
||||
#log.unreachable("2017-12-05")
|
||||
try:
|
||||
with open(filename, 'rb') as f:
|
||||
raw_content = f.read()
|
||||
|
@ -306,10 +307,11 @@ def get_document_content(key, filename, split=True, markup=True):
|
|||
error = "Error; cannot read ("+key+")"
|
||||
return error
|
||||
|
||||
if markup:
|
||||
return markup_txt.markup(raw_content, split)
|
||||
else:
|
||||
return raw_content
|
||||
# if markup:
|
||||
# return markup_txt.markup(raw_content, split)
|
||||
# else:
|
||||
# return raw_content
|
||||
return raw_content
|
||||
|
||||
def tags_suffix(tags):
|
||||
return (u"::" + u"::".join(t.name for t in tags)) if tags else u""
|
||||
|
|
|
@ -254,7 +254,12 @@ def edit_ad(request, name):
|
|||
def default_approval_text(review):
|
||||
|
||||
filename = "%s-%s.txt" % (review.canonical_name(), review.rev)
|
||||
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False)
|
||||
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False).decode('utf-8')
|
||||
utext = review.text_or_error() # pyflakes:ignore
|
||||
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
|
||||
debug.show('current_text[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('current_text == utext')
|
||||
|
||||
conflictdoc = review.relateddocument_set.get(relationship__slug='conflrev').target.document
|
||||
if conflictdoc.stream_id=='ise':
|
||||
|
|
|
@ -66,6 +66,8 @@ from ietf.meeting.utils import group_sessions, get_upcoming_manageable_sessions,
|
|||
from ietf.review.models import ReviewRequest
|
||||
from ietf.review.utils import can_request_review_of_doc, review_requests_to_list_for_docs
|
||||
from ietf.review.utils import no_review_from_teams_on_doc
|
||||
from ietf.utils import markup_txt, log
|
||||
from ietf.utils.text import maybe_split
|
||||
|
||||
|
||||
def render_document_top(request, doc, tab, name):
|
||||
|
@ -186,7 +188,13 @@ def document_main(request, name, rev=None):
|
|||
filename = name + ".txt"
|
||||
|
||||
content = get_document_content(filename, os.path.join(settings.RFC_PATH, filename),
|
||||
split_content, markup=True)
|
||||
split_content, markup=True).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
content = markup_txt.markup(maybe_split(content, split=split_content))
|
||||
|
||||
# file types
|
||||
base_path = os.path.join(settings.RFC_PATH, name + ".")
|
||||
|
@ -216,7 +224,13 @@ def document_main(request, name, rev=None):
|
|||
filename = "%s-%s.txt" % (draft_name, doc.rev)
|
||||
|
||||
content = get_document_content(filename, os.path.join(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR, filename),
|
||||
split_content, markup=True)
|
||||
split_content, markup=True).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
content = markup_txt.markup(maybe_split(content, split=split_content))
|
||||
|
||||
# file types
|
||||
base_path = os.path.join(settings.INTERNET_DRAFT_PATH, doc.name + "-" + doc.rev + ".")
|
||||
|
@ -439,7 +453,13 @@ def document_main(request, name, rev=None):
|
|||
if doc.type_id == "charter":
|
||||
filename = "%s-%s.txt" % (doc.canonical_name(), doc.rev)
|
||||
|
||||
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True)
|
||||
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
content = markup_txt.markup(content)
|
||||
|
||||
ballot_summary = None
|
||||
if doc.get_state_slug() in ("intrev", "iesgrev"):
|
||||
|
@ -480,9 +500,15 @@ def document_main(request, name, rev=None):
|
|||
|
||||
if doc.rev == "00" and not os.path.isfile(pathname):
|
||||
# This could move to a template
|
||||
content = "A conflict review response has not yet been proposed."
|
||||
content = u"A conflict review response has not yet been proposed."
|
||||
else:
|
||||
content = get_document_content(filename, pathname, split=False, markup=True)
|
||||
content = get_document_content(filename, pathname, split=False, markup=True).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
content = markup_txt.markup(content)
|
||||
|
||||
ballot_summary = None
|
||||
if doc.get_state_slug() in ("iesgeval") and doc.active_ballot():
|
||||
|
@ -507,9 +533,14 @@ def document_main(request, name, rev=None):
|
|||
|
||||
if doc.rev == "00" and not os.path.isfile(pathname):
|
||||
# This could move to a template
|
||||
content = "Status change text has not yet been proposed."
|
||||
content = u"Status change text has not yet been proposed."
|
||||
else:
|
||||
content = get_document_content(filename, pathname, split=False)
|
||||
content = get_document_content(filename, pathname, split=False).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
|
||||
ballot_summary = None
|
||||
if doc.get_state_slug() in ("iesgeval"):
|
||||
|
@ -562,7 +593,12 @@ def document_main(request, name, rev=None):
|
|||
url = urlbase + extension
|
||||
|
||||
if extension == ".txt":
|
||||
content = get_document_content(basename, pathname + extension, split=False)
|
||||
content = get_document_content(basename, pathname + extension, split=False).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content != utext:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
t = "plain text"
|
||||
|
||||
other_types.append((t, url))
|
||||
|
|
|
@ -282,7 +282,12 @@ def newstatus(relateddoc):
|
|||
def default_approval_text(status_change,relateddoc):
|
||||
|
||||
filename = "%s-%s.txt" % (status_change.canonical_name(), status_change.rev)
|
||||
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False)
|
||||
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False).decode('utf-8')
|
||||
utext = status_change.text_or_error() # pyflakes:ignore
|
||||
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
|
||||
debug.show('current_text[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('current_text == utext')
|
||||
|
||||
if relateddoc.target.document.std_level.slug in ('std','ps','ds','bcp',):
|
||||
action = "Protocol Action"
|
||||
|
|
|
@ -18,6 +18,7 @@ from ietf.meeting.helpers import is_meeting_approved, get_next_agenda_name
|
|||
from ietf.message.models import Message
|
||||
from ietf.person.models import Person
|
||||
from ietf.utils.fields import DatepickerDateField, DurationField
|
||||
from ietf.utils import log
|
||||
|
||||
# need to insert empty option for use in ChoiceField
|
||||
# countries.insert(0, ('', '-'*9 ))
|
||||
|
@ -220,7 +221,14 @@ class InterimSessionModelForm(forms.ModelForm):
|
|||
if self.instance.agenda():
|
||||
doc = self.instance.agenda()
|
||||
path = os.path.join(doc.get_file_path(), doc.filename_with_rev())
|
||||
self.initial['agenda'] = get_document_content(os.path.basename(path), path, markup=False)
|
||||
content = get_document_content(os.path.basename(path), path, markup=False).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if content and content != utext and not 'Error; cannot read' in content:
|
||||
debug.show('content[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('content == utext')
|
||||
self.initial['agenda'] = content
|
||||
|
||||
|
||||
def clean_date(self):
|
||||
'''Date field validator. We can't use required on the input because
|
||||
|
|
|
@ -6,6 +6,8 @@ from django.forms.formsets import formset_factory
|
|||
from django.shortcuts import render, get_object_or_404, redirect
|
||||
from django.utils.functional import curry
|
||||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.doc.models import DocEvent, Document, BallotDocEvent, BallotPositionDocEvent, BallotType, WriteupDocEvent
|
||||
from ietf.doc.utils import get_document_content, add_state_change_event
|
||||
from ietf.person.models import Person
|
||||
|
@ -15,7 +17,7 @@ from ietf.iesg.models import TelechatDate, TelechatAgendaItem, Telechat
|
|||
from ietf.iesg.agenda import agenda_data, get_doc_section
|
||||
from ietf.ietfauth.utils import role_required
|
||||
from ietf.secr.telechat.forms import BallotForm, ChangeStateForm, DateSelectForm, TELECHAT_TAGS
|
||||
|
||||
from ietf.utils import log
|
||||
|
||||
|
||||
'''
|
||||
|
@ -70,7 +72,12 @@ def get_doc_writeup(doc):
|
|||
writeup = latest.text
|
||||
elif doc.type_id == 'conflrev':
|
||||
path = os.path.join(doc.get_file_path(),doc.filename_with_rev())
|
||||
writeup = get_document_content(doc.name,path,split=False,markup=False)
|
||||
writeup = get_document_content(doc.name,path,split=False,markup=False).decode('utf-8')
|
||||
utext = doc.text_or_error() # pyflakes:ignore
|
||||
if writeup and writeup != utext and not 'Error; cannot read' in writeup:
|
||||
debug.show('writeup[:64]')
|
||||
debug.show('utext[:64]')
|
||||
log.assertion('writeup == utext')
|
||||
return writeup
|
||||
|
||||
def get_last_telechat_date():
|
||||
|
|
|
@ -30,26 +30,37 @@
|
|||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from django.utils.html import escape
|
||||
import string
|
||||
import re
|
||||
import six
|
||||
import string
|
||||
|
||||
from django.utils.html import escape
|
||||
|
||||
from ietf.utils import log
|
||||
from ietf.utils.text import wordwrap
|
||||
|
||||
def markup(content, split=True, width=None):
|
||||
def markup_ascii(content, width=None):
|
||||
log.unreachable('2017-12-08')
|
||||
if six.PY2:
|
||||
assert isinstance(content, basestring)
|
||||
# at this point, "content" is normal string
|
||||
# fix most common non-ASCII characters
|
||||
t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee")
|
||||
# map everything except printable ASCII, TAB, LF, FF to "?"
|
||||
t2 = string.maketrans('','')
|
||||
t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129
|
||||
t4 = t1.translate(t3)
|
||||
content = content.translate(t4)
|
||||
else:
|
||||
log.assertion('six.PY2')
|
||||
return markup(content.decode('ascii'), width)
|
||||
|
||||
def markup(content, width=None):
|
||||
log.assertion('isinstance(content, six.text_type)')
|
||||
# normalize line endings to LF only
|
||||
content = content.replace("\r\n", "\n")
|
||||
content = content.replace("\r", "\n")
|
||||
|
||||
# at this point, "content" is normal string
|
||||
# fix most common non-ASCII characters
|
||||
t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee")
|
||||
# map everything except printable ASCII, TAB, LF, FF to "?"
|
||||
t2 = string.maketrans('','')
|
||||
t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129
|
||||
t4 = t1.translate(t3)
|
||||
content = content.translate(t4)
|
||||
|
||||
# remove leading white space
|
||||
content = content.lstrip()
|
||||
# remove runs of blank lines
|
||||
|
@ -69,36 +80,4 @@ def markup(content, split=True, width=None):
|
|||
|
||||
content = re.sub("\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", """\n\n<span class="m_h">\g<1>\g<2></span>""", content)
|
||||
|
||||
if split:
|
||||
n = content.find("\n", 5000)
|
||||
content1 = "<pre>"+content[:n+1]+"</pre>\n"
|
||||
return content1
|
||||
#content2 = "<pre>"+content[n+1:]+"</pre>\n"
|
||||
#return (content1, content2)
|
||||
else:
|
||||
return "<pre>" + content + "</pre>\n"
|
||||
|
||||
def markup_unicode(content, split=True, width=None, container_classes=None):
|
||||
# normalize line endings to LF only
|
||||
content = content.replace("\r\n", "\n")
|
||||
content = content.replace("\r", "\n")
|
||||
|
||||
# remove leading white space
|
||||
content = content.lstrip()
|
||||
# remove runs of blank lines
|
||||
content = re.sub("\n\n\n+", "\n\n", content)
|
||||
|
||||
# maybe wordwrap. This must be done before the escaping below.
|
||||
if width:
|
||||
content = wordwrap(content, width)
|
||||
|
||||
# expand tabs + escape
|
||||
content_to_show = escape(content.expandtabs())
|
||||
|
||||
if split:
|
||||
n = content.find("\n", 5000)
|
||||
content_to_show = content_to_show[:n+1]
|
||||
|
||||
pre = '<pre class="%s" >' % container_classes if container_classes else '<pre>'
|
||||
|
||||
return pre+content_to_show+'</pre>\n'
|
||||
return "<pre>" + content + "</pre>\n"
|
||||
|
|
|
@ -124,3 +124,12 @@ def isascii(text):
|
|||
return True
|
||||
except UnicodeEncodeError:
|
||||
return False
|
||||
|
||||
def maybe_split(text, split=True, pos=5000):
    """Return *text*, optionally truncated at the first newline at or after *pos*.

    When *split* is true, the text is cut just after the first newline
    found at index *pos* or later, so the result always ends on a complete
    line.  If there is no such newline (for example, the text is shorter
    than *pos*), there is nothing to split on and the text is returned
    unchanged.

    When *split* is false, the text is returned unchanged.
    """
    if not split:
        return text
    cut = text.find("\n", pos)
    if cut < 0:
        # find() returns -1 when no newline exists at or after pos; slicing
        # with [:cut + 1] would then be [:0] and discard the entire text.
        return text
    return text[:cut + 1]
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue