Tweaked the file content read refactoring in [14406] to fall back to latin-1 decoding if UTF-8 decoding fails.

- Legacy-Id: 14410
Note: SVN reference [14406] has been migrated to Git commit 967ece7e7d
This commit is contained in:
Henrik Levkowetz 2017-12-10 17:48:09 +00:00
parent 3f40d9c962
commit 660c81c272
8 changed files with 22 additions and 14 deletions

View file

@ -518,7 +518,7 @@ def email_charter_internal_review(request, charter):
os.path.join(settings.CHARTER_PATH,filename),
split=False,
markup=False,
).decode('utf-8')
)
utext = charter.text_or_error() # pyflakes:ignore
if charter_text and charter_text != utext and not 'Error; cannot read' in charter_text:
debug.show('charter_text[:64]')

View file

@ -22,7 +22,7 @@ from ietf.doc.models import TelechatDocEvent
from ietf.name.models import DocReminderTypeName, DocRelationshipName
from ietf.group.models import Role
from ietf.ietfauth.utils import has_role
from ietf.utils import draft
from ietf.utils import draft, text
from ietf.utils.mail import send_mail
from ietf.mailtrigger.utils import gather_address_lists
@ -311,7 +311,7 @@ def get_document_content(key, filename, split=True, markup=True):
# return markup_txt.markup(raw_content, split)
# else:
# return raw_content
return raw_content
return text.decode(raw_content)
def tags_suffix(tags):
    """Build the "::"-delimited suffix for a collection of tags.

    Each tag's ``name`` is prefixed with "::" and the pieces are run
    together, e.g. two tags named "a" and "b" give "::a::b".  An empty
    (falsy) collection gives an empty string.
    """
    if not tags:
        return u""
    names = [tag.name for tag in tags]
    return u"::" + u"::".join(names)

View file

@ -254,7 +254,7 @@ def edit_ad(request, name):
def default_approval_text(review):
filename = "%s-%s.txt" % (review.canonical_name(), review.rev)
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False).decode('utf-8')
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False)
utext = review.text_or_error() # pyflakes:ignore
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
debug.show('current_text[:64]')

View file

@ -188,7 +188,7 @@ def document_main(request, name, rev=None):
filename = name + ".txt"
content = get_document_content(filename, os.path.join(settings.RFC_PATH, filename),
split_content, markup=True).decode('utf-8')
split_content, markup=True)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')
@ -224,7 +224,7 @@ def document_main(request, name, rev=None):
filename = "%s-%s.txt" % (draft_name, doc.rev)
content = get_document_content(filename, os.path.join(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR, filename),
split_content, markup=True).decode('utf-8')
split_content, markup=True)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')
@ -453,7 +453,7 @@ def document_main(request, name, rev=None):
if doc.type_id == "charter":
filename = "%s-%s.txt" % (doc.canonical_name(), doc.rev)
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True).decode('utf-8')
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')
@ -502,7 +502,7 @@ def document_main(request, name, rev=None):
# This could move to a template
content = u"A conflict review response has not yet been proposed."
else:
content = get_document_content(filename, pathname, split=False, markup=True).decode('utf-8')
content = get_document_content(filename, pathname, split=False, markup=True)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')
@ -535,7 +535,7 @@ def document_main(request, name, rev=None):
# This could move to a template
content = u"Status change text has not yet been proposed."
else:
content = get_document_content(filename, pathname, split=False).decode('utf-8')
content = get_document_content(filename, pathname, split=False)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')
@ -593,7 +593,7 @@ def document_main(request, name, rev=None):
url = urlbase + extension
if extension == ".txt":
content = get_document_content(basename, pathname + extension, split=False).decode('utf-8')
content = get_document_content(basename, pathname + extension, split=False)
utext = doc.text_or_error() # pyflakes:ignore
if content != utext:
debug.show('content[:64]')

View file

@ -282,7 +282,7 @@ def newstatus(relateddoc):
def default_approval_text(status_change,relateddoc):
filename = "%s-%s.txt" % (status_change.canonical_name(), status_change.rev)
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False).decode('utf-8')
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False)
utext = status_change.text_or_error() # pyflakes:ignore
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
debug.show('current_text[:64]')

View file

@ -221,7 +221,7 @@ class InterimSessionModelForm(forms.ModelForm):
if self.instance.agenda():
doc = self.instance.agenda()
path = os.path.join(doc.get_file_path(), doc.filename_with_rev())
content = get_document_content(os.path.basename(path), path, markup=False).decode('utf-8')
content = get_document_content(os.path.basename(path), path, markup=False)
utext = doc.text_or_error() # pyflakes:ignore
if content and content != utext and not 'Error; cannot read' in content:
debug.show('content[:64]')

View file

@ -72,7 +72,7 @@ def get_doc_writeup(doc):
writeup = latest.text
elif doc.type_id == 'conflrev':
path = os.path.join(doc.get_file_path(),doc.filename_with_rev())
writeup = get_document_content(doc.name,path,split=False,markup=False).decode('utf-8')
writeup = get_document_content(doc.name,path,split=False,markup=False)
utext = doc.text_or_error() # pyflakes:ignore
if writeup and writeup != utext and not 'Error; cannot read' in writeup:
debug.show('writeup[:64]')

View file

@ -131,5 +131,13 @@ def maybe_split(text, split=True, pos=5000):
text = text[:n+1]
return text
def decode(raw):
    """Decode a byte string to text, preferring UTF-8 and falling back to latin-1.

    Args:
        raw: The byte string to decode (``bytes``; the same type as
            ``str`` on Python 2).

    Returns:
        The decoded text.  UTF-8 is tried first; if the bytes are not
        valid UTF-8 they are decoded as latin-1 instead.

    Raises:
        TypeError: If ``raw`` is not a byte string.
    """
    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let non-bytes input slip through
    # and fail later in a less obvious way.
    if not isinstance(raw, bytes):
        raise TypeError(
            "decode() expects a byte string, got %s" % type(raw).__name__)
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # latin-1 assigns a character to every possible byte value, so
        # this fallback always succeeds; it may produce the wrong
        # characters if the content was really in some other encoding.
        return raw.decode('latin-1')