fix: quicker calculation of status from draft text (#8111)
* fix: quicker calculation of status from draft text
* chore: remove unused import
* fix: only read a small prefix of draft text when needed
This commit is contained in:
parent
8a4d020268
commit
b926178e62
|
@ -530,7 +530,7 @@ class DocumentInfo(models.Model):
|
|||
def replaced_by(self):
    """Return the set of documents found via the "replaces" relationship.

    Collects the ``document`` attribute of every item returned by
    ``self.related_that("replaces")``; duplicates collapse because the
    result is a set.
    """
    replacing_relations = self.related_that("replaces")
    return {relation.document for relation in replacing_relations}
|
||||
|
||||
def text(self):
|
||||
def text(self, size = -1):
|
||||
path = self.get_file_name()
|
||||
root, ext = os.path.splitext(path)
|
||||
txtpath = root+'.txt'
|
||||
|
@ -538,14 +538,21 @@ class DocumentInfo(models.Model):
|
|||
path = txtpath
|
||||
try:
|
||||
with io.open(path, 'rb') as file:
|
||||
raw = file.read()
|
||||
raw = file.read(size)
|
||||
except IOError:
|
||||
return None
|
||||
text = None
|
||||
try:
|
||||
text = raw.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
text = raw.decode('latin-1')
|
||||
#
|
||||
for back in range(1,4):
|
||||
try:
|
||||
text = raw[:-back].decode('utf-8')
|
||||
break
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
if text is None:
|
||||
text = raw.decode('latin-1')
|
||||
return text
|
||||
|
||||
def text_or_error(self):
|
||||
|
|
|
@ -84,7 +84,7 @@ from ietf.review.models import ReviewAssignment
|
|||
from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs
|
||||
from ietf.review.utils import no_review_from_teams_on_doc
|
||||
from ietf.utils import markup_txt, log, markdown
|
||||
from ietf.utils.draft import PlaintextDraft
|
||||
from ietf.utils.draft import get_status_from_draft_text
|
||||
from ietf.utils.meetecho import MeetechoAPIError, SlidesManager
|
||||
from ietf.utils.response import permission_denied
|
||||
from ietf.utils.text import maybe_split
|
||||
|
@ -2261,12 +2261,11 @@ def idnits2_state(request, name, rev=None):
|
|||
elif doc.intended_std_level:
|
||||
doc.deststatus = doc.intended_std_level.name
|
||||
else:
|
||||
text = doc.text()
|
||||
# 10000 bytes is a conservative upper bound on the UTF-8 encoded size of
|
||||
# the first 10 lines of the draft, which is all the status lookup reads
|
||||
text = doc.text(size=10000)
|
||||
if text:
|
||||
parsed_draft = PlaintextDraft(
|
||||
text=doc.text(), source=name, name_from_source=False
|
||||
)
|
||||
doc.deststatus = parsed_draft.get_status()
|
||||
doc.deststatus = get_status_from_draft_text(text)
|
||||
else:
|
||||
doc.deststatus = "Unknown"
|
||||
return render(
|
||||
|
|
|
@ -131,6 +131,24 @@ def acronym_match(s, l):
|
|||
#_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym))
|
||||
return s == acronym
|
||||
|
||||
def get_status_from_draft_text(text):
    """Return the intended status declared in the header of a draft's text.

    Only the first 10 lines of ``text`` (after stripping surrounding
    whitespace and normalising line endings) are searched for a line of
    the form ``Intended status: <status>``. The status is taken to run up
    to the first gap of two or more whitespace characters (the separation
    between the left and right header columns) or to the end of the line,
    so multi-word values such as "Standards Track" are returned whole and
    a status that ends its line is still found.

    Args:
        text: The draft text, or a prefix of it. May be empty or None.

    Returns:
        The status string, or None if ``text`` is empty or no
        "Intended status:" line is found in the first 10 lines.
    """
    if not text:
        return None

    # 5000 characters is conservatively much more than a full page, and only
    # the first 10 lines are inspected, so working on a prefix avoids running
    # the normalisation below over a very large draft.
    text = text.strip()[:5000]
    text = re.sub(".\x08", "", text)  # Get rid of inkribbon backspace-emphasis
    text = text.replace("\r\n", "\n")  # Convert DOS to unix
    text = text.replace("\r", "\n")  # Convert MAC to unix

    for line in text.split("\n")[:10]:
        # Stop the capture at a column gap (2+ whitespace) or at end of line;
        # stopping at the first single space would truncate multi-word values.
        status_match = re.search(
            r"^\s*Intended [Ss]tatus:\s*(.*?)(?:\s{2,}|\s*$)", line
        )
        if status_match:
            return status_match.group(1)
    return None
|
||||
|
||||
class Draft:
|
||||
"""Base class for drafts
|
||||
|
||||
|
|
Loading…
Reference in a new issue