fix: quicker calculation of status from draft text (#8111)

* fix: quicker calculation of status from draft text

* chore: remove unused import

* fix: only read a small prefix of draft text when needed
This commit is contained in:
Robert Sparks 2024-10-29 11:18:31 -05:00 committed by GitHub
parent 8a4d020268
commit b926178e62
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 34 additions and 10 deletions

View file

@ -530,7 +530,7 @@ class DocumentInfo(models.Model):
def replaced_by(self):
    """Return the set of documents related to this one via "replaces".

    Each relationship returned by ``self.related_that("replaces")``
    contributes its ``document`` attribute; duplicates collapse because the
    result is a set.
    """
    replacing_relations = self.related_that("replaces")
    return {relation.document for relation in replacing_relations}
def text(self):
def text(self, size = -1):
path = self.get_file_name()
root, ext = os.path.splitext(path)
txtpath = root+'.txt'
@ -538,14 +538,21 @@ class DocumentInfo(models.Model):
path = txtpath
try:
with io.open(path, 'rb') as file:
raw = file.read()
raw = file.read(size)
except IOError:
return None
text = None
try:
text = raw.decode('utf-8')
except UnicodeDecodeError:
text = raw.decode('latin-1')
#
for back in range(1,4):
try:
text = raw[:-back].decode('utf-8')
break
except UnicodeDecodeError:
pass
if text is None:
text = raw.decode('latin-1')
return text
def text_or_error(self):

View file

@ -84,7 +84,7 @@ from ietf.review.models import ReviewAssignment
from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs
from ietf.review.utils import no_review_from_teams_on_doc
from ietf.utils import markup_txt, log, markdown
from ietf.utils.draft import PlaintextDraft
from ietf.utils.draft import get_status_from_draft_text
from ietf.utils.meetecho import MeetechoAPIError, SlidesManager
from ietf.utils.response import permission_denied
from ietf.utils.text import maybe_split
@ -2261,12 +2261,11 @@ def idnits2_state(request, name, rev=None):
elif doc.intended_std_level:
doc.deststatus = doc.intended_std_level.name
else:
text = doc.text()
# 10000 is a conservative prefix on number of utf-8 encoded bytes to
# cover at least the first 10 lines of characters
text = doc.text(size=10000)
if text:
parsed_draft = PlaintextDraft(
text=doc.text(), source=name, name_from_source=False
)
doc.deststatus = parsed_draft.get_status()
doc.deststatus = get_status_from_draft_text(text)
else:
doc.deststatus = "Unknown"
return render(

View file

@ -131,6 +131,24 @@ def acronym_match(s, l):
#_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym))
return s == acronym
def get_status_from_draft_text(text):
    """Return the intended status announced in a draft's header, if any.

    Only the first 10 lines of the (stripped) text are examined, so callers
    may pass just a prefix of a large draft.

    Args:
        text: Plain text of the draft, or a leading prefix of it.

    Returns:
        The text that follows "Intended status:" (or "Intended Status:") on
        the first matching line, ending at the first gap of two or more
        whitespace characters or at the end of the line. Returns None when
        ``text`` is empty/None or no such line is found in the first 10 lines.
    """
    if not text:
        return None
    # 5000 characters is conservatively much more than a full page, and only
    # the first 10 lines are inspected, so taking a prefix shortcuts the
    # substitutions below for very large drafts.
    text = text.strip()[:5000]
    text = re.sub(".\x08", "", text)  # Get rid of inkribbon backspace-emphasis
    text = text.replace("\r\n", "\n")  # Convert DOS to unix
    text = text.replace("\r", "\n")  # Convert MAC to unix
    for line in text.split("\n")[:10]:
        # The status value may itself contain single spaces ("Standards
        # Track"), so the capture ends at a multi-space column gap or at the
        # end of the line rather than at the first space.
        status_match = re.search(
            r"^\s*Intended [Ss]tatus:\s*(.*?)(?:\s{2,}|\s*$)", line
        )
        if status_match:
            return status_match.group(1)
    return None
class Draft:
"""Base class for drafts