Skip to content

Commit

Permalink
fix: quicker calculation of status from draft text (#8111)
Browse files Browse the repository at this point in the history
* fix: quicker calculation of status from draft text

* chore: remove unused import

* fix: only read a small prefix of draft text when needed
  • Loading branch information
rjsparks authored Oct 29, 2024
1 parent 8a4d020 commit b926178
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
15 changes: 11 additions & 4 deletions ietf/doc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,22 +530,29 @@ def replaces(self):
def replaced_by(self):
    """Return the distinct documents found via the "replaces" relation.

    Collects the ``document`` attribute of every relation yielded by
    ``self.related_that("replaces")`` into a set, so duplicates collapse.
    NOTE(review): presumably these are the documents that replace this
    one -- confirm against ``related_that``.
    """
    # Set comprehension avoids building an intermediate list.
    return {r.document for r in self.related_that("replaces")}

def text(self):
def text(self, size = -1):
path = self.get_file_name()
root, ext = os.path.splitext(path)
txtpath = root+'.txt'
if ext != '.txt' and os.path.exists(txtpath):
path = txtpath
try:
with io.open(path, 'rb') as file:
raw = file.read()
raw = file.read(size)
except IOError:
return None
text = None
try:
text = raw.decode('utf-8')
except UnicodeDecodeError:
text = raw.decode('latin-1')
#
for back in range(1,4):
try:
text = raw[:-back].decode('utf-8')
break
except UnicodeDecodeError:
pass
if text is None:
text = raw.decode('latin-1')
return text

def text_or_error(self):
Expand Down
11 changes: 5 additions & 6 deletions ietf/doc/views_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs
from ietf.review.utils import no_review_from_teams_on_doc
from ietf.utils import markup_txt, log, markdown
from ietf.utils.draft import PlaintextDraft
from ietf.utils.draft import get_status_from_draft_text
from ietf.utils.meetecho import MeetechoAPIError, SlidesManager
from ietf.utils.response import permission_denied
from ietf.utils.text import maybe_split
Expand Down Expand Up @@ -2261,12 +2261,11 @@ def idnits2_state(request, name, rev=None):
elif doc.intended_std_level:
doc.deststatus = doc.intended_std_level.name
else:
text = doc.text()
# 10000 is a conservative prefix on number of utf-8 encoded bytes to
# cover at least the first 10 lines of characters
text = doc.text(size=10000)
if text:
parsed_draft = PlaintextDraft(
text=doc.text(), source=name, name_from_source=False
)
doc.deststatus = parsed_draft.get_status()
doc.deststatus = get_status_from_draft_text(text)
else:
doc.deststatus = "Unknown"
return render(
Expand Down
18 changes: 18 additions & 0 deletions ietf/utils/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,24 @@ def acronym_match(s, l):
#_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym))
return s == acronym

def get_status_from_draft_text(text):
    """Return the intended status declared near the top of a plaintext draft.

    Only the first 10 lines of the first 5000 characters of ``text`` (after
    stripping surrounding whitespace) are examined, so a prefix of a draft
    is sufficient input.

    Args:
        text: draft contents (or a leading portion of them) as ``str``.

    Returns:
        The characters following ``Intended status:`` (or ``Intended
        Status:``) up to, but not including, the next space -- or up to the
        end of the line when no space follows.  ``None`` when none of the
        examined lines carries the label.
    """
    # Take a prefix to shortcut work over very large drafts. 5000 is
    # conservatively much more than a full page of characters and only the
    # first 10 lines are wanted.
    text = text.strip()[:5000]
    text = re.sub(".\x08", "", text)  # Get rid of inkribbon backspace-emphasis
    text = text.replace("\r\n", "\n")  # Convert DOS to unix
    text = text.replace("\r", "\n")  # Convert MAC to unix
    for line in text.split("\n")[:10]:
        # The value ends at the first space or at end of line. Requiring a
        # literal trailing space would miss a status that is the last thing
        # on its line.
        status_match = re.search(r"^\s*Intended [Ss]tatus:\s*(.*?)(?: |$)", line)
        if status_match:
            return status_match.group(1)
    return None

class Draft:
"""Base class for drafts
Expand Down

0 comments on commit b926178

Please sign in to comment.