fix: quicker calculation of status from draft text (#8111)

* fix: quicker calculation of status from draft text
* chore: remove unused import
* fix: only read a small prefix of draft text when needed
ietf-tools · Oct 29, 2024 · b926178 · b926178
1 parent 8a4d020
commit b926178
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 10 deletions.
diff --git a/ietf/doc/models.py b/ietf/doc/models.py
@@ -530,22 +530,29 @@ def replaces(self):
     def replaced_by(self):
         return set([ r.document for r in self.related_that("replaces") ])
 
-    def text(self):
+    def text(self, size = -1):
+        """Return the document's text, or None if its file cannot be read.
+
+        A sibling ``.txt`` file is preferred when one exists. At most ``size``
+        bytes are read (the whole file when ``size`` is negative or None).
+        The bytes are decoded as UTF-8, falling back to latin-1 when they are
+        not valid UTF-8. A multi-byte character cut in half by the ``size``
+        limit is dropped instead of forcing the latin-1 fallback.
+        """
         path = self.get_file_name()
         root, ext =  os.path.splitext(path)
         txtpath = root+'.txt'
         if ext != '.txt' and os.path.exists(txtpath):
             path = txtpath
         try:
             with io.open(path, 'rb') as file:
-                raw = file.read()
+                raw = file.read(size)
         except IOError:
             return None
+        text = None
         try:
             text = raw.decode('utf-8')
         except UnicodeDecodeError:
-            text = raw.decode('latin-1')
-        #
+            # Only a read that filled the whole ``size`` budget can have split
+            # a character. Trimming bytes in any other case would silently
+            # drop the tail of a file that is simply not UTF-8.
+            if size is not None and size >= 0 and len(raw) == size:
+                import codecs
+                # With final=False the decoder holds back an incomplete
+                # trailing sequence but still raises on invalid bytes.
+                decoder = codecs.getincrementaldecoder('utf-8')()
+                try:
+                    text = decoder.decode(raw, final=False)
+                except UnicodeDecodeError:
+                    pass
+            if text is None:
+                text = raw.decode('latin-1')
         return text
 
     def text_or_error(self):

diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py
@@ -84,7 +84,7 @@
 from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs
 from ietf.review.utils import no_review_from_teams_on_doc
 from ietf.utils import markup_txt, log, markdown
-from ietf.utils.draft import PlaintextDraft
+from ietf.utils.draft import get_status_from_draft_text
 from ietf.utils.meetecho import MeetechoAPIError, SlidesManager
 from ietf.utils.response import permission_denied
 from ietf.utils.text import maybe_split
@@ -2261,12 +2261,11 @@ def idnits2_state(request, name, rev=None):
     elif doc.intended_std_level:
         doc.deststatus = doc.intended_std_level.name
     else:
-        text = doc.text()
+        # 10000 bytes is a conservative prefix: more than enough UTF-8
+        # encoded text to cover at least the first 10 lines of the draft
+        text = doc.text(size=10000)
         if text:
-            parsed_draft = PlaintextDraft(
-                text=doc.text(), source=name, name_from_source=False
-            )
-            doc.deststatus = parsed_draft.get_status()
+            doc.deststatus = get_status_from_draft_text(text)
         else:
             doc.deststatus = "Unknown"
     return render(

diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py
@@ -131,6 +131,24 @@ def acronym_match(s, l):
     #_debug(" s:%s; l:%s => %s; %s" % (s, l, acronym, s==acronym)) 
     return s == acronym
 
+def get_status_from_draft_text(text):
+    """Return the "Intended status" value from a draft's header, or None.
+
+    Only the first 10 lines of the stripped text are examined, and the value
+    must be followed by at least three spaces on its line.
+    """
+    # 5000 characters is far more than a page; capping the text first keeps
+    # the clean-up below cheap for very large drafts.
+    head = text.strip()[:5000]
+    head = re.sub(".\x08", "", head)  # Drop ink-ribbon backspace emphasis
+    # Normalise DOS (\r\n) and old Mac (\r) line endings to unix
+    head = head.replace("\r\n", "\n").replace("\r", "\n")
+    for candidate in head.split("\n")[:10]:
+        found = re.search(r"^\s*Intended [Ss]tatus:\s*(.*?)   ", candidate)
+        if found:
+            return found.group(1)
+    return None
+
 class Draft:
     """Base class for drafts