Skip to content

Commit

Permalink
gh-103285: Rewrite _splitlines_no_ff to improve performance (#103307)
Browse files Browse the repository at this point in the history
  • Loading branch information
gaogaotiantian authored Apr 24, 2023
1 parent f0ed293 commit 3686013
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 18 deletions.
26 changes: 8 additions & 18 deletions Lib/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
:license: Python License.
"""
import sys
import re
from _ast import *
from contextlib import contextmanager, nullcontext
from enum import IntEnum, auto, _simple_enum
Expand Down Expand Up @@ -305,28 +306,17 @@ def get_docstring(node, clean=True):
return text


# NOTE: this span is a rendered diff without +/- markers, so the previous and
# the rewritten implementation of _splitlines_no_ff appear interleaved.
# Previous signature (superseded by the two-argument form below):
def _splitlines_no_ff(source):
# Module-level compiled pattern used by the rewritten implementation.  Each
# match is the shortest run of characters followed by one line terminator
# (\r\n, \n or \r) or by the end of the input.  "\r\n" is listed first so
# the pair stays together, and form feed is not a terminator here.
# NOTE(review): the `$` alternative also permits a zero-length match at the
# very end of the input, so the resulting list may end with '' -- confirm
# callers tolerate that.
_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
# Rewritten signature: *maxlines* (default None = no limit) caps how many
# leading lines are collected.
def _splitlines_no_ff(source, maxlines=None):
"""Split a string into lines ignoring form feed and other chars.
This mimics how the Python parser splits source code.
"""
# Previous implementation: walk the string one character at a time, growing
# `next_line` and flushing it to `lines` whenever a \r or \n is seen.
idx = 0
lines = []
next_line = ''
while idx < len(source):
c = source[idx]
next_line += c
idx += 1
# Keep \r\n together
if c == '\r' and idx < len(source) and source[idx] == '\n':
next_line += '\n'
idx += 1
if c in '\r\n':
lines.append(next_line)
next_line = ''

# Flush a final line that has no terminator.
if next_line:
lines.append(next_line)
# Rewritten implementation: let the regex find each line.  Match numbers
# start at 1, so the loop stops once more than `maxlines` lines were seen.
for lineno, match in enumerate(_line_pattern.finditer(source), 1):
if maxlines is not None and lineno > maxlines:
break
# match[0] is the whole matched line, terminator included.
lines.append(match[0])
return lines


Expand Down Expand Up @@ -360,7 +350,7 @@ def get_source_segment(source, node, *, padded=False):
except AttributeError:
return None

lines = _splitlines_no_ff(source)
lines = _splitlines_no_ff(source, maxlines=end_lineno+1)
if end_lineno == lineno:
return lines[lineno].encode()[col_offset:end_col_offset].decode()

Expand Down
11 changes: 11 additions & 0 deletions Lib/test/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2293,6 +2293,17 @@ class C:
cdef = ast.parse(s).body[0]
self.assertEqual(ast.get_source_segment(s, cdef.body[0], padded=True), s_method)

def test_source_segment_newlines(self):
    # Three top-level functions written with a mix of \n, \r and \r\n
    # line endings; each extracted segment must keep its own endings.
    source = 'def f():\n pass\ndef g():\r pass\r\ndef h():\r\n pass\r\n'
    func_f, func_g, func_h = ast.parse(source).body
    expected_segments = (
        (func_f, 'def f():\n pass'),
        (func_g, 'def g():\r pass'),
        (func_h, 'def h():\r\n pass'),
    )
    for func_node, segment in expected_segments:
        self._check_content(source, func_node, segment)

    # A single function whose body uses all three line endings; its
    # segment is the whole source minus the trailing newline.
    mixed = 'def f():\n a = 1\r b = 2\r\n c = 3\n'
    only_func = ast.parse(mixed).body[0]
    self._check_content(mixed, only_func, mixed.rstrip())

def test_source_segment_missing_info(self):
s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n'
v, w, x, y = ast.parse(s).body
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance of :func:`ast.get_source_segment`.

0 comments on commit 3686013

Please sign in to comment.