Skip to content

Commit

Permalink
perf: PyTracer improvements (#1388)
Browse files Browse the repository at this point in the history
* cache the bound method of _trace on self

This speeds up pure-Python tracing because we don't have to re-create a
bound method object all the time.

* optimize checking whether a file should be traced

The optimization is based on the following heuristic: in the
majority of cases, functions call other functions in the same file. In
that situation we don't have to re-check whether we should trace the
file.

* fix optimization in the presence of contexts

* fix too long line
  • Loading branch information
cfbolz authored May 30, 2022
1 parent f9a74c7 commit f40da64
Showing 1 changed file with 41 additions and 24 deletions.
65 changes: 41 additions & 24 deletions coverage/pytracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def __init__(self):
# On exit, self.in_atexit = True
atexit.register(setattr, self, 'in_atexit', True)

# cache a bound method on the instance, so that we don't have to
# re-create a bound method object all the time
self._cached_bound_method_trace = self._trace


def __repr__(self):
return "<PyTracer at 0x{:x}: {} lines in {} files>".format(
id(self),
Expand Down Expand Up @@ -105,7 +110,7 @@ def _trace(self, frame, event, arg_unused):

#self.log(":", frame.f_code.co_filename, frame.f_lineno, frame.f_code.co_name + "()", event)

if (self.stopped and sys.gettrace() == self._trace): # pylint: disable=comparison-with-callable
if (self.stopped and sys.gettrace() == self._cached_bound_method_trace): # pylint: disable=comparison-with-callable
# The PyTracer.stop() method has been called, possibly by another
# thread; let's deactivate ourselves now.
if 0:
Expand All @@ -129,12 +134,13 @@ def _trace(self, frame, event, arg_unused):
context_maybe = self.should_start_context(frame)
if context_maybe is not None:
self.context = context_maybe
self.started_context = True
started_context = True
self.switch_context(self.context)
else:
self.started_context = False
started_context = False
else:
self.started_context = False
started_context = False
self.started_context = started_context

# Entering a new frame. Decide if we should trace in this file.
self._activity = True
Expand All @@ -143,23 +149,33 @@ def _trace(self, frame, event, arg_unused):
self.cur_file_data,
self.cur_file_name,
self.last_line,
self.started_context,
started_context,
)
)

# Improve tracing performance: when calling a function, both caller
# and callee are often within the same file. If that's the case, we
# don't have to re-check whether to trace the corresponding
# function (which is a little bit expensive since it involves
# dictionary lookups). This optimization is only correct if we
# didn't start a context.
filename = frame.f_code.co_filename
self.cur_file_name = filename
disp = self.should_trace_cache.get(filename)
if disp is None:
disp = self.should_trace(filename, frame)
self.should_trace_cache[filename] = disp

self.cur_file_data = None
if disp.trace:
tracename = disp.source_filename
if tracename not in self.data:
self.data[tracename] = set()
self.cur_file_data = self.data[tracename]
else:
if filename != self.cur_file_name or started_context:
self.cur_file_name = filename
disp = self.should_trace_cache.get(filename)
if disp is None:
disp = self.should_trace(filename, frame)
self.should_trace_cache[filename] = disp

self.cur_file_data = None
if disp.trace:
tracename = disp.source_filename
if tracename not in self.data:
self.data[tracename] = set()
self.cur_file_data = self.data[tracename]
else:
frame.f_trace_lines = False
elif not self.cur_file_data:
frame.f_trace_lines = False

# The call event is really a "start frame" event, and happens for
Expand Down Expand Up @@ -225,7 +241,7 @@ def _trace(self, frame, event, arg_unused):
if self.started_context:
self.context = None
self.switch_context(None)
return self._trace
return self._cached_bound_method_trace

def start(self):
"""Start this Tracer.
Expand All @@ -243,10 +259,10 @@ def start(self):
# function, but we are marked as running again, so maybe it
# will be ok?
#self.log("~", "starting on different threads")
return self._trace
return self._cached_bound_method_trace

sys.settrace(self._trace)
return self._trace
sys.settrace(self._cached_bound_method_trace)
return self._cached_bound_method_trace

def stop(self):
"""Stop this Tracer."""
Expand All @@ -271,9 +287,10 @@ def stop(self):
# so don't warn if we are in atexit on PyPy and the trace function
# has changed to None.
dont_warn = (env.PYPY and env.PYPYVERSION >= (5, 4) and self.in_atexit and tf is None)
if (not dont_warn) and tf != self._trace: # pylint: disable=comparison-with-callable
if (not dont_warn) and tf != self._cached_bound_method_trace: # pylint: disable=comparison-with-callable
self.warn(
f"Trace function changed, data is likely wrong: {tf!r} != {self._trace!r}",
f"Trace function changed, data is likely wrong: "
f"{tf!r} != {self._cached_bound_method_trace!r}",
slug="trace-changed",
)

Expand Down

0 comments on commit f40da64

Please sign in to comment.