From bc209e52081bc0faa5fe58d81673fb2ecfd283a0 Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 27 Apr 2023 12:02:45 -0400
Subject: [PATCH] fix(profiling): Do not keep reference to frame to prevent
 memory leak (#2049)

The profiler can capture frames from its own thread. When it does so, it
holds on to a reference to the frame in the previous sample. One of the
frames it holds on to is a frame from the profiler itself, which prevents
the references to the other frames from being freed. A consequence of this
is that the local variables of those frames cannot be freed either. This
change ensures we do not keep a reference to the profiler around, in order
to prevent this issue.
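
To illustrate the mechanism, here is a minimal sketch (not code from this
patch; `leaky_sample` is a hypothetical stand-in for the `last_sample`
bookkeeping removed below):

    import gc
    import sys

    class Big:
        pass

    def work():
        big = Big()  # a local variable we expect to be freed on return
        return sys._getframe()  # simulate the sampler grabbing the frame

    # holding on to the frame, as the old per-sample cache did
    leaky_sample = {"main": work()}
    gc.collect()

    # `big` is still reachable through the saved frame's locals
    assert isinstance(leaky_sample["main"].f_locals["big"], Big)

    # dropping the frame reference frees the locals, which is what keying
    # the new cache on plain (abs_path, lineno) tuples achieves
    leaky_sample.clear()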
---
 mypy.ini                 |   2 +
 sentry_sdk/_lru_cache.py | 156 +++++++++++++++++++++++++++++++++++++++
 sentry_sdk/profiler.py   | 138 ++++++++++++++--------------------
 tests/test_lru_cache.py  |  37 ++++++++++
 tests/test_profiler.py   |  60 ++++++++-------
 5 files changed, 283 insertions(+), 110 deletions(-)
 create mode 100644 sentry_sdk/_lru_cache.py
 create mode 100644 tests/test_lru_cache.py

diff --git a/mypy.ini b/mypy.ini
index b23e18f66a..fef90c867e 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -59,6 +59,8 @@ ignore_missing_imports = True
 [mypy-sentry_sdk._queue]
 ignore_missing_imports = True
 disallow_untyped_defs = False
+[mypy-sentry_sdk._lru_cache]
+disallow_untyped_defs = False
 [mypy-celery.app.trace]
 ignore_missing_imports = True
 [mypy-flask.signals]
diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py
new file mode 100644
index 0000000000..91cf55d09a
--- /dev/null
+++ b/sentry_sdk/_lru_cache.py
@@ -0,0 +1,156 @@
+"""
+A fork of Python 3.6's stdlib lru_cache (found in Python's 'cpython/Lib/functools.py')
+adapted into a data structure for single-threaded use.
+
+https://github.com/python/cpython/blob/v3.6.12/Lib/functools.py
+
+
+Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
+
+All Rights Reserved
+
+
+PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+--------------------------------------------
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation
+("PSF"), and the Individual or Organization ("Licensee") accessing and
+otherwise using this software ("Python") in source or binary form and
+its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF hereby
+grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+analyze, test, perform and/or display publicly, prepare derivative works,
+distribute, and otherwise use Python alone or in any derivative version,
+provided, however, that PSF's License Agreement and PSF's notice of copyright,
+i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
+All Rights Reserved" are retained in Python alone or in any derivative version
+prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on
+or incorporates Python or any part thereof, and wants to make
+the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to Python.
+
+4. PSF is making Python available to Licensee on an "AS IS"
+basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between PSF and
+Licensee. This License Agreement does not grant permission to use PSF
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python, Licensee
+agrees to be bound by the terms and conditions of this License
+Agreement.
+
+"""

+SENTINEL = object()
+
+
+# aliases to the entries in a node
+PREV = 0
+NEXT = 1
+KEY = 2
+VALUE = 3
+
+
+class LRUCache(object):
+    def __init__(self, max_size):
+        assert max_size > 0
+
+        self.max_size = max_size
+        self.full = False
+
+        self.cache = {}
+
+        # root of the circularly linked list to keep track of
+        # the least recently used key
+        self.root = []  # type: ignore
+        # the node looks like [PREV, NEXT, KEY, VALUE]
+        self.root[:] = [self.root, self.root, None, None]
+
+        self.hits = self.misses = 0
+
+    def set(self, key, value):
+        link = self.cache.get(key, SENTINEL)
+
+        if link is not SENTINEL:
+            # have to move the node to the front of the linked list
+            link_prev, link_next, _key, _value = link
+
+            # first remove the node from the linked list
+            link_prev[NEXT] = link_next
+            link_next[PREV] = link_prev
+
+            # insert the node between the root and the last
+            last = self.root[PREV]
+            last[NEXT] = self.root[PREV] = link
+            link[PREV] = last
+            link[NEXT] = self.root
+
+            # update the value
+            link[VALUE] = value
+
+        elif self.full:
+            # reuse the root node, so update its key/value
+            old_root = self.root
+            old_root[KEY] = key
+            old_root[VALUE] = value
+
+            self.root = old_root[NEXT]
+            old_key = self.root[KEY]
+
+            self.root[KEY] = self.root[VALUE] = None
+
+            del self.cache[old_key]
+
+            self.cache[key] = old_root
+
+        else:
+            # insert new node after last
+            last = self.root[PREV]
+            link = [last, self.root, key, value]
+            last[NEXT] = self.root[PREV] = self.cache[key] = link
+            self.full = len(self.cache) >= self.max_size
+
+    def get(self, key, default=None):
+        link = self.cache.get(key, SENTINEL)
+
+        if link is SENTINEL:
+            self.misses += 1
+            return default
+
+        # have to move the node to the front of the linked list
+        link_prev, link_next, _key, _value = link
+
+        # first remove the node from the linked list
+        link_prev[NEXT] = link_next
+        link_next[PREV] = link_prev
+
+        # insert the node between the root and the last
+        last = self.root[PREV]
+        last[NEXT] = self.root[PREV] = link
+        link[PREV] = last
+        link[NEXT] = self.root
+
+        self.hits += 1
+
+        return link[VALUE]
diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 2ce6e01a2f..ee74a86e52 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -37,6 +37,7 @@
 
 import sentry_sdk
 from sentry_sdk._compat import PY33, PY311
+from sentry_sdk._lru_cache import LRUCache
 from sentry_sdk._types import TYPE_CHECKING
 from sentry_sdk.utils import (
     capture_internal_exception,
@@ -65,19 +66,6 @@
 
     ThreadId = str
 
-    # The exact value of this id is not very meaningful. The purpose
-    # of this id is to give us a compact and unique identifier for a
-    # raw stack that can be used as a key to a dictionary so that it
-    # can be used during the sampled format generation.
-    RawStackId = Tuple[int, int]
-
-    RawFrame = Tuple[
-        str,  # abs_path
-        int,  # lineno
-    ]
-    RawStack = Tuple[RawFrame, ...]
-    RawSample = Sequence[Tuple[str, Tuple[RawStackId, RawStack, Deque[FrameType]]]]
-
     ProcessedSample = TypedDict(
         "ProcessedSample",
         {
@@ -120,6 +108,21 @@
         {"profile_id": str},
     )
 
+    FrameId = Tuple[
+        str,  # abs_path
+        int,  # lineno
+    ]
+    FrameIds = Tuple[FrameId, ...]
+
+    # The exact value of this id is not very meaningful. The purpose
+    # of this id is to give us a compact and unique identifier for a
+    # raw stack that can be used as a key to a dictionary so that it
+    # can be used during the sampled format generation.
+    StackId = Tuple[int, int]
+
+    ExtractedStack = Tuple[StackId, FrameIds, List[ProcessedFrame]]
+    ExtractedSample = Sequence[Tuple[ThreadId, ExtractedStack]]
+
 
 try:
     from gevent import get_hub as get_gevent_hub  # type: ignore
@@ -244,12 +247,16 @@ def teardown_profiler():
 MAX_STACK_DEPTH = 128
 
 
+CWD = os.getcwd()
+
+
 def extract_stack(
-    frame,  # type: Optional[FrameType]
-    prev_cache=None,  # type: Optional[Tuple[RawStackId, RawStack, Deque[FrameType]]]
+    raw_frame,  # type: Optional[FrameType]
+    cache,  # type: LRUCache
+    cwd=CWD,  # type: str
     max_stack_depth=MAX_STACK_DEPTH,  # type: int
 ):
-    # type: (...) -> Tuple[RawStackId, RawStack, Deque[FrameType]]
+    # type: (...) -> ExtractedStack
     """
     Extracts the stack starting the specified frame. The extracted stack
     assumes the specified frame is the top of the stack, and works back
@@ -259,31 +266,21 @@ def extract_stack(
     only the first `MAX_STACK_DEPTH` frames will be returned.
     """
 
-    frames = deque(maxlen=max_stack_depth)  # type: Deque[FrameType]
+    raw_frames = deque(maxlen=max_stack_depth)  # type: Deque[FrameType]
 
-    while frame is not None:
-        f_back = frame.f_back
-        frames.append(frame)
-        frame = f_back
+    while raw_frame is not None:
+        f_back = raw_frame.f_back
+        raw_frames.append(raw_frame)
+        raw_frame = f_back
 
-    if prev_cache is None:
-        stack = tuple(frame_key(frame) for frame in frames)
-    else:
-        _, prev_stack, prev_frames = prev_cache
-        prev_depth = len(prev_frames)
-        depth = len(frames)
-
-        # We want to match the frame found in this sample to the frames found in the
-        # previous sample. If they are the same (using the `is` operator), we can
-        # skip the expensive work of extracting the frame information and reuse what
-        # we extracted during the last sample.
-        #
-        # Make sure to keep in mind that the stack is ordered from the inner most
-        # from to the outer most frame so be careful with the indexing.
-        stack = tuple(
-            prev_stack[i] if i >= 0 and frame is prev_frames[i] else frame_key(frame)
-            for i, frame in zip(range(prev_depth - depth, prev_depth), frames)
-        )
+    frame_ids = tuple(frame_id(raw_frame) for raw_frame in raw_frames)
+    frames = []
+    for i, fid in enumerate(frame_ids):
+        frame = cache.get(fid)
+        if frame is None:
+            frame = extract_frame(raw_frames[i], cwd)
+            cache.set(fid, frame)
+        frames.append(frame)
 
     # Instead of mapping the stack into frame ids and hashing
     # that as a tuple, we can directly hash the stack.
@@ -296,14 +293,14 @@ def extract_stack(
     # To Reduce the likelihood of hash collisions, we include
     # the stack depth. This means that only stacks of the same
     # depth can suffer from hash collisions.
-    stack_id = len(stack), hash(stack)
+    stack_id = len(raw_frames), hash(frame_ids)
 
-    return stack_id, stack, frames
+    return stack_id, frame_ids, frames
 
 
-def frame_key(frame):
-    # type: (FrameType) -> RawFrame
-    return (frame.f_code.co_filename, frame.f_lineno)
+def frame_id(raw_frame):
+    # type: (FrameType) -> FrameId
+    return (raw_frame.f_code.co_filename, raw_frame.f_lineno)
 
 
 def extract_frame(frame, cwd):
@@ -472,8 +469,8 @@ def __init__(
         self.stop_ns = 0  # type: int
         self.active = False  # type: bool
 
-        self.indexed_frames = {}  # type: Dict[RawFrame, int]
-        self.indexed_stacks = {}  # type: Dict[RawStackId, int]
+        self.indexed_frames = {}  # type: Dict[FrameId, int]
+        self.indexed_stacks = {}  # type: Dict[StackId, int]
         self.frames = []  # type: List[ProcessedFrame]
         self.stacks = []  # type: List[ProcessedStack]
         self.samples = []  # type: List[ProcessedSample]
@@ -613,8 +610,8 @@ def __exit__(self, ty, value, tb):
 
         scope.profile = old_profile
 
-    def write(self, cwd, ts, sample, frame_cache):
-        # type: (str, int, RawSample, Dict[RawFrame, ProcessedFrame]) -> None
+    def write(self, ts, sample):
+        # type: (int, ExtractedSample) -> None
         if not self.active:
             return
 
@@ -630,23 +627,19 @@ def write(self, cwd, ts, sample, frame_cache):
 
         elapsed_since_start_ns = str(offset)
 
-        for tid, (stack_id, raw_stack, frames) in sample:
+        for tid, (stack_id, frame_ids, frames) in sample:
             try:
                 # Check if the stack is indexed first, this lets us skip
                 # indexing frames if it's not necessary
                 if stack_id not in self.indexed_stacks:
-                    for i, raw_frame in enumerate(raw_stack):
-                        if raw_frame not in self.indexed_frames:
-                            self.indexed_frames[raw_frame] = len(self.indexed_frames)
-                            processed_frame = frame_cache.get(raw_frame)
-                            if processed_frame is None:
-                                processed_frame = extract_frame(frames[i], cwd)
-                                frame_cache[raw_frame] = processed_frame
-                            self.frames.append(processed_frame)
+                    for i, frame_id in enumerate(frame_ids):
+                        if frame_id not in self.indexed_frames:
+                            self.indexed_frames[frame_id] = len(self.indexed_frames)
+                            self.frames.append(frames[i])
 
                     self.indexed_stacks[stack_id] = len(self.indexed_stacks)
                     self.stacks.append(
-                        [self.indexed_frames[raw_frame] for raw_frame in raw_stack]
+                        [self.indexed_frames[frame_id] for frame_id in frame_ids]
                     )
 
                 self.samples.append(
@@ -791,12 +784,7 @@ def make_sampler(self):
         # type: () -> Callable[..., None]
         cwd = os.getcwd()
 
-        # In Python3+, we can use the `nonlocal` keyword to rebind the value,
-        # but this is not possible in Python2. To get around this, we wrap
-        # the value in a list to allow updating this value each sample.
-        last_sample = [
-            {}
-        ]  # type: List[Dict[int, Tuple[RawStackId, RawStack, Deque[FrameType]]]]
+        cache = LRUCache(max_size=256)
 
         def _sample_stack(*args, **kwargs):
             # type: (*Any, **Any) -> None
@@ -808,7 +796,6 @@ def _sample_stack(*args, **kwargs):
             if not self.new_profiles and not self.active_profiles:
                 # make sure to clear the cache if we're not profiling so we dont
                 # keep a reference to the last stack of frames around
-                last_sample[0] = {}
                 return
 
             # This is the number of profiles we want to pop off.
@@ -824,27 +811,16 @@ def _sample_stack(*args, **kwargs):
             now = nanosecond_time()
 
             try:
-                raw_sample = {
-                    tid: extract_stack(frame, last_sample[0].get(tid))
+                sample = [
+                    (str(tid), extract_stack(frame, cache, cwd))
                     for tid, frame in sys._current_frames().items()
-                }
+                ]
             except AttributeError:
                 # For some reason, the frame we get doesn't have certain attributes.
                 # When this happens, we abandon the current sample as it's bad.
                 capture_internal_exception(sys.exc_info())
-
-                # make sure to clear the cache if something went wrong when extracting
-                # the stack so we dont keep a reference to the last stack of frames around
-                last_sample[0] = {}
-
                 return
 
-            # make sure to update the last sample so the cache has
-            # the most recent stack for better cache hits
-            last_sample[0] = raw_sample
-
-            sample = [(str(tid), data) for tid, data in raw_sample.items()]
-
             # Move the new profiles into the active_profiles set.
             #
             # We cannot directly add the to active_profiles set
@@ -860,11 +836,9 @@ def _sample_stack(*args, **kwargs):
 
             inactive_profiles = []
 
-            frame_cache = {}  # type: Dict[RawFrame, ProcessedFrame]
-
             for profile in self.active_profiles:
                 if profile.active:
-                    profile.write(cwd, now, sample, frame_cache)
+                    profile.write(now, sample)
                 else:
                     # If a thread is marked inactive, we buffer it
                     # to `inactive_profiles` so it can be removed
diff --git a/tests/test_lru_cache.py b/tests/test_lru_cache.py
new file mode 100644
index 0000000000..5343e76169
--- /dev/null
+++ b/tests/test_lru_cache.py
@@ -0,0 +1,37 @@
+import pytest
+
+from sentry_sdk._lru_cache import LRUCache
+
+
+@pytest.mark.parametrize("max_size", [-10, -1, 0])
+def test_illegal_size(max_size):
+    with pytest.raises(AssertionError):
+        LRUCache(max_size=max_size)
+
+
+def test_simple_set_get():
+    cache = LRUCache(1)
+    assert cache.get(1) is None
+    cache.set(1, 1)
+    assert cache.get(1) == 1
+
+
+def test_overwrite():
+    cache = LRUCache(1)
+    assert cache.get(1) is None
+    cache.set(1, 1)
+    assert cache.get(1) == 1
+    cache.set(1, 2)
+    assert cache.get(1) == 2
+
+
+def test_cache_eviction():
+    cache = LRUCache(3)
+    cache.set(1, 1)
+    cache.set(2, 2)
+    cache.set(3, 3)
+    assert cache.get(1) == 1
+    assert cache.get(2) == 2
+    cache.set(4, 4)
+    assert cache.get(3) is None
+    assert cache.get(4) == 4
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index b0e8925be4..11ece9821e 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -20,6 +20,7 @@
     setup_profiler,
 )
 from sentry_sdk.tracing import Transaction
+from sentry_sdk._lru_cache import LRUCache
 from sentry_sdk._queue import Queue
 
 try:
@@ -472,35 +473,40 @@ def test_extract_stack_with_max_depth(depth, max_stack_depth, actual_depth):
 
     # increase the max_depth by the `base_stack_depth` to account
     # for the extra frames pytest will add
-    _, stack, frames = extract_stack(
-        frame, max_stack_depth=max_stack_depth + base_stack_depth
+    _, frame_ids, frames = extract_stack(
+        frame, LRUCache(max_size=1), max_stack_depth=max_stack_depth + base_stack_depth
     )
-    assert len(stack) == base_stack_depth + actual_depth
+    assert len(frame_ids) == base_stack_depth + actual_depth
     assert len(frames) == base_stack_depth + actual_depth
 
     for i in range(actual_depth):
-        assert get_frame_name(frames[i]) == "get_frame", i
+        assert frames[i]["function"] == "get_frame", i
 
     # index 0 contains the inner most frame on the stack, so the lamdba
     # should be at index `actual_depth`
     if sys.version_info >= (3, 11):
         assert (
-            get_frame_name(frames[actual_depth])
+            frames[actual_depth]["function"]
             == "test_extract_stack_with_max_depth.<locals>.<lambda>"
         ), actual_depth
     else:
-        assert get_frame_name(frames[actual_depth]) == "<lambda>", actual_depth
+        assert frames[actual_depth]["function"] == "<lambda>", actual_depth
 
 
-def test_extract_stack_with_cache():
-    frame = get_frame(depth=1)
-
-    prev_cache = extract_stack(frame)
-    _, stack1, _ = prev_cache
-    _, stack2, _ = extract_stack(frame, prev_cache)
-
-    assert len(stack1) == len(stack2)
-    for i, (frame1, frame2) in enumerate(zip(stack1, stack2)):
+@pytest.mark.parametrize(
+    ("frame", "depth"),
+    [(get_frame(depth=1), len(inspect.stack()))],
+)
+def test_extract_stack_with_cache(frame, depth):
+    # make sure cache has enough room or this test will fail
+    cache = LRUCache(max_size=depth)
+    _, _, frames1 = extract_stack(frame, cache)
+    _, _, frames2 = extract_stack(frame, cache)
+
+    assert len(frames1) > 0
+    assert len(frames2) > 0
+    assert len(frames1) == len(frames2)
+    for i, (frame1, frame2) in enumerate(zip(frames1, frames2)):
         # DO NOT use `==` for the assertion here since we are
         # testing for identity, and using `==` would test for
         # equality which would always pass since we're extract
@@ -629,9 +635,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
 )
 @mock.patch("sentry_sdk.profiler.MAX_PROFILE_DURATION_NS", 1)
 def test_max_profile_duration_reached(scheduler_class):
-    sample = [("1", extract_stack(get_frame()))]
-
-    cwd = os.getcwd()
+    sample = [("1", extract_stack(get_frame(), LRUCache(max_size=1)))]
 
     with scheduler_class(frequency=1000) as scheduler:
         transaction = Transaction(sampled=True)
@@ -640,15 +644,15 @@ def test_max_profile_duration_reached(scheduler_class):
             assert profile.active
 
             # write a sample at the start time, so still active
-            profile.write(cwd, profile.start_ns + 0, sample, {})
+            profile.write(profile.start_ns + 0, sample)
             assert profile.active
 
             # write a sample at max time, so still active
-            profile.write(cwd, profile.start_ns + 1, sample, {})
+            profile.write(profile.start_ns + 1, sample)
             assert profile.active
 
             # write a sample PAST the max time, so now inactive
-            profile.write(cwd, profile.start_ns + 2, sample, {})
+            profile.write(profile.start_ns + 2, sample)
             assert not profile.active
 
 
@@ -675,8 +679,8 @@ def ensure_running(self):
 
 
 sample_stacks = [
-    extract_stack(get_frame(), max_stack_depth=1),
-    extract_stack(get_frame(), max_stack_depth=2),
+    extract_stack(get_frame(), LRUCache(max_size=1), max_stack_depth=1),
+    extract_stack(get_frame(), LRUCache(max_size=1), max_stack_depth=2),
 ]
 
 
@@ -706,7 +710,7 @@ def ensure_running(self):
     pytest.param(
         [(0, [("1", sample_stacks[0])])],
         {
-            "frames": [extract_frame(sample_stacks[0][2][0], os.getcwd())],
+            "frames": [sample_stacks[0][2][0]],
             "samples": [
                 {
                     "elapsed_since_start_ns": "0",
@@ -725,7 +729,7 @@ def ensure_running(self):
             (1, [("1", sample_stacks[0])]),
         ],
         {
-            "frames": [extract_frame(sample_stacks[0][2][0], os.getcwd())],
+            "frames": [sample_stacks[0][2][0]],
             "samples": [
                 {
                     "elapsed_since_start_ns": "0",
@@ -750,8 +754,8 @@ def ensure_running(self):
         ],
         {
             "frames": [
-                extract_frame(sample_stacks[0][2][0], os.getcwd()),
-                extract_frame(sample_stacks[1][2][0], os.getcwd()),
+                sample_stacks[0][2][0],
+                sample_stacks[1][2][0],
             ],
             "samples": [
                 {
@@ -785,7 +789,7 @@ def test_profile_processing(
 
         # force the sample to be written at a time relative to the
         # start of the profile
         now = profile.start_ns + ts
-        profile.write(os.getcwd(), now, sample, {})
+        profile.write(now, sample)
 
         processed = profile.process()
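
For reference, a minimal sketch of the caching pattern this patch
introduces (illustrative only; `process_frame` is a hypothetical helper
mirroring the loop inside extract_stack, and the single extracted field
stands in for the full output of extract_frame):

    import sys

    from sentry_sdk._lru_cache import LRUCache

    cache = LRUCache(max_size=256)

    def process_frame(raw_frame):
        # the cache key is a tuple of plain data, so it holds no
        # reference to the frame object itself
        fid = (raw_frame.f_code.co_filename, raw_frame.f_lineno)
        frame = cache.get(fid)
        if frame is None:
            # the patch extracts the full frame via
            # extract_frame(raw_frame, cwd); one field keeps this short
            frame = {"function": raw_frame.f_code.co_name}
            cache.set(fid, frame)
        return frame

    print(process_frame(sys._getframe()))  # a miss first, then cache hits

Because the cache stores only these plain tuples and dicts, repeated
samples of an unchanged stack still skip re-extraction, without the
profiler pinning its own frames (and their locals) between ticks.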