From a3671e45ce0465f1776be1293dee4930d874511d Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 13 Jul 2020 18:28:17 -0700 Subject: [PATCH] Skip coercing to `bytes` in `merge_frames` As the frames we receive are typically mutable, non-`bytes` objects like `bytearray`s or NumPy `ndarray`s, coercing to `bytes` at this stage triggers a copy of all frames. As we are going to toss those copied versions anyway when joining them into a larger `bytes` object, this ends up being wasteful with memory. Fortunately, `bytes.join(...)` accepts any and all `bytes`-like objects. So instead just pass them all through as-is to `bytes.join(...)`, which is free and doesn't require a copy. This should cut down on the memory usage in this part of the code. --- distributed/protocol/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/distributed/protocol/utils.py b/distributed/protocol/utils.py index fa020dae909..defbda2ba4f 100644 --- a/distributed/protocol/utils.py +++ b/distributed/protocol/utils.py @@ -1,7 +1,7 @@ import struct import msgpack -from ..utils import ensure_bytes, nbytes +from ..utils import nbytes BIG_BYTES_SHARD_SIZE = 2 ** 26 @@ -84,7 +84,7 @@ def merge_frames(header, frames): if len(L) == 1: # no work necessary out.extend(L) else: - out.append(b"".join(map(ensure_bytes, L))) + out.append(b"".join(L)) return out