Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More control over external data size #412

Merged
merged 5 commits into from
Jan 24, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/sparsezoo/utils/onnx/external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@

EXTERNAL_ONNX_DATA_NAME = "model.data"

# DUMP_EXTERNAL_DATA_THRESHOLD is a limiting value
# for the model saved with external data. If the model
# is larger than this value, it will be saved with external data.
# The threshold is expressed in bits and is
# set to 500MB (4e9 bits). This is roughly the size of
# 250 million parameters (assuming fp16).
DUMP_EXTERNAL_DATA_THRESHOLD = 4e9


def onnx_includes_external_data(model: ModelProto) -> bool:
"""
Expand Down Expand Up @@ -66,6 +74,7 @@ def onnx_includes_external_data(model: ModelProto) -> bool:
def save_onnx(
model: ModelProto,
model_path: str,
max_external_file_size: int = 16e9,
external_data_file: Optional[str] = None,
) -> bool:
"""
Expand All @@ -84,10 +93,15 @@ def save_onnx(
large to be saved as a single protobuf, and this argument is None,
the external data file will be coerced to take the default name
specified in the variable EXTERNAL_ONNX_DATA_NAME
:param max_external_file_size: The maximum file size in bytes of a single split
external data out file. Defaults to 16000000000 (16e9 = 16GB)
:return True if the model was saved with external data, False otherwise.
"""
if external_data_file is not None:
_LOGGER.debug(f"Saving with external data: {external_data_file}")
_LOGGER.debug(
f"Saving with external data, with file chunks of maximum size "
f"{max_external_file_size / 1e9} GB"
)
_check_for_old_external_data(
model_path=model_path, external_data_file=external_data_file
)
Expand All @@ -98,13 +112,15 @@ def save_onnx(
all_tensors_to_one_file=True,
location=external_data_file,
)
split_external_data(model_path, max_file_size=max_external_file_size)
return True

if model.ByteSize() > onnx.checker.MAXIMUM_PROTOBUF:
if model.ByteSize() > DUMP_EXTERNAL_DATA_THRESHOLD:
external_data_file = external_data_file or EXTERNAL_ONNX_DATA_NAME
_LOGGER.warning(
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
"The ONNX model is too large to be saved as a single protobuf. "
f"Saving with external data: {external_data_file}"
"Saving with external data, with file chunks of maximum size "
f"{DUMP_EXTERNAL_DATA_THRESHOLD / 1e9} GB"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wrong threshold - also let's downgrade from warning - probably a bit too scary for an expected, default flow

dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
)
_check_for_old_external_data(
model_path=model_path, external_data_file=external_data_file
Expand All @@ -116,6 +132,7 @@ def save_onnx(
all_tensors_to_one_file=True,
location=external_data_file,
)
split_external_data(model_path, max_file_size=max_external_file_size)
return True

onnx.save(model, model_path)
Expand Down
Loading