Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More control over external data size #412

Merged
merged 5 commits into from
Jan 24, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/sparsezoo/utils/onnx/external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@

EXTERNAL_ONNX_DATA_NAME = "model.data"

# The maximum size of a single external data file, in bytes.
# Set to 4e8 bytes (400 MB), which is roughly the size of
# 200 million parameters (assuming fp16, i.e. 2 bytes per parameter).
MAX_FILE_SIZE_EXTERNAL_DATA = 4e8


def onnx_includes_external_data(model: ModelProto) -> bool:
"""
Expand Down Expand Up @@ -87,7 +92,10 @@ def save_onnx(
:return True if the model was saved with external data, False otherwise.
"""
if external_data_file is not None:
_LOGGER.debug(f"Saving with external data: {external_data_file}")
_LOGGER.debug(
f"Saving with external data, with file chunks of maximum size "
f"{MAX_FILE_SIZE_EXTERNAL_DATA / 1e6} MB"
)
_check_for_old_external_data(
model_path=model_path, external_data_file=external_data_file
)
Expand All @@ -98,13 +106,15 @@ def save_onnx(
all_tensors_to_one_file=True,
location=external_data_file,
)
split_external_data(model_path, MAX_FILE_SIZE_EXTERNAL_DATA)
return True

if model.ByteSize() > onnx.checker.MAXIMUM_PROTOBUF:
if model.ByteSize() > MAX_FILE_SIZE_EXTERNAL_DATA:
external_data_file = external_data_file or EXTERNAL_ONNX_DATA_NAME
_LOGGER.warning(
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
"The ONNX model is too large to be saved as a single protobuf. "
f"Saving with external data: {external_data_file}"
"Saving with external data, with file chunks of maximum size "
f"{MAX_FILE_SIZE_EXTERNAL_DATA / 1e6} MB"
)
_check_for_old_external_data(
model_path=model_path, external_data_file=external_data_file
Expand All @@ -116,6 +126,7 @@ def save_onnx(
all_tensors_to_one_file=True,
location=external_data_file,
)
split_external_data(model_path, MAX_FILE_SIZE_EXTERNAL_DATA)
return True

onnx.save(model, model_path)
Expand Down
Loading