From 00cb35a0aa9d52b539e4350953ba72787f0144d7 Mon Sep 17 00:00:00 2001 From: Callum Tilbury Date: Tue, 12 Mar 2024 15:02:23 +0200 Subject: [PATCH 1/3] fix: improve the compression api of vaults --- flashbax/vault/vault.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/flashbax/vault/vault.py b/flashbax/vault/vault.py index f366dd5..f2d75fe 100644 --- a/flashbax/vault/vault.py +++ b/flashbax/vault/vault.py @@ -38,7 +38,7 @@ "id": "gzip", "level": 5, } -VERSION = 1.1 +VERSION = 1.2 def _path_to_ds_name(path: Tuple[Union[DictKey, GetAttrKey], ...]) -> str: @@ -87,8 +87,8 @@ def __init__( # noqa: CCR001 vault_uid (Optional[str], optional): Unique identifier for this vault. Defaults to None, which will use the current timestamp. compression (Optional[dict], optional): - Compression settings for the vault. Defaults to None, which will use - the default settings. + Compression settings used when creating the vault. + Defaults to None, which will use the default compression. metadata (Optional[dict], optional): Any additional metadata to save. Defaults to None. @@ -145,7 +145,6 @@ def __init__( # noqa: CCR001 "version": VERSION, "structure_shape": serialised_experience_structure_shape, "structure_dtype": serialised_experience_structure_dtype, - "compression": compression or COMPRESSION_DEFAULT, **(metadata_json_ready or {}), # Allow user to save extra metadata } # Dump metadata to file @@ -184,12 +183,8 @@ def __init__( # noqa: CCR001 target=experience_structure, ) - # Load compression settings from metadata - self._compression = ( - self._metadata["compression"] - if "compression" in self._metadata - else COMPRESSION_DEFAULT - ) + # Keep the compression settings, to be used in init_leaf, in case we're creating the vault + self._compression = compression # Each leaf of the fbx_state.experience maps to a data store, so we tree map over the # tree structure to create each of the data stores. 
@@ -235,11 +230,7 @@ def _get_base_spec(self, name: str) -> dict: "base": f"{DRIVER}{self._base_path}", "path": name, }, - "metadata": { - "compressor": { - **self._compression, - } - }, + "metadata": {}, } def _init_leaf( @@ -260,14 +251,17 @@ def _init_leaf( leaf_shape, leaf_dtype = None, None if create_ds: - # Only specify dtype and shape if we are creating a vault - # (i.e. don't impose dtype and shape if we are _loading_ a vault) + # Only specify dtype, shape, and compression if we are creating a vault + # (i.e. don't impose these fields if we are _loading_ a vault) leaf_shape = ( shape[0], # Batch dim TIME_AXIS_MAX_LENGTH, # Time dim, which we extend *shape[2:], # Experience dim(s) ) leaf_dtype = dtype + spec["metadata"]["compressor"] = ( + COMPRESSION_DEFAULT if self._compression is None else self._compression + ) leaf_ds = ts.open( spec, From f9186589290b88da1a078b813d748dd502c3c183 Mon Sep 17 00:00:00 2001 From: Callum Tilbury Date: Tue, 12 Mar 2024 15:05:13 +0200 Subject: [PATCH 2/3] chore: bump fbx to 0.1.2 after vault update --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cfd35fe..a2ef586 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ authors = [ {name="InstaDeep" , email = "hello@instadeep.com"}, ] requires-python = ">=3.9" -version = "0.1.1" +version = "0.1.2" classifiers=[ "Development Status :: 2 - Pre-Alpha", "Environment :: Console", From 88894f814613561ebf8bc7161e994a41037dbe93 Mon Sep 17 00:00:00 2001 From: Callum Tilbury Date: Tue, 12 Mar 2024 15:54:08 +0200 Subject: [PATCH 3/3] chore: improved clarity, after chats with Edan --- flashbax/vault/vault.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/flashbax/vault/vault.py b/flashbax/vault/vault.py index f2d75fe..921a05b 100644 --- a/flashbax/vault/vault.py +++ b/flashbax/vault/vault.py @@ -115,6 +115,11 @@ def __init__( # noqa: CCR001 print(f"Loading vault found at 
{self._base_path}") + if compression is not None: + print( + "Requested compression settings will be ignored as the vault already exists." + ) + elif experience_structure is not None: # Create the necessary dirs for the vault os.makedirs(self._base_path) @@ -230,7 +235,6 @@ def _get_base_spec(self, name: str) -> dict: "base": f"{DRIVER}{self._base_path}", "path": name, }, - "metadata": {}, } def _init_leaf( @@ -259,9 +263,11 @@ def _init_leaf( *shape[2:], # Experience dim(s) ) leaf_dtype = dtype - spec["metadata"]["compressor"] = ( - COMPRESSION_DEFAULT if self._compression is None else self._compression - ) + spec["metadata"] = { + "compressor": COMPRESSION_DEFAULT + if self._compression is None + else self._compression + } leaf_ds = ts.open( spec,