Skip to content

Commit

Permalink
Reduce Collision Probability to Near 0 (#2264)
Browse files (browse the repository at this point in the history)
* init

* fixed hashed

* more test fix

* test fix
  • Loading branch information
tssweeney authored Jun 9, 2021
1 parent aa645f2 commit 778555b
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 28 deletions.
4 changes: 2 additions & 2 deletions tests/utils/mock_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,7 @@ def storage():
"digest": "3aaaaaaaaaaaaaaaaaaaaa==",
"size": 81299,
},
"media/tables/5aac4cea.table.json": {
"media/tables/5aac4cea496fd061.table.json": {
"digest": "3aaaaaaaaaaaaaaaaaaaaa==",
"size": 81299,
},
Expand Down Expand Up @@ -887,7 +887,7 @@ def storage():
}
},
}
elif _id == "b89758a7e7503bdb021e0534fe444d9a":
elif _id == "d68ee9316e84e3e190d8e94e354962a6":
return {
"version": 1,
"storagePolicy": "wandb-storage-policy-v1",
Expand Down
38 changes: 19 additions & 19 deletions tests/wandb_artifacts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,19 +551,19 @@ def test_add_obj_wbimage(runner):
artifact.add(wb_image, "my-image")

manifest = artifact.manifest.to_manifest_json()
assert artifact.digest == "88c32e731a1ddb3117249140b7bf0d27"
assert artifact.digest == "1ddf487dc76a2e1091046286da63d184"
assert manifest["contents"] == {
"media/cls.classes.json": {
"digest": "eG00DqdCcCBqphilriLNfw==",
"size": 64,
},
"media/images/641e917f/2x2.png": {
"media/images/641e917f31888a48/2x2.png": {
"digest": "L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"my-image.image-file.json": {
"digest": "A8NTF/lXHjyjy9NVTnH8vw==",
"size": 293,
"digest": "2RmUbJG/CceV8DBhEJAFiQ==",
"size": 301,
},
}

Expand All @@ -577,19 +577,19 @@ def test_add_obj_using_brackets(runner):
artifact["my-image"] = wb_image

manifest = artifact.manifest.to_manifest_json()
assert artifact.digest == "88c32e731a1ddb3117249140b7bf0d27"
assert artifact.digest == "1ddf487dc76a2e1091046286da63d184"
assert manifest["contents"] == {
"media/cls.classes.json": {
"digest": "eG00DqdCcCBqphilriLNfw==",
"size": 64,
},
"media/images/641e917f/2x2.png": {
"media/images/641e917f31888a48/2x2.png": {
"digest": "L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"my-image.image-file.json": {
"digest": "A8NTF/lXHjyjy9NVTnH8vw==",
"size": 293,
"digest": "2RmUbJG/CceV8DBhEJAFiQ==",
"size": 301,
},
}

Expand Down Expand Up @@ -689,13 +689,13 @@ def test_add_obj_wbimage_classes_obj(runner):
"digest": "eG00DqdCcCBqphilriLNfw==",
"size": 64,
},
"media/images/641e917f/2x2.png": {
"media/images/641e917f31888a48/2x2.png": {
"digest": "L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"my-image.image-file.json": {
"digest": "A8NTF/lXHjyjy9NVTnH8vw==",
"size": 293,
"digest": "2RmUbJG/CceV8DBhEJAFiQ==",
"size": 301,
},
}

Expand All @@ -716,13 +716,13 @@ def test_add_obj_wbimage_classes_obj_already_added(runner):
"digest": "eG00DqdCcCBqphilriLNfw==",
"size": 64,
},
"media/images/641e917f/2x2.png": {
"media/images/641e917f31888a48/2x2.png": {
"digest": "L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"my-image.image-file.json": {
"digest": "3lTCGIlHAbNJlwIp2ALaTQ==",
"size": 294,
"digest": "2bVMBRXNW0RW4jx0Ov34nw==",
"size": 302,
},
}

Expand Down Expand Up @@ -768,11 +768,11 @@ def test_add_obj_wbtable_images(runner):
"digest": "eG00DqdCcCBqphilriLNfw==",
"size": 64,
},
"media/images/641e917f/2x2.png": {
"media/images/641e917f31888a48/2x2.png": {
"digest": u"L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"my-table.table.json": {"digest": "dQsR9hmEpOiRckgfFbiO1g==", "size": 1011},
"my-table.table.json": {"digest": "PAhKp1yh9i/XOgp4UF5Oug==", "size": 1027},
}


Expand All @@ -796,15 +796,15 @@ def test_add_obj_wbtable_images_duplicate_name(runner):

manifest = artifact.manifest.to_manifest_json()
assert manifest["contents"] == {
"media/images/641e917f/img.png": {
"media/images/641e917f31888a48/img.png": {
"digest": "L1pBeGPxG+6XVRQk4WuvdQ==",
"size": 71,
},
"media/images/cf37c38f/img.png": {
"media/images/cf37c38fd1dca3aa/img.png": {
"digest": "pQVvBBgcuG+jTN0Xo97eZQ==",
"size": 8837,
},
"my-table.table.json": {"digest": "Ts96ecO6RcC9J0aOABjflw==", "size": 797},
"my-table.table.json": {"digest": "1LySke8/dwO4qZVUEyuqdw==", "size": 813},
}


Expand Down
2 changes: 1 addition & 1 deletion wandb/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def _ensure_table_in_artifact(self, table, artifact, table_ndx):
# Give the new object a unique, yet deterministic name
name = binascii.hexlify(
base64.standard_b64decode(table.entry.digest)
).decode("ascii")[:8]
).decode("ascii")[:16]
entry = artifact.add_reference(
table.ref_url(), "{}.{}.json".format(name, table.name.split(".")[-2])
)[0]
Expand Down
4 changes: 2 additions & 2 deletions wandb/sdk/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def bind_to_run(
extension = self._extension

if id_ is None:
id_ = self._sha256[:8]
id_ = self._sha256[:16]

file_path = _wb_filename(key, step, id_, extension)
media_path = os.path.join(self.get_media_subdir(), file_path)
Expand Down Expand Up @@ -510,7 +510,7 @@ def to_json(self, run: Union["LocalRun", "LocalArtifact"]) -> dict:
# we end up with a unique path for each.
name = os.path.join(
self.get_media_subdir(),
self._sha256[:8],
self._sha256[:16],
os.path.basename(self._path),
)

Expand Down
2 changes: 1 addition & 1 deletion wandb/sdk/wandb_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def add_file(
if is_tmp:
file_path, file_name = os.path.split(name)
file_name_parts = file_name.split(".")
file_name_parts[0] = b64_string_to_hex(digest)[:8]
file_name_parts[0] = b64_string_to_hex(digest)[:16]
name = os.path.join(file_path, ".".join(file_name_parts))

return self._add_local_file(name, local_path, digest=digest)
Expand Down
4 changes: 2 additions & 2 deletions wandb/sdk_py27/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def bind_to_run(
extension = self._extension

if id_ is None:
id_ = self._sha256[:8]
id_ = self._sha256[:16]

file_path = _wb_filename(key, step, id_, extension)
media_path = os.path.join(self.get_media_subdir(), file_path)
Expand Down Expand Up @@ -510,7 +510,7 @@ def to_json(self, run):
# we end up with a unique path for each.
name = os.path.join(
self.get_media_subdir(),
self._sha256[:8],
self._sha256[:16],
os.path.basename(self._path),
)

Expand Down
2 changes: 1 addition & 1 deletion wandb/sdk_py27/wandb_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def add_file(
if is_tmp:
file_path, file_name = os.path.split(name)
file_name_parts = file_name.split(".")
file_name_parts[0] = b64_string_to_hex(digest)[:8]
file_name_parts[0] = b64_string_to_hex(digest)[:16]
name = os.path.join(file_path, ".".join(file_name_parts))

return self._add_local_file(name, local_path, digest=digest)
Expand Down

0 comments on commit 778555b

Please sign in to comment.