Skip to content

Commit

Permalink
Add the concept of a container (unique) ID
Browse files Browse the repository at this point in the history
This fixes #11
I also fixed a bug I discovered, where config values that
were cached but the cache was not cleared when re-initialising the
container.
To reduce the risk of such a problem, I am now only caching the *whole*
configuration dictionary (and not each value), and I don't need to
implement caching anymore for each single config value; and I just
need to clear one variable `self._config` rather than each newly-defined
configuration.

In addition, I add a test to check that re-initialising the container
creates a new container ID (`container.container_id`).
  • Loading branch information
giovannipizzi committed Aug 27, 2020
1 parent 2d87757 commit c69da7a
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 19 deletions.
59 changes: 40 additions & 19 deletions disk_objectstore/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import os
import shutil
import uuid
import warnings
import zlib

Expand Down Expand Up @@ -87,11 +88,11 @@ def __init__(self, folder):
"""
self._folder = os.path.realpath(folder)
self._session = None # Will be populated by the _get_session function

# These act as caches and will be populated by the corresponding properties
self._loose_prefix_len = None
self._pack_size_target = None
# IMPORANT! IF YOU ADD MORE, REMEMBER TO CLEAR THEM IN `init_container()`!
self._current_pack_id = None
self._hash_type = None
self._config = None

def get_folder(self):
"""Return the path to the folder that will host the object-store container."""
Expand Down Expand Up @@ -315,12 +316,24 @@ def init_container( # pylint: disable=bad-continuation
if os.path.exists(self._folder):
shutil.rmtree(self._folder)

# Reinitialize the configuration cache, since this will change
# (at least the container_id, possibly the rest), and the other caches
self._config = None
self._current_pack_id = None

if self.is_initialised:
raise FileExistsError(
'The container already exists, so you cannot initialise it - '
'use the clear option if you want to overwrite with a clean one'
)

# If we are here, either the folder is empty, or just cleared.
# It could also be that one of the folders does not exist. This is considered an invalid situation.
# But this will be catched later, where I check that the folder is empty before overwriting the
# configuration file.
# In this case, I have to generate a new UUID to be used as the container_id
container_id = uuid.uuid4().hex

try:
os.makedirs(self._folder)
except FileExistsError:
Expand All @@ -339,7 +352,8 @@ def init_container( # pylint: disable=bad-continuation
'container_version': 1, # For future compatibility, this is the version of the format
'loose_prefix_len': loose_prefix_len,
'pack_size_target': pack_size_target,
'hash_type': hash_type
'hash_type': hash_type,
'container_id': container_id
},
fhandle
)
Expand All @@ -358,39 +372,46 @@ def _get_repository_config(self):
"""Return the repository config."""
if not self.is_initialised:
raise NotInitialised('The container is not initialised yet - use .init_container() first')
with open(self._get_config_file()) as fhandle:
config = json.load(fhandle)
return config
if self._config is None:
with open(self._get_config_file()) as fhandle:
self._config = json.load(fhandle)
return self._config

@property
def loose_prefix_len(self):
"""Return the length of the prefix of loose objects, when sharding.
This is read from the repository configuration.
This is read from the (cached) repository configuration.
"""
if self._loose_prefix_len is None:
self._loose_prefix_len = self._get_repository_config()['loose_prefix_len']
return self._loose_prefix_len
return self._get_repository_config()['loose_prefix_len']

@property
def pack_size_target(self):
"""Return the length of the pack name, when sharding.
This is read from the repository configuration.
This is read from the (cached) repository configuration.
"""
if self._pack_size_target is None:
self._pack_size_target = self._get_repository_config()['pack_size_target']
return self._pack_size_target
return self._get_repository_config()['pack_size_target']

@property
def hash_type(self):
"""Return the length of the prefix of loose objects, when sharding.
This is read from the repository configuration.
This is read from the (cached) repository configuration.
"""
return self._get_repository_config()['hash_type']

@property
def container_id(self):
"""Return the repository unique ID.
This is read from the (cached) repository configuration, and is a UUID uniquely identifying
this specific container. This is generated at the container initialization (call `init_container`) and will
never change for this container.
Clones of the container should have a different ID even if they have the same content.
"""
if self._hash_type is None:
self._hash_type = self._get_repository_config()['hash_type']
return self._hash_type
return self._get_repository_config()['container_id']

def get_object_content(self, hashkey):
"""Get the content of an object with a given hash key.
Expand Down
11 changes: 11 additions & 0 deletions tests/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -3033,3 +3033,14 @@ def test_pack_all_loose_many(temp_container):
temp_container.pack_all_loose()
retrieved = temp_container.get_objects_content(expected.keys())
assert retrieved == expected


def test_container_id(temp_container):
"""Check the creation of unique container IDs."""
old_container_id = temp_container.container_id
assert old_container_id is not None
assert isinstance(old_container_id, str)

# Re-initialize: it should get a new container_id
temp_container.init_container(clear=True)
assert old_container_id != temp_container.container_id

0 comments on commit c69da7a

Please sign in to comment.