Skip to content

Commit

Permalink
Merge pull request #36 from openweathermap/fix/uuid-and-integrity
Browse files Browse the repository at this point in the history
Fix UUID and integrity
  • Loading branch information
SerGeRybakov authored Mar 5, 2024
2 parents 1591bfb + 4e551bb commit a3df8fc
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 108 deletions.
2 changes: 1 addition & 1 deletion deker/ABC/base_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def __init__(
self.__is_deleted = False
self._validate(id_, primary_attributes, custom_attributes)
self.__collection: "Collection" = collection
self.__id: str = id_ if id_ else get_id(self)
self.__id: str = id_ if id_ else get_id()
self.__adapter = adapter
self.__array_adapter = array_adapter

Expand Down
77 changes: 38 additions & 39 deletions deker/integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@
DekerBaseApplicationError,
DekerCollectionNotExistsError,
DekerIntegrityError,
DekerMetaDataError,
)
from deker.managers import ArrayManager, VArrayManager
from deker.tools import get_main_path, get_symlink_path
from deker.types.private.enums import LocksExtensions


if TYPE_CHECKING:
from deker.client import Client

Expand Down Expand Up @@ -170,38 +171,45 @@ def check_arrays_locks(self, collection: Collection) -> None:
if self.stop_on_error and self.errors:
raise DekerIntegrityError(self._parse_errors())

def _check_varrays_or_arrays(
self, collection: Collection, data_manager: Union[ArrayManager, Optional[VArrayManager]]
) -> None:
"""Check if Arrays or VArrays in Collection are initializing.
:param collection: Collection to be checked
:param data_manager: DataManager to get arrays or varrays from collection
"""
try:
for array in data_manager:
try:
if self.next_checker:
self.next_checker.check(array, collection)
except DekerBaseApplicationError as e:
if self.stop_on_error:
raise DekerIntegrityError(str(e))
self.errors[
f"Collection {collection.name} arrays integrity errors:"
].append(str(e))
except DekerMetaDataError as e:
if self.stop_on_error:
raise e
self.errors[
f"Collection {collection.name} (V)Arrays initialization errors:"
].append(str(e))

def check(self, collection: Collection) -> None:
"""Check if Arrays or VArray in Collection are valid.
"""Check if Arrays or VArrays and their locks in Collection are valid.
:param collection: Collection to be checked
"""
if self.level < self.CHECKER_LEVEL:
return
self.check_arrays_locks(collection)

for array in collection.arrays:
try:
if self.next_checker:
self.next_checker.check(array, collection)
except DekerBaseApplicationError as e:
if not self.stop_on_error:
self.errors[f"Collection {collection.name} arrays integrity errors:"].append(
str(e)
)
else:
raise DekerIntegrityError(str(e))
self._check_varrays_or_arrays(collection, collection.arrays)
if collection.varray_schema:
for varray in collection.varrays:
try:
if self.next_checker:
self.next_checker.check(varray, collection)
except DekerBaseApplicationError as e:
if not self.stop_on_error:
self.errors[
f"Collection {collection.name} varrays integrity errors:"
].append(str(e))
else:
raise DekerIntegrityError(str(e))
self._check_varrays_or_arrays(collection, collection.varrays)
return


class CollectionsChecker(BaseChecker):
Expand Down Expand Up @@ -247,23 +255,14 @@ def check(self, collection_name: Optional[str] = None) -> None:
:param collection_name: optional collection to be checked
"""
if collection_name:
# skipping collections checker
self.next_checker: Optional[BaseChecker] = (
self.next_checker.next_checker if self.next_checker else None
)
try:
collection: Collection = self.client.get_collection(collection_name)
if not collection:
raise DekerCollectionNotExistsError(
f"Collection {collection_name} does not exist at this storage"
)
collection: Collection = self.client.get_collection(collection_name)
if not collection:
raise DekerCollectionNotExistsError(
f"Collection {collection_name} does not exist at this storage"
)
if self.level > self.CHECKER_LEVEL:
if self.next_checker:
self.next_checker.check(collection)
except DekerCollectionNotExistsError:
raise
except Exception as e:
self.errors["Collections initialization errors:"].append(str(e))
return
collections = self.check_collections()
if self.level > self.CHECKER_LEVEL:
collections_pbar = tqdm.tqdm(collections)
Expand Down
52 changes: 4 additions & 48 deletions deker/tools/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,14 @@

import uuid

from functools import singledispatch
from typing import Any, Dict, List, Tuple, Union
from typing import Dict, List, Tuple, Union

import numpy as np

from deker_tools.data import convert_size_to_human
from deker_tools.time import get_utc
from psutil import swap_memory, virtual_memory

from deker.errors import DekerMemoryError, DekerValidationError
from deker.types.private.enums import ArrayType


def calculate_total_cells_in_array(seq: Union[Tuple[int, ...], List[int]]) -> int:
Expand Down Expand Up @@ -110,47 +107,6 @@ def check_memory(shape: tuple, dtype: type, mem_limit_from_settings: int) -> Non
)


def generate_uid(array_type: ArrayType) -> str:
"""Generate uuid5 for given array_type.
:param array_type: Either array or varray
"""
if not isinstance(array_type, ArrayType):
raise TypeError("Invalid argument type. Array type is required")

namespace = uuid.NAMESPACE_X500 if array_type == ArrayType.array else uuid.NAMESPACE_OID
return str(uuid.uuid5(namespace, array_type.value + get_utc().isoformat()))


def get_id(array: Any) -> str:
"""Generate unique id by object type and datetime.
:param array: any object
"""
from deker.arrays import Array, VArray

@singledispatch
def generate_id(arr: Any) -> str:
"""Generate unique id by object type and datetime.
:param arr: any object
"""
raise TypeError(f"Invalid object type: {type(arr)}")

@generate_id.register(Array)
def array_id(arr: Array) -> str: # noqa[ARG001]
"""Generate id for Array.
:param arr: Array type
"""
return generate_uid(ArrayType.array)

@generate_id.register(VArray)
def varray_id(arr: VArray) -> str: # noqa[ARG001]
"""Generate id for VArray.
:param arr: VArray type
"""
return generate_uid(ArrayType.varray)

return generate_id(array)
def get_id() -> str:
"""Generate unique id with uuid4."""
return str(uuid.uuid4())
13 changes: 4 additions & 9 deletions tests/test_cases/test_client/test_client_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,16 +379,11 @@ def test_client_check_integrity_collection(
f.seek(0)
json.dump(data, f, indent=4)
f.truncate()

client.check_integrity(2, stop_on_error=False, collection=collection_1.name)
try:
client.check_integrity(2, stop_on_error=False, collection=collection_1.name)
except Exception as e:
assert str(e) == f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'"
errors = capsys.readouterr().out
assert all(
s in errors
for s in (
"Integrity check is running...\n",
f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'\n\n",
)
)
collection_1.delete()
collection_2.delete()
for root, _, files in os.walk(os.path.curdir):
Expand Down
19 changes: 15 additions & 4 deletions tests/test_cases/test_integrity/test_integrity_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,14 @@ def test_check_collection_does_not_exist(
with pytest.raises(DekerCollectionNotExistsError):
integrity_checker.check("collection_does_not_exist")

@pytest.mark.parametrize("check_params", [None, "test_integrity_locks"])
def test_check_locks(
self, client: Client, root_path: Path, array_schema: ArraySchema, ctx: CTX
self,
client: Client,
root_path: Path,
array_schema: ArraySchema,
ctx: CTX,
check_params: str,
):
"""Tests if function returns error if lock is not found."""

Expand All @@ -89,7 +95,7 @@ def test_check_locks(
try:
filename = collection.path.parent / (collection.name + ".lock")
os.remove(filename)
errors = integrity_checker.check()
errors = integrity_checker.check(check_params)
assert (
errors
== f"Collections locks errors:\n\t- BaseLock for {collection.name} not found\n"
Expand Down Expand Up @@ -127,6 +133,7 @@ def test_check_extra_locks(
os.remove(filename)
collection.delete()

@pytest.mark.parametrize("check_params", [None, "test_return"])
def test_check_return(
self,
array_schema_with_attributes: ArraySchema,
Expand All @@ -135,6 +142,7 @@ def test_check_return(
ctx: CTX,
uri: Uri,
storage_adapter: Type[BaseStorageAdapter],
check_params: str,
):
"""Tests if function returns errors."""
integrity_checker = IntegrityChecker(
Expand Down Expand Up @@ -169,14 +177,15 @@ def test_check_return(
Path.unlink(symlink_path / files[0])

try:
errors = integrity_checker.check()
errors = integrity_checker.check(check_params)
error_1 = f"Symlink {symlink_path} not found\n"
error_2 = f"Array {array_1.id} data is corrupted: Index (9) out of range for (0-1)\n"

assert error_2 in errors and error_1 in errors
finally:
collection.delete()

@pytest.mark.parametrize("check_params", [None, "test_check_array_raises_on_init"])
def test_check_array_raises_on_init(
self,
array_schema_with_attributes: ArraySchema,
Expand All @@ -185,6 +194,7 @@ def test_check_array_raises_on_init(
ctx: CTX,
uri: Uri,
storage_adapter: Type[BaseStorageAdapter],
check_params: str,
):
"""Tests if function raises exception if array file is incorrect."""
collection = client.create_collection(
Expand Down Expand Up @@ -220,8 +230,9 @@ def test_check_array_raises_on_init(
f.flush()
try:
with pytest.raises(DekerMetaDataError):
assert integrity_checker.check()
integrity_checker.check(check_params)
finally:
collection.delete()
array.delete()


Expand Down
7 changes: 0 additions & 7 deletions tests/test_cases/test_tools/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from deker.collection import Collection
from deker.errors import DekerInstanceNotExistsError, DekerMemoryError, DekerValidationError
from deker.tools import check_memory, convert_human_memory_to_bytes
from deker.tools.array import generate_uid
from deker.tools.time import convert_datetime_attrs_to_iso, convert_iso_attrs_to_datetime


Expand Down Expand Up @@ -221,12 +220,6 @@ def test_convert_isoformat_attrs_raises(attrs):
assert convert_iso_attrs_to_datetime(attrs)


@pytest.mark.parametrize("array_type_arg", (list(), set(), tuple(), dict(), 1, "2", 3.4))
def test_generate_id_raises(array_type_arg):
with pytest.raises(TypeError):
generate_uid(array_type_arg)


@pytest.mark.parametrize(
"params,result,error",
(
Expand Down

0 comments on commit a3df8fc

Please sign in to comment.