diff --git a/alluxio/alluxio_file_system.py b/alluxio/alluxio_file_system.py index 266f1a9..3701c21 100644 --- a/alluxio/alluxio_file_system.py +++ b/alluxio/alluxio_file_system.py @@ -14,6 +14,8 @@ import requests from requests.adapters import HTTPAdapter +from .const import ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE +from .const import ALLUXIO_HASH_NODE_PER_WORKER_KEY from .const import ALLUXIO_PAGE_SIZE_DEFAULT_VALUE from .const import ALLUXIO_PAGE_SIZE_KEY from .const import ALLUXIO_SUCCESS_IDENTIFIER @@ -141,20 +143,32 @@ def __init__( "Supply either 'etcd_hosts' or 'worker_hosts', not both" ) self.logger = logger or logging.getLogger("AlluxioFileSystem") + if etcd_hosts and not worker_hosts: + self.logger.warning( + "Does not supply 'etcd_hosts'. An etcd cluster is required for dynamic cluster changes." + ) self.session = self._create_session(concurrency) # parse options page_size = ALLUXIO_PAGE_SIZE_DEFAULT_VALUE + hash_node_per_worker = int(ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE) if options: if ALLUXIO_PAGE_SIZE_KEY in options: page_size = options[ALLUXIO_PAGE_SIZE_KEY] self.logger.debug(f"Page size is set to {page_size}") + if ALLUXIO_HASH_NODE_PER_WORKER_KEY in options: + hash_node_per_worker = int( + options[ALLUXIO_HASH_NODE_PER_WORKER_KEY] + ) + self.logger.debug( + f"Hash node per worker is set to {hash_node_per_worker}" + ) + self.page_size = humanfriendly.parse_size(page_size, binary=True) self.hash_provider = ConsistentHashProvider( etcd_hosts=etcd_hosts, etcd_port=int(etcd_port), - worker_hosts=worker_hosts, - worker_http_port=http_port, + hash_node_per_worker=hash_node_per_worker, options=options, logger=self.logger, etcd_refresh_workers_interval=etcd_refresh_workers_interval, diff --git a/alluxio/const.py b/alluxio/const.py index 619fa61..5de4ea9 100644 --- a/alluxio/const.py +++ b/alluxio/const.py @@ -4,6 +4,10 @@ ALLUXIO_ETCD_PASSWORD_KEY = "alluxio.etcd.password" ALLUXIO_PAGE_SIZE_KEY = "alluxio.worker.page.store.page.size" ALLUXIO_PAGE_SIZE_DEFAULT_VALUE = "1MB" +ALLUXIO_HASH_NODE_PER_WORKER_KEY = ( + "alluxio.user.consistent.hash.virtual.node.count.per.worker" +) +ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE = "5" ALLUXIO_SUCCESS_IDENTIFIER = "success" LIST_URL_FORMAT = "http://{worker_host}:{http_port}/v1/files" FULL_PAGE_URL_FORMAT = ( diff --git a/alluxio/worker_ring.py b/alluxio/worker_ring.py index 3f3deac..7f6ede5 100644 --- a/alluxio/worker_ring.py +++ b/alluxio/worker_ring.py @@ -1,9 +1,9 @@ import json import logging -import math import random import threading import time +import uuid from dataclasses import dataclass from typing import List from typing import Set @@ -27,6 +27,7 @@ DEFAULT_WEB_PORT = 30000 DEFAULT_DOMAIN_SOCKET_PATH = "" DEFAULT_HTTP_SERVER_PORT = 28080 +DEFAULT_WORKER_IDENTIFIER_VERSION = 1 @dataclass(frozen=True) @@ -41,38 +42,66 @@ class WorkerNetAddress: domain_socket_path: str = DEFAULT_DOMAIN_SOCKET_PATH http_server_port: int = DEFAULT_HTTP_SERVER_PORT + +@dataclass(frozen=True) +class WorkerIdentity: + version: int + identifier: bytes + + +class NULL_NAMESPACE: + bytes = b"" + + +@dataclass(frozen=True) +class WorkerEntity: + worker_identity: WorkerIdentity + worker_net_address: WorkerNetAddress + @staticmethod def from_worker_info(worker_info): try: worker_info_string = worker_info.decode("utf-8") worker_info_json = json.loads(worker_info_string) - worker_net_address = worker_info_json.get("WorkerNetAddress", {}) + identity_info = worker_info_json.get("Identity", {}) + worker_identity = WorkerIdentity( + version=int(identity_info.get("version")), + identifier=bytes.fromhex(identity_info.get("identifier")), + ) - return WorkerNetAddress( - host=worker_net_address.get("Host", DEFAULT_HOST), - container_host=worker_net_address.get( + worker_net_address_info = worker_info_json.get( + "WorkerNetAddress", {} + ) + worker_net_address = WorkerNetAddress( + host=worker_net_address_info.get("Host", DEFAULT_HOST), + container_host=worker_net_address_info.get( "ContainerHost", DEFAULT_CONTAINER_HOST ), - rpc_port=worker_net_address.get("RpcPort", DEFAULT_RPC_PORT), - data_port=worker_net_address.get( + rpc_port=worker_net_address_info.get( + "RpcPort", DEFAULT_RPC_PORT + ), + data_port=worker_net_address_info.get( "DataPort", DEFAULT_DATA_PORT ), - secure_rpc_port=worker_net_address.get( + secure_rpc_port=worker_net_address_info.get( "SecureRpcPort", DEFAULT_SECURE_RPC_PORT ), - netty_data_port=worker_net_address.get( + netty_data_port=worker_net_address_info.get( "NettyDataPort", DEFAULT_NETTY_DATA_PORT ), - web_port=worker_net_address.get("WebPort", DEFAULT_WEB_PORT), - domain_socket_path=worker_net_address.get( + web_port=worker_net_address_info.get( + "WebPort", DEFAULT_WEB_PORT + ), + domain_socket_path=worker_net_address_info.get( "DomainSocketPath", DEFAULT_DOMAIN_SOCKET_PATH, ), - http_server_port=worker_net_address.get( + http_server_port=worker_net_address_info.get( "HttpServerPort", DEFAULT_HTTP_SERVER_PORT, ), ) + return WorkerEntity(worker_identity, worker_net_address) except json.JSONDecodeError as e: raise ValueError( f"Provided worker_info is not a valid JSON string {e}" @@ -87,93 +116,89 @@ def from_worker_info(worker_info): ) from e @staticmethod - def from_worker_hosts( - worker_hosts, worker_http_port=DEFAULT_HTTP_SERVER_PORT - ): - worker_addresses = set() - for host in worker_hosts.split(","): - worker_address = WorkerNetAddress( - host=host, http_server_port=worker_http_port - ) - worker_addresses.add(worker_address) - return worker_addresses - - def dump_main_info(self): - return ( - "WorkerNetAddress{{host={}, containerHost={}, rpcPort={}, " - "dataPort={}, webPort={}, domainSocketPath={}}}" - ).format( - self.host, - self.container_host, - self.rpc_port, - self.data_port, - self.web_port, - self.domain_socket_path, + def from_host_and_port(worker_host, worker_http_port): + worker_uuid = uuid.uuid3(NULL_NAMESPACE, worker_host) + uuid_bytes = worker_uuid.bytes + + worker_identity = WorkerIdentity( + DEFAULT_WORKER_IDENTIFIER_VERSION, uuid_bytes ) + worker_net_address = WorkerNetAddress( + host=worker_host, + container_host=DEFAULT_CONTAINER_HOST, + rpc_port=DEFAULT_RPC_PORT, + data_port=DEFAULT_DATA_PORT, + secure_rpc_port=DEFAULT_SECURE_RPC_PORT, + netty_data_port=DEFAULT_NETTY_DATA_PORT, + web_port=DEFAULT_WEB_PORT, + domain_socket_path=DEFAULT_DOMAIN_SOCKET_PATH, + http_server_port=worker_http_port, + ) + return WorkerEntity(worker_identity, worker_net_address) class EtcdClient: def __init__(self, host="localhost", port=2379, options=None): - self.host = host - self.port = port + self._host = host + self._port = port # Parse options - self.etcd_username = None - self.etcd_password = None - self.prefix = ETCD_PREFIX_FORMAT.format( + self._etcd_username = None + self._etcd_password = None + self._prefix = ETCD_PREFIX_FORMAT.format( cluster_name=ALLUXIO_CLUSTER_NAME_DEFAULT_VALUE ) if options: if ALLUXIO_ETCD_USERNAME_KEY in options: - self.etcd_username = options[ALLUXIO_ETCD_USERNAME_KEY] + self._etcd_username = options[ALLUXIO_ETCD_USERNAME_KEY] if ALLUXIO_ETCD_PASSWORD_KEY in options: - self.etcd_password = options[ALLUXIO_ETCD_PASSWORD_KEY] + self._etcd_password = options[ALLUXIO_ETCD_PASSWORD_KEY] if ALLUXIO_CLUSTER_NAME_KEY in options: - self.prefix = ETCD_PREFIX_FORMAT.format( + self._prefix = ETCD_PREFIX_FORMAT.format( cluster_name=options[ALLUXIO_CLUSTER_NAME_KEY] ) - if (self.etcd_username is None) != (self.etcd_password is None): + if (self._etcd_username is None) != (self._etcd_password is None): raise ValueError( "Both ETCD username and password must be set or both should be unset." ) - def get_worker_addresses(self): + def get_worker_entities(self) -> Set[WorkerEntity]: """ - Retrieve worker addresses from etcd using the specified prefix. + Retrieve worker entities from etcd using the specified prefix. Returns: - list: A list of WorkerNetAddress objects. + set: A set of WorkerEntity objects. """ # Note that EtcdClient should not be passed through python multiprocessing etcd = self._get_etcd_client() - worker_addresses = set() + worker_entities: Set[WorkerEntity] = set() try: - worker_addresses = { - WorkerNetAddress.from_worker_info(worker_info) - for worker_info, _ in etcd.get_prefix(self.prefix) + worker_entities = { + WorkerEntity.from_worker_info(worker_info) + for worker_info, _ in etcd.get_prefix(self._prefix) } except Exception as e: raise Exception( - f"Failed to achieve worker info list from ETCD server {self.host}:{self.port} {e}" + f"Failed to achieve worker info list from ETCD server {self._host}:{self._port} {e}" ) from e - if not worker_addresses: + if not worker_entities: # TODO(lu) deal with the alluxio cluster initializing issue raise Exception( "Alluxio cluster may still be initializing. No worker registered" ) - return worker_addresses + return worker_entities def _get_etcd_client(self): - if self.etcd_username: + if self._etcd_username: return etcd3.client( - host=self.host, - port=self.port, - user=self.etcd_username, - password=self.etcd_password, + host=self._host, + port=self._port, + user=self._etcd_username, + password=self._etcd_password, ) - return etcd3.client(host=self.host, port=self.port) + return etcd3.client(host=self._host, port=self._port) class ConsistentHashProvider: @@ -185,7 +210,7 @@ def __init__( worker_http_port=DEFAULT_HTTP_SERVER_PORT, options=None, logger=None, - num_virtual_nodes=2000, + hash_node_per_worker=5, max_attempts=100, etcd_refresh_workers_interval=None, ): @@ -193,17 +218,15 @@ def __init__( self._etcd_port = etcd_port self._options = options self._logger = logger or logging.getLogger("ConsistentHashProvider") - self._num_virtual_nodes = num_virtual_nodes + self._hash_node_per_worker = hash_node_per_worker self._max_attempts = max_attempts self._lock = threading.Lock() self._is_ring_initialized = False - self._worker_addresses = None + self._worker_info_map = {} self._etcd_refresh_workers_interval = etcd_refresh_workers_interval if worker_hosts: self._update_hash_ring( - WorkerNetAddress.from_worker_hosts( - worker_hosts, worker_http_port - ) + self._generate_worker_info_map(worker_hosts, worker_http_port) ) if self._etcd_hosts: self._fetch_workers_and_update_ring() @@ -228,14 +251,36 @@ def get_multiple_workers( List[WorkerNetAddress]: A list containing the desired number of WorkerNetAddress objects. """ with self._lock: - if count >= len(self._worker_addresses): - return list(self._worker_addresses) - workers: Set[WorkerNetAddress] = set() - attempts = 0 - while len(workers) < count and attempts < self._max_attempts: - attempts += 1 - workers.add(self._get_ceiling_value(self._hash(key, attempts))) - return list(workers) + worker_identities = self._get_multiple_worker_identities( + key, count + ) + worker_addresses = [] + for worker_identity in worker_identities: + worker_address = self._worker_info_map.get(worker_identity) + if worker_address: + worker_addresses.append(worker_address) + return worker_addresses + + def _get_multiple_worker_identities( + self, key: str, count: int + ) -> List[WorkerIdentity]: + """ + This method needs external lock to ensure safety + """ + count = ( + len(self._worker_info_map) + if count >= len(self._worker_info_map) + else count + ) + workers = [] + attempts = 0 + while len(workers) < count and attempts < self._max_attempts: + attempts += 1 + worker = self._get_ceiling_value(self._hash(key, attempts)) + if worker not in workers: + workers.append(worker) + + return workers def _start_background_update_ring(self, interval): def update_loop(): @@ -260,19 +305,18 @@ def __del__(self): self.shutdown_background_update_ring() def _fetch_workers_and_update_ring(self): - worker_addresses = set() etcd_hosts_list = self._etcd_hosts.split(",") random.shuffle(etcd_hosts_list) + worker_entities: Set[WorkerEntity] = set() for host in etcd_hosts_list: try: - current_addresses = EtcdClient( + worker_entities = EtcdClient( host=host, port=self._etcd_port, options=self._options - ).get_worker_addresses() - worker_addresses.update(current_addresses) + ).get_worker_entities() break except Exception as e: continue - if not worker_addresses: + if not worker_entities: if self._is_ring_initialized: self._logger.info( f"Failed to achieve worker info list from ETCD servers:{self._etcd_hosts}" @@ -283,30 +327,72 @@ def _fetch_workers_and_update_ring(self): f"Failed to achieve worker info list from ETCD servers:{self._etcd_hosts}" ) - if worker_addresses != self._worker_addresses: - self._update_hash_ring(worker_addresses) + worker_info_map = {} + diff_in_worker_info_detected = False + for worker_entity in worker_entities: + worker_info_map[ + worker_entity.worker_identity + ] = worker_entity.worker_net_address + if worker_entity.worker_identity not in self._worker_info_map: + diff_in_worker_info_detected = True + elif ( + self._worker_info_map[worker_entity.worker_identity] + != worker_entity.worker_net_address + ): + diff_in_worker_info_detected = True + + if len(worker_info_map) != len(self._worker_info_map): + diff_in_worker_info_detected = True - def _update_hash_ring(self, worker_addresses: Set[WorkerNetAddress]): + if diff_in_worker_info_detected: + self._update_hash_ring(worker_info_map) + + def _update_hash_ring( + self, worker_info_map: dict[WorkerIdentity, WorkerNetAddress] + ): with self._lock: hash_ring = SortedDict() - weight = math.ceil(self._num_virtual_nodes / len(worker_addresses)) - for worker_address in worker_addresses: - worker_string = worker_address.dump_main_info() - for i in range(weight): - hash_key = self._hash(worker_string, i) - hash_ring[hash_key] = worker_address - self._hash_ring = hash_ring - self._worker_addresses = worker_addresses + for worker_identity in worker_info_map.keys(): + for i in range(self._hash_node_per_worker): + hash_key = self._hash_worker_identity(worker_identity, i) + hash_ring[hash_key] = worker_identity + self.hash_ring = hash_ring + self._worker_info_map = worker_info_map self._is_ring_initialized = True def _get_ceiling_value(self, hash_key: int): - key_index = self._hash_ring.bisect_right(hash_key) - if key_index < len(self._hash_ring): - ceiling_key = self._hash_ring.keys()[key_index] - ceiling_value = self._hash_ring[ceiling_key] + key_index = self.hash_ring.bisect_right(hash_key) + if key_index < len(self.hash_ring): + ceiling_key = self.hash_ring.keys()[key_index] + ceiling_value = self.hash_ring[ceiling_key] return ceiling_value else: - return self._hash_ring.peekitem(0)[1] + return self.hash_ring.peekitem(0)[1] def _hash(self, key: str, index: int) -> int: - return mmh3.hash(f"{key}{index}".encode("utf-8")) + hasher = mmh3.mmh3_32() + hasher.update(key.encode("utf-8")) + hasher.update(index.to_bytes(4, "little")) + return hasher.sintdigest() + + def _hash_worker_identity( + self, worker: WorkerIdentity, node_index: int + ) -> int: + # Hash the combined bytes + hasher = mmh3.mmh3_32() + hasher.update(worker.identifier) + hasher.update(worker.version.to_bytes(4, "little")) + hasher.update(node_index.to_bytes(4, "little")) + return hasher.sintdigest() + + def _generate_worker_info_map(self, worker_hosts, worker_http_port): + worker_info_map = {} + host_list = [host.strip() for host in worker_hosts.split(",")] + for worker_host in host_list: + worker_entity = WorkerEntity.from_host_and_port( + worker_host, worker_http_port + ) + worker_info_map[ + worker_entity.worker_identity + ] = worker_entity.worker_net_address + return worker_info_map diff --git a/tests/hash_res/activeNodesMap.json b/tests/hash_res/activeNodesMap.json new file mode 100644 index 0000000..6260595 --- /dev/null +++ b/tests/hash_res/activeNodesMap.json @@ -0,0 +1,1002 @@ +{ + "-2115369034": { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + "-2091368727": { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + "-2035703500": { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + "-2020503163": { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + "-2018838375": { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + "-2001073348": { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + "-2001008823": { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + "-2000498259": { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + "-1998016219": { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + "-1992280867": { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + "-1949531595": { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + "-1945615066": { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + "-1905409545": { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + "-1898825753": { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + "-1880929873": { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + "-1879245916": { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + "-1874448919": { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + "-1836289830": { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + "-1807338973": { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + "-1785695523": { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + "-1644192530": { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + "-1640144922": { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + "-1621072852": { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + "-1613871358": { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + "-1607875238": { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + }, + "-1603622913": { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + "-1598860684": { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + "-1591914451": { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + "-1582782076": { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + "-1572608538": { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + "-1565343821": { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + "-1544884377": { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + "-1533586579": { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + "-1502419178": { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + "-1497415322": { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + "-1481119148": { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + "-1460481702": { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + "-1454067970": { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + "-1428680219": { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + "-1402396211": { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + "-1388188426": { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + "-1361504644": { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + }, + "-1353690830": { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + "-1308819006": { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + "-1274190161": { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + "-1274184776": { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + "-1254602663": { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + "-1242990879": { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + "-1209240677": { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + "-1205533474": { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + "-1181002889": { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + "-1170547462": { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + "-1136284210": { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + "-1131198756": { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + "-1108751870": { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + "-1085625064": { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + "-1072596604": { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + "-1067521732": { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + "-1027139400": { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + "-965000512": { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + "-963201551": { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + "-953941995": { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + "-949681777": { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + "-948975218": { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + "-929168312": { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + "-928233989": { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + "-910886540": { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + "-887685381": { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + "-868669701": { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + "-858699180": { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + "-819778837": { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + "-813715057": { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + "-788575516": { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + "-782399481": { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + "-765629868": { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + "-713207990": { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + "-704126197": { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + "-703905062": { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + "-677761763": { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + "-677716059": { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + "-650059480": { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + "-601500921": { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + "-601452604": { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + "-558607524": { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + "-536222129": { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + "-535276574": { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + "-531165090": { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + "-513594848": { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + "-510745204": { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + "-492474525": { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + "-436715699": { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + "-436169307": { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + "-421725118": { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + "-420639473": { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + "-402034263": { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + "-372367314": { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + "-363276737": { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + "-350701540": { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + "-329451756": { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + "-322939709": { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + "-308807501": { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + "-273283867": { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + "-257651875": { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + "-246789829": { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + "-235365093": { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + "-214752869": { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + "-211889573": { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + "-205438280": { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + "-162809880": { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + "-149368597": { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + "-111074588": { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + "-106187699": { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + "-90712726": { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + }, + "-43170766": { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + "-37332753": { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + "21700536": { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + "26589100": { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + "49988399": { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + "50999564": { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + "63571604": { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + "104739144": { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + "130354123": { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + "130591054": { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + "135923079": { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + "140323032": { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + "143743215": { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + "153476705": { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + "248532166": { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + "274411076": { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + "305678758": { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + "346850862": { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + "347339085": { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + "350498951": { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + "350954699": { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + "408849612": { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + "429427545": { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + "446249379": { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + "449449981": { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + "473117705": { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + "497367507": { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + "528221851": { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + "535236085": { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + "538142841": { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + "582005327": { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + "584259208": { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + "584568901": { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + "632432913": { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + "632822913": { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + "643043431": { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + "656552419": { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + "664478574": { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + "676134760": { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + "684975246": { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + "707171019": { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + "713982546": { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + "755048076": { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + "826319166": { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + "826364895": { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + "836074637": { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + "845789493": { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + "846443122": { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + "852985392": { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + "867550676": { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + "895327246": { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + "914523155": { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + "922409680": { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + "943339311": { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + "954829692": { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + "962743900": { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + "1008763691": { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + "1011027326": { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + }, + "1033410522": { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + "1035110164": { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + "1042604743": { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + "1070649741": { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + "1076442160": { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + "1087399852": { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + "1130655362": { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + "1149185862": { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + "1190920924": { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + "1201967894": { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + "1224529416": { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + "1233480337": { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + "1240271849": { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + "1276199166": { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + "1288248433": { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + "1300668502": { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + "1318427302": { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + "1331625336": { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + "1334054318": { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + "1341896786": { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + "1346329458": { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + "1361550001": { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + "1374838964": { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + "1406091652": { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + "1408472174": { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + "1420996528": { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + "1442515016": { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + "1444111170": { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + "1456142202": { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + "1479814255": { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + "1487691117": { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + "1493252170": { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + "1513450563": { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + "1516202376": { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + "1518368864": { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + "1555735296": { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + "1567793620": { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + "1573984571": { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + "1579991687": { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + "1593672398": { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + "1602725778": { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + "1612739546": { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + "1639399392": { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + "1667669984": { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + "1692521345": { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + "1696767179": { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + "1700325412": { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + "1750718921": { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + "1753707681": { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + "1765578299": { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + "1770298781": { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + "1808790008": { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + "1811712400": { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + "1821685772": { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + "1828829657": { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + "1830498928": { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + "1836162724": { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + "1853048372": { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + "1857766797": { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + "1866065729": { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + "1871485982": { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + "1892545061": { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + "1894012653": { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + "1899051478": { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + "1907304406": { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + "1907464917": { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + }, + "1973184342": { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + "1977837486": { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + "1988098064": { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + "2002022199": { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + "2015796569": { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + "2050900322": { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + "2091004539": { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + "2103863933": { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + "2105959596": { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + "2108854606": { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + "2111136993": { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + "2118332817": { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + "2143686309": { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + } +} diff --git a/tests/hash_res/fileUrlWorkers.json b/tests/hash_res/fileUrlWorkers.json new file mode 100644 index 0000000..9317d6b --- /dev/null +++ b/tests/hash_res/fileUrlWorkers.json @@ -0,0 +1,464 @@ +{ + "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_checkin.json": [ + { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + } + ], + "gcs://bucket/file.txt": [ + { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + } + ], + "s://ai-testing/": [ + { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_review.json": [ + { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + } + ], + "s3://bucket/path/to/dir": [ + { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + } + ], + "hdfs://host:port/path/to/file": [ + { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + } + ], + "wasbs://container@account.blob.core.windows.net/dir": [ + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + } + ], + "gs://bucket/file.txt": [ + { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_review_sample_large.csv": [ + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + } + ], + "s3://ai-ref-arch/yelp-review/model.pt": [ + { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_business.json": [ + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + } + ], + "hdfs://namenode:8020/user/hadoop/file.txt": [ + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + } + ], + "gs://bucket/dir": [ + { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + } + ], + "gcs://bucket/dir": [ + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_user.json": [ + { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + } + ], + "s3://bucket/path/to/file": [ + { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + } + ], + "hdfs://host:port/path/to/dir": [ + { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_tip.json": [ + { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + } + ], + "hdfs://namenode:8020/user/hadoop/dir": [ + { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + } + ], + "s3://ai-ref-arch/yelp-review/yelp_review_sample.csv": [ + { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + } + ], + "wasbs://container@account.blob.core.windows.net/file.txt": [ + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + } + ] +} diff --git a/tests/hash_res/workerHostnames.json b/tests/hash_res/workerHostnames.json new file mode 100644 index 0000000..850c38b --- /dev/null +++ b/tests/hash_res/workerHostnames.json @@ -0,0 +1,52 @@ +[ + "worker-fmvjygixyt-8993", + "worker-ujvhyfprjc-79", + "worker-nakxlhtbwu-355", + "worker-uohlaocjcb-9854", + "worker-gxdkefonqf-1738", + "worker-gjzlwiuifu-4991", + "worker-oqjcznkqhh-1621", + "worker-muzlyqhjtf-1501", + "worker-fjqojdrnun-394", + "worker-hasicqeqms-2548", + "worker-pxtwffhwpv-1106", + "worker-iakqckkxxd-3188", + "worker-vjuxlgfjsn-2944", + "worker-fnwnfpqdsi-6567", + "worker-xwcsdwxijw-2610", + "worker-bfydonscfx-4806", + "worker-kvfkefpucw-7829", + "worker-hzwbeyabvg-6268", + "worker-atzylwpgju-9727", + "worker-crrkjzttdk-9462", + "worker-kfgorghjgw-235", + "worker-gukpxxujsl-526", + "worker-imoaepvvdz-8203", + "worker-dnbkvfqaed-811", + "worker-wtborcfzha-3675", + "worker-yfkdrypeuq-5502", + "worker-vocaaeuwug-8440", + "worker-nndnhtubyb-3198", + "worker-edmtlksxyz-8687", + "worker-jpswgyapjb-9091", + "worker-powoalhuan-889", + "worker-uhfxhulyiz-2526", + "worker-nufuphxfbk-3794", + "worker-yqfvjtwvyc-3118", + "worker-rfczdltwwt-3569", + "worker-keebwqvzoi-7748", + "worker-sqkpasdrca-9840", + "worker-lkjdkmlqdb-3457", + "worker-goakzjzyca-4901", + "worker-bcnbtjkqeg-6684", + "worker-xinbxyhoxj-4420", + "worker-usazyzkozq-6526", + "worker-nkegjyykfy-5121", + "worker-dmnwyfzetu-4312", + "worker-agyqpurqcz-1984", + "worker-ibolbcdres-8977", + "worker-kdikdfjtcb-352", + "worker-auuevjgrbb-2181", + "worker-zayysvqavb-2677", + "worker-hyvoweednm-4491" +] diff --git a/tests/hash_res/workerList.json b/tests/hash_res/workerList.json new file mode 100644 index 0000000..30106e2 --- /dev/null +++ b/tests/hash_res/workerList.json @@ -0,0 +1,202 @@ +[ + { + "version": 1, + "identifier": "b785d1f098863821a4eb6bee226c35b7" + }, + { + "version": 1, + "identifier": "065141ec74bd3ad8bff25e7bc18408be" + }, + { + "version": 1, + "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" + }, + { + "version": 1, + "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" + }, + { + "version": 1, + "identifier": "35a44a2979bb392b86a4d51b44b666c7" + }, + { + "version": 1, + "identifier": "55da12e6f8ea35059e783843d4733281" + }, + { + "version": 1, + "identifier": "a11b880d043231fba4a14c9da7c86a83" + }, + { + "version": 1, + "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" + }, + { + "version": 1, + "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" + }, + { + "version": 1, + "identifier": "ed014f952f6c3e1ca0d5a479e4141456" + }, + { + "version": 1, + "identifier": "b065a65d21613f30b210668f36b99865" + }, + { + "version": 1, + "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" + }, + { + "version": 1, + "identifier": "ff552a31dc9c31dfaae746915261cc0f" + }, + { + "version": 1, + "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" + }, + { + "version": 1, + "identifier": "1ae5694808e43da58ba98374c7e51190" + }, + { + "version": 1, + "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" + }, + { + "version": 1, + "identifier": "70d7e21f675633c38b057731c646283b" + }, + { + "version": 1, + "identifier": "861111d5e78e39bcb4111b1ac713d40b" + }, + { + "version": 1, + "identifier": "464294a547843ecca2be5a615b1397c0" + }, + { + "version": 1, + "identifier": "0fcc8d949efd3ed594bec8171d131a4e" + }, + { + "version": 1, + "identifier": "acf3522e534a35c2a811716eeb29c22e" + }, + { + "version": 1, + "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" + }, + { + "version": 1, + "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" + }, + { + "version": 1, + "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" + }, + { + "version": 1, + "identifier": "d71a716e22ef33e4baa94336282c6f3b" + }, + { + "version": 1, + "identifier": "08dfb7f3f3bf3668889accd20d27ae56" + }, + { + "version": 1, + "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" + }, + { + "version": 1, + "identifier": "b43de09a152b3d42ba0f628e69742db3" + }, + { + "version": 1, + "identifier": "d53648c23d853765a0662482a16a4ca3" + }, + { + "version": 1, + "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" + }, + { + "version": 1, + "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" + }, + { + "version": 1, + "identifier": "e4d8ef54fd3834628e07fd42e79978b3" + }, + { + "version": 1, + "identifier": "aacf21dd9eb537eeb332c48248e79865" + }, + { + "version": 1, + "identifier": "865668aadaba3511a84d8054613dd14b" + }, + { + "version": 1, + "identifier": "959e6e70afa63804bf6f6f003ab54adb" + }, + { + "version": 1, + "identifier": "d69c0d9d83d7348bbf3ff323a450babc" + }, + { + "version": 1, + "identifier": "ce88e51340d034eb95942143f1d177dd" + }, + { + "version": 1, + "identifier": "d9824b27273034f69ddf0369b907de1e" + }, + { + "version": 1, + "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" + }, + { + "version": 1, + "identifier": "07a1007d01053baeb0aa798f88e1a0f2" + }, + { + "version": 1, + "identifier": "f4c597fd0865329ca22e1ab30e0adf33" + }, + { + "version": 1, + "identifier": "5d2e3793fc6638e98507610a19a6079f" + }, + { + "version": 1, + "identifier": "18da1234f4cb339d8e563665adcbaae1" + }, + { + "version": 1, + "identifier": "f170065c69273b4092cf73ffe2fb8a49" + }, + { + "version": 1, + "identifier": "f4017d1d98933b659d6b0e2811c22a1c" + }, + { + "version": 1, + "identifier": "63612b5f0daf3724b30428ee6a390e86" + }, + { + "version": 1, + "identifier": "d7028a8eab373a28b2a2f6109a13011d" + }, + { + "version": 1, + "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" + }, + { + "version": 1, + "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" + }, + { + "version": 1, + "identifier": "9f1304b5ef553bb99291561ab5fdccba" + } +] diff --git a/tests/test_worker_entity.py b/tests/test_worker_entity.py new file mode 100644 index 0000000..99d909f --- /dev/null +++ b/tests/test_worker_entity.py @@ -0,0 +1,68 @@ +import json +import re + +from alluxio.worker_ring import WorkerEntity + + +def test_worker_entity_from_info_dynamic(): + # Define a mapping of field names to their specific values + field_values = { + "version": 1, + "identifier": "cb157baaafe04b988af01a4645d38456", + "Host": "192.168.4.36", + "ContainerHost": "container_host_value", + "RpcPort": 432423, + "DataPort": 54237, + "SecureRpcPort": 23514, + "NettyDataPort": 45837, + "WebPort": 65473, + "DomainSocketPath": "domain_socket_path_value", + "HttpServerPort": 39282, + } + + # Dynamically construct worker_info_dict using field_values + worker_info_dict = { + "Identity": { + k: v + for k, v in field_values.items() + if k in ["version", "identifier"] + }, + "WorkerNetAddress": { + k: v + for k, v in field_values.items() + if k not in ["version", "identifier"] + }, + } + worker_info_bytes = json.dumps(worker_info_dict).encode("utf-8") + + # Convert worker_info_bytes and instantiate WorkerEntity + worker_entity = WorkerEntity.from_worker_info(worker_info_bytes) + + # Validate WorkerIdentity fields + assert worker_entity.worker_identity.version == field_values["version"] + assert worker_entity.worker_identity.identifier == bytes.fromhex( + field_values["identifier"] + ) + # Dynamically validate WorkerNetAddress fields using field_values + for field_name, expected_value in field_values.items(): + if field_name in [ + "version", + "identifier", + ]: # Skip identity-specific fields + continue + # Convert CamelCase field_name to snake_case to match WorkerNetAddress attribute names + snake_case_field_name = camel_to_snake(field_name) + actual_value = getattr( + worker_entity.worker_net_address, snake_case_field_name + ) + assert ( + actual_value == expected_value + ), f"Field '{snake_case_field_name}' expected '{expected_value}', got '{actual_value}'" + + +def camel_to_snake(name): + """ + Convert a CamelCase name into snake_case. + """ + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() diff --git a/tests/test_worker_hash_ring.py b/tests/test_worker_hash_ring.py new file mode 100644 index 0000000..edde791 --- /dev/null +++ b/tests/test_worker_hash_ring.py @@ -0,0 +1,84 @@ +import json + +from alluxio.worker_ring import ConsistentHashProvider +from alluxio.worker_ring import WorkerIdentity +from alluxio.worker_ring import WorkerNetAddress + + +def test_hash_ring(): + worker_hostnames_path = "tests/hash_res/workerHostnames.json" + with open(worker_hostnames_path, "r") as file: + worker_hostnames = json.load(file) + + hash_provider = ConsistentHashProvider( + worker_hosts=", ".join(worker_hostnames), + hash_node_per_worker=5, + etcd_refresh_workers_interval=100000000, + ) + + hash_ring_path = "tests/hash_res/activeNodesMap.json" + validate_hash_ring(hash_provider.hash_ring, hash_ring_path) + + worker_list_path = "tests/hash_res/workerList.json" + with open(worker_list_path, "r") as file: + workers_data = json.load(file) + + worker_info_map = {} + for worker_data in workers_data: + worker_identity = WorkerIdentity( + version=int(worker_data["version"]), + identifier=bytes.fromhex(worker_data["identifier"]), + ) + default_worker_net_address = WorkerNetAddress() + worker_info_map[worker_identity] = default_worker_net_address + + hash_provider._update_hash_ring(worker_info_map) + validate_hash_ring(hash_provider.hash_ring, hash_ring_path) + + file_workers_path = "tests/hash_res/fileUrlWorkers.json" + with open(file_workers_path, "r") as file: + file_workers_data = json.load(file) + + for ufs_url, workers in file_workers_data.items(): + current_worker_identities = ( + hash_provider._get_multiple_worker_identities(ufs_url, 5) + ) + original_set = { + (worker["version"], bytes.fromhex(worker["identifier"])) + for worker in workers + } + current_set = { + (worker.version, worker.identifier) + for worker in current_worker_identities + } + assert original_set == current_set + + +def validate_hash_ring(current_ring, result_file_path): + with open(result_file_path, "r") as file: + hash_ring_data = json.load(file) + + not_found_count = 0 + mismatch_count = 0 + for hash_key, worker_identity in hash_ring_data.items(): + key = int(hash_key) + if key in current_ring: + # Fetch the WorkerIdentity object from current_ring + current_worker_identity = current_ring[key] + + # Check if the version and identifier match + if current_worker_identity.version == worker_identity[ + "version" + ] and current_worker_identity.identifier == bytes.fromhex( + worker_identity["identifier"] + ): + continue + else: + mismatch_count += 1 + else: + not_found_count += 1 + + assert ( + not_found_count == 0 + ), "Some hash keys were not found in the current ring" + assert mismatch_count == 0, "Some hash keys had mismatched WorkerIdentity" diff --git a/tests/test_worker_net_address.py b/tests/test_worker_net_address.py deleted file mode 100644 index 55f2966..0000000 --- a/tests/test_worker_net_address.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest - -from alluxio.worker_ring import DEFAULT_DATA_PORT -from alluxio.worker_ring import WorkerNetAddress - - -def test_worker_net_address_from_worker_info(): - worker_info = b'{"WorkerNetAddress": {"Host": "localhost"}}' - result = WorkerNetAddress.from_worker_info(worker_info) - assert result.host == "localhost", "The host should be localhost" - - invalid_worker_info = b'{"invalid_json": {}' - with pytest.raises(ValueError) as err: - WorkerNetAddress.from_worker_info(invalid_worker_info) - assert ( - "Provided worker_info is not a valid JSON string" in err.value.args[0] - ) - - non_bytes_worker_info = '{"WorkerNetAddress": {"Host": "localhost"}}' - with pytest.raises(AttributeError) as err: - WorkerNetAddress.from_worker_info(non_bytes_worker_info) - assert ( - "Provided worker_info must be a bytes-like object" in err.value.args[0] - ) - - -def test_worker_net_address_from_worker_hosts(): - worker_hosts = "host1,host2,host3" - result = WorkerNetAddress.from_worker_hosts(worker_hosts) - assert len(result) == 3, "The length of worker list should be 3" - - -def test_worker_net_address_dump_main_info(): - worker_address = WorkerNetAddress( - host="host", rpc_port=1234, http_server_port=38080 - ) - result = worker_address.dump_main_info() - - assert "host" in result, "host should be included in the dump info" - assert ( - "1234" in result - ), "rpc port 1234 should be included in the dump info" - assert ( - str(DEFAULT_DATA_PORT) in result - ), "default data port should be included in the dump info" - assert ( - "38080" not in result - ), "Http server port should not be included in the dump info" - - -def test_worker_net_address_from_service_registry_worker_info(): - worker_info = ( - b'{"Identity":{"version":1,' - b'"identifier":"cb157baaafe04b988af01a4645d38456"},' - b'"WorkerNetAddress":{"Host":"192.168.4.36",' - b'"ContainerHost":"",' - b'"RpcPort":29999,' - b'"DataPort":29997,"SecureRpcPort":0,' - b'"NettyDataPort":29997,' - b'"WebPort":30000,' - b'"DomainSocketPath":""},' - b'"State":"AUTHORIZED",' - b'"GenerationNumber":-1,' - b'"ServiceEntityName":"worker-cb157baa-afe0-4b98-8af0-1a4645d38456"}' - ) - result = WorkerNetAddress.from_worker_info(worker_info) - assert result.host == "192.168.4.36", "The host should be 192.168.4.36"