diff --git a/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py b/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py index 7917b98ca..33cc2351e 100644 --- a/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py +++ b/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py @@ -117,6 +117,38 @@ def _deserialize_fragments( return [p.deserialize() for p in serialized_fragments] +class _ParquetFileFragmentMetaData: + """Class to store metadata of a Parquet file fragment. + + This includes all attributes from `pyarrow.parquet.FileMetaData` except for `schema`, + which is stored in `self.schema_pickled` as a pickled object from + `cloudpickle.dumps()`, used in deduplicating schemas across multiple fragments. + """ + + def __init__(self, fragment_metadata: "pyarrow.parquet.FileMetaData"): + self.created_by = fragment_metadata.created_by + self.format_version = fragment_metadata.format_version + self.num_columns = fragment_metadata.num_columns + self.num_row_groups = fragment_metadata.num_row_groups + self.num_rows = fragment_metadata.num_rows + self.serialized_size = fragment_metadata.serialized_size + # This is a pickled schema object, to be set later with + # `self.set_schema_pickled()`. To get the underlying schema, use + # `cloudpickle.loads(self.schema_pickled)`. + self.schema_pickled: bytes | None = None + + # Calculate the total byte size of the file fragment using the original + # object, as it is not possible to access row groups from this class. 
+ self.total_byte_size = 0 + for row_group_idx in range(fragment_metadata.num_row_groups): + row_group_metadata = fragment_metadata.row_group(row_group_idx) + self.total_byte_size += row_group_metadata.total_byte_size + + def set_schema_pickled(self, schema_pickled: bytes) -> None: + """Note: to get the underlying schema, use `cloudpickle.loads(self.schema_pickled)`.""" + self.schema_pickled = schema_pickled + + # This retry helps when the upstream datasource is not able to handle # overloaded read request or failed with some retriable failures. # For example when reading data from HA hdfs service, hdfs might @@ -267,7 +299,8 @@ def __init__( # noqa: PLR0912,PLR0915 prefetch_remote_args = {} if self._local_scheduling: prefetch_remote_args["scheduling_strategy"] = self._local_scheduling - self._metadata = meta_provider.prefetch_file_metadata(pq_ds.fragments, **prefetch_remote_args) or [] + raw_metadata = meta_provider.prefetch_file_metadata(pq_ds.fragments, **prefetch_remote_args) or [] + self._metadata = self._dedupe_metadata(raw_metadata) except OSError as e: _handle_read_os_error(e, paths) except pa.ArrowInvalid as ex: @@ -293,6 +326,37 @@ def __init__( # noqa: PLR0912,PLR0915 if shuffle == "files": self._file_metadata_shuffler = np.random.default_rng() + def _dedupe_metadata( + self, + raw_metadatas: list["pyarrow.parquet.FileMetaData"], + ) -> list[_ParquetFileFragmentMetaData]: + """Deduplicate schemas to reduce memory usage. + + For datasets with a large number of columns, the FileMetaData + (in particular the schema) can be very large. We can reduce the + memory usage by only keeping unique schema objects across all + file fragments. This method deduplicates the schemas and returns + a list of `_ParquetFileFragmentMetaData` objects. 
+ """ + schema_to_id: dict[int, Any] = {} # serialized_schema -> schema_id + id_to_schema: dict[Any, bytes] = {} # schema_id -> serialized_schema + stripped_metadatas = [] + for fragment_metadata in raw_metadatas: + stripped_md = _ParquetFileFragmentMetaData(fragment_metadata) + + schema_ser = cloudpickle.dumps(fragment_metadata.schema.to_arrow_schema()) # type: ignore[no-untyped-call] + if schema_ser not in schema_to_id: + schema_id: int | None = len(schema_to_id) + schema_to_id[schema_ser] = schema_id + id_to_schema[schema_id] = schema_ser + stripped_md.set_schema_pickled(schema_ser) + else: + schema_id = schema_to_id.get(schema_ser) + existing_schema_ser = id_to_schema[schema_id] + stripped_md.set_schema_pickled(existing_schema_ser) + stripped_metadatas.append(stripped_md) + return stripped_metadatas + def estimate_inmemory_data_size(self) -> int | None: """Return an estimate of the Parquet files encoding ratio. @@ -300,9 +364,7 @@ def estimate_inmemory_data_size(self) -> int | None: """ total_size: int = 0 for file_metadata in self._metadata: - for row_group_idx in range(file_metadata.num_row_groups): - row_group_metadata = file_metadata.row_group(row_group_idx) - total_size += row_group_metadata.total_byte_size + total_size += file_metadata.total_byte_size return total_size * self._encoding_ratio # type: ignore[return-value] def get_read_tasks(self, parallelism: int) -> list[ReadTask]: @@ -315,7 +377,7 @@ def get_read_tasks(self, parallelism: int) -> list[ReadTask]: if len(pq_metadata) < len(self._pq_fragments): # Pad `pq_metadata` to be same length of `self._pq_fragments`. # This can happen when no file metadata being prefetched. 
- pq_metadata += [None] * (len(self._pq_fragments) - len(pq_metadata)) + pq_metadata += [None] * (len(self._pq_fragments) - len(pq_metadata)) # type: ignore[list-item] if self._file_metadata_shuffler is not None: files_metadata = list(zip(self._pq_fragments, self._pq_paths, pq_metadata)) @@ -330,10 +392,10 @@ def get_read_tasks(self, parallelism: int) -> list[ReadTask]: ) read_tasks = [] - for fragments, paths, metadata in zip( + for fragments, paths, metadata in zip( # type: ignore[var-annotated] np.array_split(pq_fragments, parallelism), np.array_split(pq_paths, parallelism), - np.array_split(pq_metadata, parallelism), + np.array_split(pq_metadata, parallelism), # type: ignore[arg-type] ): if len(fragments) <= 0: continue diff --git a/poetry.lock b/poetry.lock index 1c41a91fe..f4af3e71b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]] name = "aenum" @@ -412,17 +412,17 @@ css = ["tinycss2 (>=1.1.0,<1.3)"] [[package]] name = "boto3" -version = "1.34.79" +version = "1.34.93" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.79-py3-none-any.whl", hash = "sha256:265b0b4865e8c07e27abb32a31d2bd9129bb009b1d89ca0783776ec084886123"}, - {file = "boto3-1.34.79.tar.gz", hash = "sha256:139dd2d94eaa0e3213ff37ba7cf4cb2e3823269178fe8f3e33c965f680a9ddde"}, + {file = "boto3-1.34.93-py3-none-any.whl", hash = "sha256:b59355bf4a1408563969526f314611dbeacc151cf90ecb22af295dcc4fe18def"}, + {file = "boto3-1.34.93.tar.gz", hash = "sha256:e39516e4ca21612932599819662759c04485d53ca457996a913163da11f052a4"}, ] [package.dependencies] -botocore = ">=1.34.79,<1.35.0" +botocore = ">=1.34.93,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -860,13 +860,13 @@ xray = ["mypy-boto3-xray (>=1.34.0,<1.35.0)"] [[package]] name = "botocore" -version = "1.34.93" +version = "1.34.98" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.93-py3-none-any.whl", hash = "sha256:6fbd5a53a2adc9b3d4ebd90ae0ede83a91a41d96231f8a5984051f75495f246d"}, - {file = "botocore-1.34.93.tar.gz", hash = "sha256:79d39b0b87e962991c6dd55e78ce15155099f6fb741be88b1b8a456a702cc150"}, + {file = "botocore-1.34.98-py3-none-any.whl", hash = "sha256:631c0031d8ce922b5752ab395ead896a0281b0dc74745a754d0351a27c5d83de"}, + {file = "botocore-1.34.98.tar.gz", hash = "sha256:4cee65df02f4b0be08ad1401965cc89efafebc50ef0727d2d17083c7f1ed2831"}, ] [package.dependencies] @@ -1397,17 +1397,17 @@ files = [ [[package]] name = "deltalake" -version = "0.17.2" +version = "0.17.3" description = "Native Delta Lake Python binding based on delta-rs with Pandas integration" optional = true python-versions = ">=3.8" files = [ - {file = "deltalake-0.17.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:fa3e6d11f57df054d1f0e6ddd18af3f8d78f3a9b942ba82afc92af6a88e20fa1"}, - {file = "deltalake-0.17.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:53010b41da62a5cc54dfc7328b2012db2f2f4f34a5c994ff1179ac286bc30a51"}, - {file = "deltalake-0.17.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8058346784749da06032dc7ccc35b0c35c1e6410ac0d93e0bf300160f8658285"}, - {file = "deltalake-0.17.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3e811f8603c1290d9ee35bae5de3ecbd4da691840f3808c5eadae91ae905c17"}, - {file = "deltalake-0.17.2-cp38-abi3-win_amd64.whl", hash = "sha256:91036c2d473b66d42335aff02a6cb6cd0a466fbd30933b6e5987c3b850f359d9"}, - {file = "deltalake-0.17.2.tar.gz", hash = "sha256:eccde1fde8b675e6e980292a24c9ef6c259d02eddf6380cd36f9a417095a9223"}, + {file = "deltalake-0.17.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bde8e2b38e41935e726e195959bced8c59e9e2be5ad4140823611f2fd5348393"}, + {file = "deltalake-0.17.3-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:0d83a4b1bd5eb2781f39e5c817868c24551b67b292b6a219ac41fc866fdfb1cf"}, + {file = "deltalake-0.17.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c579355192e062c48c6ffff06784d8b2ffcde94308d16c1368e94c02148637a3"}, + {file = "deltalake-0.17.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9c85e23894dc6b29148d2632e2284dbde52689094ff1f0418cededa3cebab06"}, + {file = "deltalake-0.17.3-cp38-abi3-win_amd64.whl", hash = "sha256:72228dcff9a5719bac0663050677938de199105171a40466e9491d79ced1fbf5"}, + {file = "deltalake-0.17.3.tar.gz", hash = "sha256:5f518c04d87c99c1e921dd76fbe7f3a9e5558534a0b21dd822b13eb5a62f3145"}, ] [package.dependencies] @@ -2955,6 +2955,7 @@ files = [ {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"}, {file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"}, {file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"}, + {file = "msgpack-1.0.8-py3-none-any.whl", hash = "sha256:24f727df1e20b9876fa6e95f840a2a2651e34c0ad147676356f4bf5fbb0206ca"}, {file = "msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"}, ] @@ -3783,42 +3784,42 @@ kerberos = ["requests-kerberos"] [[package]] name = "oracledb" -version = "2.1.2" +version = "2.2.0" description = "Python interface to Oracle Database" optional = true python-versions = ">=3.7" files = [ - {file = "oracledb-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4ffaba9504c638c29129b484cf547accf750bd0f86df1ca6194646a4d2540691"}, - {file = "oracledb-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d98deb1e3a500920f5460d457925f0c8cef8d037881fdbd16df1c4734453dd"}, - {file = 
"oracledb-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bde2df672704fbe12ab0653f6e808b1ed62de28c6864b17fc3a1fcac9c1fd472"}, - {file = "oracledb-2.1.2-cp310-cp310-win32.whl", hash = "sha256:3b3798a1220fc8736a37b9280d0ae4cdf263bb203fc6e2b3a82c33f9a2010702"}, - {file = "oracledb-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:92620efd5eb0d23b252d75f2f2ff1deadf25f44546903e3283760cb276d524ed"}, - {file = "oracledb-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b913a164e1830d0e955b88d97c5e4da4d2402f8a8b0d38febb6ad5a8ef9e4743"}, - {file = "oracledb-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c53827344c6d001f492aee0a3acb6c1b6c0f3030c2f5dc8cb86dc4f0bb4dd1ab"}, - {file = "oracledb-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50225074841d5f9b281d620c012ced4b0946ff5a941c8b639be7babda5190709"}, - {file = "oracledb-2.1.2-cp311-cp311-win32.whl", hash = "sha256:a043b4df2919411b787bcd24ffa4286249a11d05d29bb20bb076d108c3c6f777"}, - {file = "oracledb-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:9edce208c26ee018e43b75323888743031be3e9f0c0e4221abf037129c12d949"}, - {file = "oracledb-2.1.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:08aa313b801dda950918168d3962ba59a617adce143e0c2bf1ee9b847695faaa"}, - {file = "oracledb-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de5c932b04d3bcdd22c71c0e5c5e1d16b6a3a2fc68dc472ee3a12e677461354c"}, - {file = "oracledb-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d590caf39b1901bcba394fcda9815438faff0afaf374025f89ef5d65993d0a4"}, - {file = "oracledb-2.1.2-cp312-cp312-win32.whl", hash = "sha256:1e3ffdfe76c97d1ca13a3fecf239c96d3889015bb5b775dc22b947108044b01e"}, - {file = "oracledb-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c1eaf8c74bb6de5772de768f2f3f5eb935ab935c633d3a012ddff7e691a2073"}, - {file = 
"oracledb-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2ee06e154e08cc5e4037855d74dc6e37dc054c91a7a1a372bb60d4442e2ed3d"}, - {file = "oracledb-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a21d84aaf5dddab0cfa8ab7c23272c0295a5c796f212a4ce8a6b499643663dd"}, - {file = "oracledb-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b337f7cf30753c3a32302fbc25ca80d7ff5049dd9333e681236a674a90c21caf"}, - {file = "oracledb-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:b5d936763a9b26d32c4e460dbb346c2a962fcc98e6df33dd2d81fdc2eb26f1e4"}, - {file = "oracledb-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0ea32b87b7202811d85082f10bf7789747ce45f195be4199c5611e7d76a79e78"}, - {file = "oracledb-2.1.2-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:f94b22da87e051e3a8620d2b04d99e1cc9d9abb4da6736d6ae0ca436ba03fb86"}, - {file = "oracledb-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:391034ee66717dba514e765263d08d18a2aa7badde373f82599b89e46fa3720a"}, - {file = "oracledb-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a2d9891244b9b94465e30af8cc79380bbb41081c5dc0511cbc94cc250e9e26d"}, - {file = "oracledb-2.1.2-cp38-cp38-win32.whl", hash = "sha256:9a9a6e0bf61952c2c82614b98fe896d2cda17d81ffca4527556e6607b10e3365"}, - {file = "oracledb-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:d9a6447589f203ca846526c99a667537b099d54ddeff09d24f9da59bdcc8f98b"}, - {file = "oracledb-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eb688dd1f8ea2038d17bc84fb651aa1e994b155d3cb8b8387df70ab2a7b4c4c"}, - {file = "oracledb-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f22c31b894bb085a33d70e174c9bcd0abafc630c2c941ff0d630ee3852f1aa6"}, - {file = "oracledb-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5bc03520b8bd4dbf2ac4d937d298a85a7208ffbeec738eea92ad7bb00e7134a"}, - {file = 
"oracledb-2.1.2-cp39-cp39-win32.whl", hash = "sha256:5d4f6bd1036d7edbb96d8d31f0ca53696a013c00ac82fc19ac0ca374d2265b2c"}, - {file = "oracledb-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:69bde9770392c1c859b1e1d767dbb9ca4c57e3f2946ca90c779d9402a7e96111"}, - {file = "oracledb-2.1.2.tar.gz", hash = "sha256:3054bcc295d7378834ba7a5aceb865985e954915f9b07a843ea84c3824c6a0b2"}, + {file = "oracledb-2.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:253a85eef53d97815b4d838e5275d0a99e33ec340eb4b945cd2371e2bcede46b"}, + {file = "oracledb-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa5c2982076366f59dade28b554b43a257ad426e55359124bc37f191f51c2d46"}, + {file = "oracledb-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19408844bd4af5b4d40f06c3e5b88c6bfce4a749f61ab766f41b22c4070c5c15"}, + {file = "oracledb-2.2.0-cp310-cp310-win32.whl", hash = "sha256:c2e2e3f00d7eb7f4dabfa8996dc70db03bd7dbe474d2d1dc381daeff54cfdeff"}, + {file = "oracledb-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:efed536635b0fec5c1484eda55fad4affa57672b87596ec6273123a3133ba5b6"}, + {file = "oracledb-2.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4b7e14b04dc2af4697ca561f9bcac110a67a7be2ccf868d789e92771017feca"}, + {file = "oracledb-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61bbf9cd64a2f3b65a12550329b2f0caed7d9aa5e892c0ce69d9ea7b3cb3cb8e"}, + {file = "oracledb-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e461d1c7ef4d3f03d84595a13754390a62300976782d7c29efc07fcc915e1b3"}, + {file = "oracledb-2.2.0-cp311-cp311-win32.whl", hash = "sha256:6c7da69d18cf02e469e15215af9c6f219256972a172c0e544a2ecc2a5cab9aa5"}, + {file = "oracledb-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0245f677e27ee0990eb0213485031dacdc837a89569563f1594b82ccb362255"}, + {file = "oracledb-2.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:10d2cd354a15e2b7e191256a0179874068fc64fa6543b2e20c9c1c38f0dd0839"}, + {file = "oracledb-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fbf07e0e88c9ff1555c9301d95c69e0d48263cf7df63172043fe0a042539e687"}, + {file = "oracledb-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6a1365d3e05ca73b638ef939f9a609fed0ae5da75d13b2cfb75601ab8b85fce"}, + {file = "oracledb-2.2.0-cp312-cp312-win32.whl", hash = "sha256:3fe57091a1463efac692b352e99f9daeab5ab375bab2060c5caba9a3a7743c15"}, + {file = "oracledb-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5ca9c050e18b2b1005b40d44a2098155445836071253ee5d547c7f285fc7729"}, + {file = "oracledb-2.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b5ad105aabc8ff32e3d3a343a92cf84976cf2454b6a6ff02065383fc3863e68d"}, + {file = "oracledb-2.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a7f2572c358604186d857c80f384ad03226e372731770911856541a06bdd34"}, + {file = "oracledb-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa1fe78ed0cbf98593c1f3f620f751b725b189f8c845577e39a372f44b2bf384"}, + {file = "oracledb-2.2.0-cp37-cp37m-win32.whl", hash = "sha256:bcef115bd147d6f267e3b09cbc3fc04189bff69e94d05c1e266c698668061e8d"}, + {file = "oracledb-2.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1272bf562bcd6ff5e23b1e1fe8c3363d7a66fe8f48b1e00c4fb081d5436e1df5"}, + {file = "oracledb-2.2.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:e0010aee0ed0a57964ce9f6cb0e2315a4ffce947121e0bb1c618e5091e64bab4"}, + {file = "oracledb-2.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:437d7c5a36f7e72ca36e1ac3f1a7c087bffa1cd0ba3a84471e54506c8572a5ad"}, + {file = "oracledb-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:581b7067283910a53b1ac1a50c0046058a21bd5c073d529bf695113db6d25f62"}, + {file = "oracledb-2.2.0-cp38-cp38-win32.whl", hash = 
"sha256:97fdc27a15f6441434a7ef563f522c8ceac19c2933f2da1082125670a2e2fc6b"}, + {file = "oracledb-2.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:c22a2052997a01e59a4c9c33c9c0593eebcb1d893addeda9cd57003c2e088a85"}, + {file = "oracledb-2.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b924ee3e7d41edb367e5bb4cbb30990ad447fedda9ef0fe29b691d36a8d338c2"}, + {file = "oracledb-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3f9fa10b5f5c5dbe80dc7bdea5e5746abd411217e812fae66cc61c68f3f8f6"}, + {file = "oracledb-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba96a450275bceb5e0928e0dc01b5fb200e81ba04e99499d4930ccba681fd88a"}, + {file = "oracledb-2.2.0-cp39-cp39-win32.whl", hash = "sha256:35b6524b57979dbe8463af06648ad9972bce06e014a292ad96fec34c62665a8b"}, + {file = "oracledb-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0b4968f39871d501ab16a2fe05b5b4ae954e338e6b9dcefeb9bced998ddd4c4b"}, + {file = "oracledb-2.2.0.tar.gz", hash = "sha256:f52c7df38b13243b5ce583457b80748a34682b9bb8370da2497868b71976798b"}, ] [package.dependencies] @@ -5047,20 +5048,26 @@ prompt_toolkit = ">=2.0,<=3.0.36" [[package]] name = "ray" -version = "2.12.0" +version = "2.20.0" description = "Ray provides a simple, universal API for building distributed applications." 
optional = true python-versions = ">=3.8" files = [ - {file = "ray-2.12.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:775c4dda4292e813c29dc5cb22e61de7f47a411cf5a7a8a8b5dbb61fe9cd83f8"}, - {file = "ray-2.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50a236008840a402c2c88c1597f19347b4a33d7416e75ee18fd002bf76d0e04e"}, - {file = "ray-2.12.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:6e85b28e3c733541e350b709a1e8edeb4cc1eb8b3f7f69fd562981931739e2f0"}, - {file = "ray-2.12.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:9ad451cd7df8c5dbad107785d972b78a62903aabba3b3b8be4fe00f377bca120"}, - {file = "ray-2.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b23ded412190496bfa7738f1ca75a759fd699baa717d6351a18dbbdc2f88159d"}, - {file = "ray-2.12.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:43c6ea2140f05929d0bff96b6ee75722526074e5bcf580cccf8005b70d7aa796"}, - {file = "ray-2.12.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b0bbc2efebef78675b89c89d74db286ec5578beeab99b91ab6f8317a8630b4d3"}, - {file = "ray-2.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c055b6329af830c230f9dd29e734566aa8f1743a00cdf30e2bf20d143b556faf"}, - {file = "ray-2.12.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:efa813963a66271cb411f2532c44abac46005bf3c5c50fcb4bd08cd10a28fed9"}, + {file = "ray-2.20.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8855a5df8b3e6b8bcb5582a8491c50d0237e70751f941e8978bd6408245b7838"}, + {file = "ray-2.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0566b28c75aad1d47b9403c3901a85db586ce7191fdc6978e07ad56e80bf82b"}, + {file = "ray-2.20.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:738c68f4114754f846b3d03b730b42a6468f8b54665732da9f9108aa1d3ecbe3"}, + {file = "ray-2.20.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:2c7f8cd468cbba009d7ebd8a8da66026aeb520f7f4183dd6f49419d75bc84415"}, + {file = "ray-2.20.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:611d34d0c659652a38ef482a82dfc362074984617765e1d5a414337e4f914cfd"}, + {file = "ray-2.20.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:f7816767e644014f65afbfceb6adfb08c15784a4227aa331b28ac90d1b757a58"}, + {file = "ray-2.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e98df29fd6dac52c87c1f5be5ad99601a8955eaabe921e5cab29b27775250ce"}, + {file = "ray-2.20.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:e84ddad1521e06c91fc641f2b856d33ca2bfa314784172862c41a5184e0e760b"}, + {file = "ray-2.20.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:d9b13815fae5c9a68c9a02f21e1c49c58a5bb6565cb9ed5d48571cacce7568f2"}, + {file = "ray-2.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:6ac1dcb303ddf53d2d87bc5b719e8c38f0a5efe41e175b6ba563fb65b5f4e9a2"}, + {file = "ray-2.20.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:1de0810f77ae4a0bf055aa2bdcb161be1d6d1b67b4095e85a5b3fbb6e0dadcd2"}, + {file = "ray-2.20.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f3519dd7794ead4d3e17d4570593b2a10e8db062836907517e85b4e769dec1a"}, + {file = "ray-2.20.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5a2cb9f100bbb6351372519b03ddc21d9fa6c8716621237273a59a6e250a8204"}, + {file = "ray-2.20.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:64b394a6462a2ac2401b1b004f2cc7ac31e429388abf27024072a55702f1159c"}, + {file = "ray-2.20.0-cp39-cp39-win_amd64.whl", hash = "sha256:65938f7bd28a825d90c643465ad6b1334d97d16e381c409b19269e4dcc043341"}, ] [package.dependencies] @@ -5095,9 +5102,9 @@ virtualenv = {version = ">=20.0.24,<20.21.1 || >20.21.1", optional = true, marke [package.extras] air = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "fastapi", "fsspec", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "memray", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", 
"uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "dm-tree", "fastapi", "fsspec", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "memray", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "pyyaml", "ray-cpp (==2.12.0)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] -client = ["grpcio (!=1.56.0)"] -cpp = ["ray-cpp (==2.12.0)"] +all = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "dm-tree", "fastapi", "fsspec", "grpcio", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "memray", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "pyyaml", "ray-cpp (==2.20.0)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +client = ["grpcio"] +cpp = ["ray-cpp (==2.20.0)"] data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"] default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "memray", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "virtualenv (>=20.0.24,!=20.21.1)"] observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] @@ -6446,4 +6453,4 @@ sqlserver = ["pyodbc"] [metadata] lock-version = "2.0" python-versions 
= ">=3.8, <4.0" -content-hash = "3061b23c54e4bb218da5c5d3faaa42588a4c440a9244cadc82fcea87c7334165" +content-hash = "50f69aeed09c351f78d3f592118025012ae6876467584c681ab55d145e5b0bee" diff --git a/pyproject.toml b/pyproject.toml index 160a36cf9..768275bad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ modin = [ { version = "0.23.1post0", markers = "python_version < \"3.9\"", optional = true }, { version = "^0.26.0", markers = "python_version >= \"3.9\"", optional = true } ] -ray = { version = "^2.10.0", extras = ["default", "data"], optional = true } +ray = { version = "^2.20.0", extras = ["default", "data"], optional = true } [tool.poetry.extras] redshift = ["redshift-connector"]