Skip to content

Commit

Permalink
TEST-modin-project#6830: Use local s3 server instead of public s3 buckets
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Jan 17, 2024
1 parent 4bdaa49 commit 298eef5
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 7 deletions.
7 changes: 5 additions & 2 deletions modin/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,8 @@ def s3_storage_options(worker_id):
# to do that is to use the `worker_id`, which is unique, to determine what port to point
# to. We arbitrarily assign `5` as a worker id to the master worker, since we need a number
# for each worker, and we never run tests with more than `pytest -n 4`.
worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
url = f"http://127.0.0.1:555{worker_id}/"
worker_id = "0" if worker_id == "master" else worker_id.lstrip("gw")
url = f"http://127.0.0.1:550{worker_id}/"
return {"client_kwargs": {"endpoint_url": url}}


Expand Down Expand Up @@ -676,6 +676,9 @@ def s3_resource(s3_base):
s3 = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base})

s3.rm(bucket, recursive=True)
# bucket = conn.Bucket(bucket)
# bucket.objects.delete()
# bucket.delete()
for _ in range(20):
# We want to wait until the deletion finishes.
if not cli.list_buckets()["Buckets"]:
Expand Down
Binary file not shown.
Binary file not shown.
17 changes: 12 additions & 5 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,15 +2031,22 @@ def test_read_parquet_5767(self, tmp_path, engine):
# both Modin and pandas read column "b" as a category
df_equals(test_df, read_df.astype("int64"))

def test_read_parquet_s3_with_column_partitioning(self, engine):
# This test case comes from
def test_read_parquet_s3_with_column_partitioning(
self, s3_resource, engine, s3_storage_options
):
# https://github.com/modin-project/modin/issues/4636
dataset_url = "s3://modin-datasets/modin-bugs/modin_bug_5159_parquet/df.parquet"
dataset_path = "modin/pandas/test/data/issue5159.parquet"
s3_path = "s3://modin-test/modin-bugs/issue5159.parquet"

# TODO: write files to local s3 storage not through pandas
pandas.read_parquet(dataset_path).to_parquet(
s3_path, engine=engine, storage_options=s3_storage_options
)
eval_io(
fn_name="read_parquet",
path=dataset_url,
path=s3_path,
engine=engine,
storage_options={"anon": True},
storage_options=s3_storage_options,
)


Expand Down

0 comments on commit 298eef5

Please sign in to comment.