Skip to content

Commit

Permalink
fixes for read_json and read_feather
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Jan 18, 2024
1 parent 9139768 commit f0cd130
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
6 changes: 5 additions & 1 deletion modin/core/io/column_stores/feather_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,9 @@ def _read(cls, path, columns=None, **kwargs):
# Filtering out the columns that describe the frame's index
columns = [col for col in reader.schema.names if col not in index_cols]
return cls.build_query_compiler(
- path, columns, use_threads=False, dtype_backend=kwargs["dtype_backend"]
+ path,
+ columns,
+ use_threads=False,
+ storage_options=kwargs["storage_options"],
+ dtype_backend=kwargs["dtype_backend"],
)
13 changes: 11 additions & 2 deletions modin/core/io/text/json_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,21 @@ def _read(cls, path_or_buf, **kwargs):
return cls.single_worker_read(
path_or_buf, reason="`lines` argument not supported", **kwargs
)
- with OpenFile(path_or_buf, "rb") as f:
+ with OpenFile(
+ path_or_buf,
+ "rb",
+ **(kwargs.get("storage_options", None) or {}),
+ ) as f:
columns = pandas.read_json(BytesIO(b"" + f.readline()), lines=True).columns
kwargs["columns"] = columns
empty_pd_df = pandas.DataFrame(columns=columns)

- with OpenFile(path_or_buf, "rb", kwargs.get("compression", "infer")) as f:
+ with OpenFile(
+ path_or_buf,
+ "rb",
+ kwargs.get("compression", "infer"),
+ **(kwargs.get("storage_options", None) or {}),
+ ) as f:
column_widths, num_splits = cls._define_metadata(empty_pd_df, columns)
args = {"fname": path_or_buf, "num_splits": num_splits, **kwargs}
splits, _ = cls.partitioned_file(
Expand Down

0 comments on commit f0cd130

Please sign in to comment.