Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Parquet does not support saving fp16 GH#44846 #44847

Closed
wants to merge 7 commits into from
4 changes: 4 additions & 0 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ def write(
):
self.validate_dataframe(df)

# PyArrow cannot write float16 columns to Parquet, so fail early with a clear error
Anirudhsekar96 marked this conversation as resolved.
Show resolved Hide resolved
if df.select_dtypes(include="float16").columns.size > 0:
raise ValueError("PyArrow does not support saving float16")

from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
if index is not None:
from_pandas_kwargs["preserve_index"] = index
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,14 @@ def test_duplicate_columns(self, pa):
df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
self.check_error_on_write(df, pa, ValueError, "Duplicate column names found")

def test_write_column_fp16(self, pa):
Anirudhsekar96 marked this conversation as resolved.
Show resolved Hide resolved
# GH#44847
# PyArrow cannot write float16 columns, so the write must raise ValueError.
data = np.arange(2, 10, dtype=np.float16)
df = pd.DataFrame(data=data, columns=["fp16"])
msg = "PyArrow does not support saving float16"
self.check_error_on_write(df, pa, ValueError, msg)

def test_unsupported(self, pa):
# timedelta
df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)})
Expand Down