Skip to content

Commit

Permalink
Allow not passing a column for optional types in struct
Browse files Browse the repository at this point in the history
  • Loading branch information
nonibansal authored and nikhilgarg28 committed Aug 29, 2024
1 parent 2e2e4b6 commit 99f1a31
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 8 deletions.
3 changes: 3 additions & 0 deletions fennel/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## [1.5.13] - 2024-08-29
- Allow omitting fields of optional type when passing struct values.

## [1.5.12] - 2024-08-29
- Add support for timestamp dtype parsing in the case of a non-timestamp field.

Expand Down
21 changes: 20 additions & 1 deletion fennel/testing/test_cast_df_to_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,15 @@ def test_cast_col_to_pandas_dtype():
"d": b"hello world",
}
]
parsed_value = [
{
"a": 1,
"b": {"a": 1, "b": 2, "c": 3},
"c": [1, 2, 3, 4],
"d": b"hello world",
"e": pd.NA,
}
]
data = pd.Series([value], name="testing")
data_type = schema_proto.DataType(
array_type=schema_proto.ArrayType(
Expand Down Expand Up @@ -555,6 +564,16 @@ def test_cast_col_to_pandas_dtype():
bytes_type=schema_proto.BytesType()
),
),
schema_proto.Field(
name="e",
dtype=schema_proto.DataType(
optional_type=schema_proto.OptionalType(
of=schema_proto.DataType(
timestamp_type=schema_proto.TimestampType()
)
)
),
),
]
)
)
Expand All @@ -565,7 +584,7 @@ def test_cast_col_to_pandas_dtype():
pandas_dtype_data = cast_col_to_pandas_dtype(arrow_dtype_data, data_type)

assert pandas_dtype_data.dtype == object
assert pandas_dtype_data.tolist()[0] == value
assert pandas_dtype_data.tolist()[0] == parsed_value


def test_optional_timestamp_cast_col_to_pandas_dtype():
Expand Down
24 changes: 18 additions & 6 deletions fennel/testing/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,14 @@ def parse_datetime_in_value(
)
output: Dict[Any, Any] = {}
for field in dtype.struct_type.fields:
output[field.name] = parse_datetime_in_value(
value[field.name], field.dtype
)
dtype = field.dtype
name = field.name
if not dtype.HasField("optional_type") and name not in value:
raise ValueError(
f"value not found for non optional field : {field}"
)
if name in value:
output[name] = parse_datetime_in_value(value[name], dtype)
return output
else:
return value
Expand Down Expand Up @@ -312,9 +317,16 @@ def convert_val_to_pandas_dtype(
fields = data_type.struct_type.fields
output = {}
for field in fields:
output[field.name] = convert_val_to_pandas_dtype(
value[field.name], field.dtype, nullable
)
dtype = field.dtype
name = field.name
if not dtype.HasField("optional_type") and name not in value:
raise ValueError(
f"value not found for non optional field : {field}"
)
if name in value:
output[name] = convert_val_to_pandas_dtype(
value[name], dtype, nullable
)
return output


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "1.5.12"
version = "1.5.13"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <developers@fennel.ai>"]
packages = [{ include = "fennel" }]
Expand Down

0 comments on commit 99f1a31

Please sign in to comment.