From 1d22322c2c2d7d21c08add3b8eba4d4e7f309e82 Mon Sep 17 00:00:00 2001
From: Aditya Nambiar <aditya.nambiar007@gmail.com>
Date: Thu, 5 Sep 2024 20:08:24 -0700
Subject: [PATCH] assign: Include type promotion in assign

---
 fennel/CHANGELOG.md                        |   3 +
 fennel/client_tests/test_complex_struct.py | 151 ++++-
 fennel/client_tests/test_dataset.py        | 127 ++---
 fennel/datasets/datasets.py                |   5 +-
 fennel/dtypes/dtypes.py                    |  12 +
 fennel/expr/expr.py                        |  54 +-
 fennel/expr/test_expr.py                   | 618 ++++++++++++---------
 fennel/internal_lib/schema/schema.py       |  23 +-
 fennel/internal_lib/utils/utils.py         |  18 +-
 fennel/testing/executor.py                 |   8 +-
 poetry.lock                                |  88 +--
 pyproject.toml                             |  24 +-
 12 files changed, 712 insertions(+), 419 deletions(-)

diff --git a/fennel/CHANGELOG.md b/fennel/CHANGELOG.md
index 2eb9d4033..462489e1d 100644
--- a/fennel/CHANGELOG.md
+++ b/fennel/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Changelog
 
+## [1.5.18] - 2024-09-05
+- Struct initializer + arrow fixes + type promotion in assign
+
 ## [1.5.17] - 2024-09-04
 - Add support for several more expressions
 
diff --git a/fennel/client_tests/test_complex_struct.py b/fennel/client_tests/test_complex_struct.py
index 285007654..72b92a154 100644
--- a/fennel/client_tests/test_complex_struct.py
+++ b/fennel/client_tests/test_complex_struct.py
@@ -8,6 +8,7 @@
 from fennel.connectors import Webhook, source
 from fennel.datasets import dataset, Dataset, field, pipeline, LastK
 from fennel.dtypes import struct, Continuous
+from fennel.expr.expr import col, make_struct
 from fennel.featuresets import featureset, feature as F, extractor
 from fennel.lib import inputs, outputs
 from fennel.testing import mock
@@ -87,6 +88,118 @@ def movie_info(cls, movie: Dataset):
         )
 
 
+@dataset(index=True)
+class MovieInfoExpr:
+    director_id: int = field(key=True)
+    movie_id: int = field(key=True)
+    role_list: List[Role]
+    timestamp: datetime = field(timestamp=True)
+
+    @pipeline
+    @inputs(MovieDS)
+    def movie_info(cls, movie: Dataset):
+        return (
+            movie.assign(
+                role=make_struct(
+                    {
+                        "role_id": col("role_id"),
+                        "name": col("name"),
+                        "cost": col("cost"),
+                    },
+                    Role,
+                ).astype(Role)
+            )
+            .drop(columns=["role_id", "name", "cost"])
+            .groupby("director_id", "movie_id")
+            .aggregate(
+                LastK(
+                    into_field="role_list",
+                    of="role",
+                    window=Continuous("forever"),
+                    limit=3,
+                    dedup=False,
+                ),
+            )
+        )
+
+
+@dataset(index=True)
+class MovieInfoExpr2:
+    director_id: int = field(key=True)
+    movie_id: int = field(key=True)
+    role_list: List[Role]
+    timestamp: datetime = field(timestamp=True)
+
+    @pipeline
+    @inputs(MovieDS)
+    def movie_info(cls, movie: Dataset):
+        return (
+            movie.assign(
+                role=Role.expr(  # type: ignore
+                    role_id=col("role_id"), name=col("name"), cost=col("cost")
+                ).astype(Role)
+            )
+            .drop(columns=["role_id", "name", "cost"])
+            .groupby("director_id", "movie_id")
+            .aggregate(
+                LastK(
+                    into_field="role_list",
+                    of="role",
+                    window=Continuous("forever"),
+                    limit=3,
+                    dedup=False,
+                ),
+            )
+        )
+
+
+@struct
+class FullName:
+    first_name: str
+    last_name: str
+
+
+@struct
+class RoleExtended:
+    role_id: int
+    name: FullName
+    cost: int
+
+
+@dataset(index=True)
+class MovieInfoExprNested:
+    director_id: int = field(key=True)
+    movie_id: int = field(key=True)
+    role_list: List[RoleExtended]
+    timestamp: datetime = field(timestamp=True)
+
+    @pipeline
+    @inputs(MovieDS)
+    def movie_info(cls, movie: Dataset):
+        return (
+            movie.assign(
+                role=RoleExtended.expr(  # type: ignore
+                    role_id=col("role_id"),
+                    name=FullName.expr(  # type: ignore
+                        first_name=col("name"), last_name="rando"
+                    ),
+                    cost=col("cost"),
+                ).astype(RoleExtended)
+            )
+            .drop(columns=["role_id", "name", "cost"])
+            .groupby("director_id", "movie_id")
+            .aggregate(
+                LastK(
+                    into_field="role_list",
+                    of="role",
+                    window=Continuous("forever"),
+                    limit=3,
+                    dedup=False,
+                ),
+            )
+        )
+
+
 @featureset
 class Request:
     director_id: int
@@ -218,7 +331,13 @@ def test_complex_struct(client):
 
     client.commit(
         message="msg",
-        datasets=[MovieDS, MovieInfo],
+        datasets=[
+            MovieDS,
+            MovieInfo,
+            MovieInfoExpr,
+            MovieInfoExpr2,
+            MovieInfoExprNested,
+        ],
         featuresets=[Request, MovieFeatures],
     )
 
@@ -255,6 +374,36 @@ def test_complex_struct(client):
         input_dataframe=input_df,
     )
 
+    res1, found1 = client.lookup(
+        "MovieInfo",
+        keys=pd.DataFrame({"director_id": [1, 2], "movie_id": [1, 3]}),
+    )
+    res2, found2 = client.lookup(
+        "MovieInfoExpr",
+        keys=pd.DataFrame({"director_id": [1, 2], "movie_id": [1, 3]}),
+    )
+    res3, found3 = client.lookup(
+        "MovieInfoExpr2",
+        keys=pd.DataFrame({"director_id": [1, 2], "movie_id": [1, 3]}),
+    )
+    assert res1.shape == res2.shape
+    assert res1.shape == res3.shape
+    for c in res1.columns:
+        assert res1[c].equals(res2[c])
+        assert res1[c].equals(res3[c])
+    assert list(found1) == list(found2)
+    assert list(found1) == list(found3)
+
+    res4, found4 = client.lookup(
+        "MovieInfoExprNested",
+        keys=pd.DataFrame({"director_id": [1, 2], "movie_id": [1, 3]}),
+    )
+    assert res1.shape == res4.shape
+    assert list(found1) == list(found4)
+    for r in res4["role_list"]:
+        for role in r:
+            assert role.name.last_name == "rando"
+
     assert df.shape[0] == 4
     assert len(df["MovieFeatures.role_list_py"].tolist()[0]) == 3
     assert df["MovieFeatures.role_list_py"].tolist()[0][0].as_json() == {
diff --git a/fennel/client_tests/test_dataset.py b/fennel/client_tests/test_dataset.py
index 6001c994d..6d3e1619b 100644
--- a/fennel/client_tests/test_dataset.py
+++ b/fennel/client_tests/test_dataset.py
@@ -606,81 +606,6 @@ class UserInfoDataset:
         )
 
 
-# On demand datasets are not supported for now.
-
-# class TestDocumentDataset(unittest.TestCase):
-#     @mock_client
-#     def test_log_to_document_dataset(self, client):
-#         """Log some data to the dataset and check if it is logged correctly."""
-#
-#         @meta(owner="aditya@fennel.ai")
-#         @dataset
-#         class DocumentContentDataset:
-#             doc_id: int = field(key=True)
-#             bert_embedding: Embedding[4]
-#             fast_text_embedding: Embedding[3]
-#             num_words: int
-#             timestamp: datetime = field(timestamp=True)
-#
-#             @on_demand(expires_after="3d")
-#             @inputs(datetime, int)
-#             def get_embedding(cls, ts: pd.Series, doc_ids: pd.Series):
-#                 data = []
-#                 doc_ids = doc_ids.tolist()
-#                 for i in range(len(ts)):
-#                     data.append(
-#                         [
-#                             doc_ids[i],
-#                             [0.1, 0.2, 0.3, 0.4],
-#                             [1.1, 1.2, 1.3],
-#                             10 * i,
-#                             ts[i],
-#                         ]
-#                     )
-#                 columns = [
-#                     str(cls.doc_id),
-#                     str(cls.bert_embedding),
-#                     str(cls.fast_text_embedding),
-#                     str(cls.num_words),
-#                     str(cls.timestamp),
-#                 ]
-#                 return pd.DataFrame(data, columns=columns), pd.Series(
-#                     [True] * len(ts)
-#                 )
-#
-#         # Sync the dataset
-#         client.commit(datasets=[DocumentContentDataset])
-#         now = datetime.now(timezone.utc)
-#         data = [
-#             [18232, np.array([1, 2, 3, 4]), np.array([1, 2, 3]), 10, now],
-#             [
-#                 18234,
-#                 np.array([1, 2.2, 0.213, 0.343]),
-#                 np.array([0.87, 2, 3]),
-#                 9,
-#                 now,
-#             ],
-#             [18934, [1, 2.2, 0.213, 0.343], [0.87, 2, 3], 12, now],
-#         ]
-#         columns = [
-#             "doc_id",
-#             "bert_embedding",
-#             "fast_text_embedding",
-#             "num_words",
-#             "timestamp",
-#         ]
-#         df = pd.DataFrame(data, columns=columns)
-#         response = client.log("fennel_webhook","DocumentContentDataset", df)
-#         assert response.status_code == requests.codes.OK, response.json()
-#
-#         # Do some lookups
-#         doc_ids = pd.Series([18232, 1728, 18234, 18934, 19200, 91012])
-#         ts = pd.Series([now, now, now, now, now, now])
-#         df, _ = DocumentContentDataset.lookup(ts, doc_id=doc_ids)
-#         assert df.shape == (6, 5)
-#         assert df["num_words"].tolist() == [10.0, 9.0, 12, 0, 10.0, 20.0]
-
-
 ################################################################################
 #                           Dataset & Pipelines Unit Tests
 ################################################################################
@@ -1012,6 +937,25 @@ class Orders:
     timestamp: datetime
 
 
+@dataset
+class OrdersOptional:
+    uid: Optional[int]
+    uid_float: float
+    uid_twice: float
+    skus: List[int]
+    prices: List[float]
+    timestamp: datetime
+
+    @pipeline
+    @inputs(Orders)
+    def cast(cls, ds: Dataset):
+        return ds.assign(
+            uid=col("uid").astype(Optional[int]),  # type: ignore
+            uid_float=col("uid").astype(float),  # type: ignore
+            uid_twice=(col("uid") * 2.0).astype(float),  # type: ignore
+        )
+
+
 @dataset(index=True)
 class Derived:
     uid: int = field(key=True)
@@ -1066,6 +1010,39 @@ def test_basic_explode(self, client):
         assert df["price"].tolist()[0] == 10.1
         assert pd.isna(df["price"].tolist()[1])
 
+    @pytest.mark.integration
+    @mock
+    def test_basic_cast(self, client):
+        # # Sync the dataset
+        client.commit(message="msg", datasets=[Orders, OrdersOptional])
+        # log some rows to the transaction dataset
+        df = pd.DataFrame(
+            [
+                {
+                    "uid": 1,
+                    "skus": [1, 2],
+                    "prices": [10.1, 20.0],
+                    "timestamp": "2021-01-01T00:00:00",
+                },
+                {
+                    "uid": 2,
+                    "skus": [],
+                    "prices": [],
+                    "timestamp": "2021-01-01T00:00:00",
+                },
+            ]
+        )
+        client.log("webhook", "Orders", df)
+        client.sleep()
+
+        # do lookup on the WithSquare dataset
+        df = client.inspect("OrdersOptional")
+        assert df.shape == (2, 6)
+        assert df["uid"].tolist() == [1, 2]
+        assert df["uid_float"].tolist() == [1.0, 2.0]
+        assert df["uid_twice"].tolist() == [2.0, 4.0]
+        assert df["skus"].tolist() == [[1, 2], []]
+
 
 class TestBasicAssign(unittest.TestCase):
     @pytest.mark.integration
diff --git a/fennel/datasets/datasets.py b/fennel/datasets/datasets.py
index e6598bc8e..f4daa5340 100644
--- a/fennel/datasets/datasets.py
+++ b/fennel/datasets/datasets.py
@@ -2935,7 +2935,9 @@ def visitAssign(self, obj) -> DSSchema:
                     raise ValueError(
                         f"invalid assign - {output_schema_name} error in expression for column `{col}`: {str(e)}"
                     )
-                if typed_expr.dtype != expr_type:
+                if not typed_expr.expr.matches_type(
+                    typed_expr.dtype, input_schema.schema()
+                ):
                     printer = ExprPrinter()
                     type_errors.append(
                         f"'{col}' is expected to be of type `{dtype_to_string(typed_expr.dtype)}`, but evaluates to `{dtype_to_string(expr_type)}`. Full expression: `{printer.print(typed_expr.expr.root)}`"
@@ -2943,7 +2945,6 @@ def visitAssign(self, obj) -> DSSchema:
 
             if len(type_errors) > 0:
                 joined_errors = "\n\t".join(type_errors)
-                print(joined_errors)
                 raise TypeError(
                     f"found type errors in assign node of `{self.dsname}.{self.pipeline_name}`:\n\t{joined_errors}"
                 )
diff --git a/fennel/dtypes/dtypes.py b/fennel/dtypes/dtypes.py
index 253d9dbc0..e01f4f879 100644
--- a/fennel/dtypes/dtypes.py
+++ b/fennel/dtypes/dtypes.py
@@ -1,4 +1,5 @@
 import dataclasses
+from functools import partial
 import inspect
 import sys
 from dataclasses import dataclass
@@ -75,6 +76,16 @@ def get_fennel_struct(annotation) -> Any:
         return None
 
 
+def make_struct_expr(cls, **kwargs):
+    from fennel.expr.expr import Expr, make_expr, make_struct
+
+    fields = {}
+    for name, value in kwargs.items():
+        fields[name] = make_expr(value)
+
+    return make_struct(fields, cls)
+
+
 def struct(cls):
     for name, member in inspect.getmembers(cls):
         if inspect.isfunction(member) and name in cls.__dict__:
@@ -131,6 +142,7 @@ def struct(cls):
         setattr(cls, FENNEL_STRUCT_SRC_CODE, "")
     setattr(cls, FENNEL_STRUCT_DEPENDENCIES_SRC_CODE, dependency_code)
     cls.as_json = as_json
+    cls.expr = partial(make_struct_expr, cls)
     return dataclasses.dataclass(cls)
 
 
diff --git a/fennel/expr/expr.py b/fennel/expr/expr.py
index 6bcee1530..5b748540b 100644
--- a/fennel/expr/expr.py
+++ b/fennel/expr/expr.py
@@ -13,7 +13,7 @@
     parse_json,
 )
 import pyarrow as pa
-from fennel_data_lib import eval, type_of
+from fennel_data_lib import assign, type_of, matches
 
 import fennel.gen.schema_pb2 as schema_proto
 from fennel.internal_lib.schema import (
@@ -340,7 +340,26 @@ def typeof(self, schema: Dict = None) -> Type:
         datatype.ParseFromString(type_bytes)
         return from_proto(datatype)
 
-    def eval(self, input_df: pd.DataFrame, schema: Dict) -> pd.Series:
+    def matches_type(self, dtype: Type, schema: Dict) -> bool:
+        schema = schema or {}
+        from fennel.expr.serializer import ExprSerializer
+
+        serializer = ExprSerializer()
+        proto_expr = serializer.serialize(self.root)
+        proto_bytes = proto_expr.SerializeToString()
+        proto_schema = {}
+        for key, value in schema.items():
+            proto_schema[key] = get_datatype(value).SerializeToString()
+        proto_type_bytes = get_datatype(dtype).SerializeToString()
+        return matches(proto_bytes, proto_schema, proto_type_bytes)
+
+    def eval(
+        self,
+        input_df: pd.DataFrame,
+        schema: Dict,
+        output_dtype: Optional[Type] = None,
+        parse=True,
+    ) -> pd.Series:
         from fennel.expr.serializer import ExprSerializer
 
         def pd_to_pa(pd_data: pd.DataFrame, schema: Dict[str, Type]):
@@ -352,37 +371,29 @@ def pd_to_pa(pd_data: pd.DataFrame, schema: Dict[str, Type]):
                         f"column : {column} not found in input dataframe but defined in schema."
                     )
                 new_df[column] = cast_col_to_arrow_dtype(new_df[column], dtype)
-                print("NEw DF: ", column, new_df[column])
             new_df = new_df.loc[:, list(schema.keys())]
             fields = []
             for column, dtype in schema.items():
                 proto_dtype = get_datatype(dtype)
-                pa_type = convert_dtype_to_arrow_type_with_nullable(
-                    proto_dtype
-                )
-                print(f"column: {column}, dtype: {dtype}, pa_type: {pa_type}")
-                print("proto_dtype: ", proto_dtype)
+                pa_type = convert_dtype_to_arrow_type_with_nullable(proto_dtype)
                 if proto_dtype.HasField("optional_type"):
                     nullable = True
                 else:
                     nullable = False
                 field = pa.field(column, type=pa_type, nullable=nullable)
-                print(f"field: {field.name}, type: {field.type}", "nullable: ", field.nullable)
                 fields.append(field)
             pa_schema = pa.schema(fields)
             # Replace pd.NA with None
             new_df = new_df.where(pd.notna(new_df), None)
-            print("New DF: ", new_df)
-            x = pa.RecordBatch.from_pandas(
+            return pa.RecordBatch.from_pandas(
                 new_df, preserve_index=False, schema=pa_schema
             )
-            print("RecordBatch: ", x)
-            return x
 
-        def pa_to_pd(pa_data, ret_type):
+        def pa_to_pd(pa_data, ret_type, parse=True):
             ret = pa_data.to_pandas(types_mapper=pd.ArrowDtype)
             ret = cast_col_to_pandas(ret, get_datatype(ret_type))
-            ret = ret.apply(lambda x: parse_json(ret_type, x))
+            if parse:
+                ret = ret.apply(lambda x: parse_json(ret_type, x))
             return ret
 
         serializer = ExprSerializer()
@@ -392,9 +403,16 @@ def pa_to_pd(pa_data, ret_type):
         proto_schema = {}
         for key, value in schema.items():
             proto_schema[key] = get_datatype(value).SerializeToString()
-        ret_type = self.typeof(schema)
-        arrow_col = eval(proto_bytes, df_pa, proto_schema)
-        return pa_to_pd(arrow_col, ret_type)
+        if output_dtype is None:
+            ret_type = self.typeof(schema)
+        else:
+            ret_type = output_dtype
+
+        serialized_ret_type = get_datatype(ret_type).SerializeToString()
+        arrow_col = assign(
+            proto_bytes, df_pa, proto_schema, serialized_ret_type
+        )
+        return pa_to_pd(arrow_col, ret_type, parse)
 
     def __str__(self) -> str:  # type: ignore
         from fennel.expr.visitor import ExprPrinter
diff --git a/fennel/expr/test_expr.py b/fennel/expr/test_expr.py
index 59472b42a..478bd0ae3 100644
--- a/fennel/expr/test_expr.py
+++ b/fennel/expr/test_expr.py
@@ -408,14 +408,19 @@ def compare_values(received, expected, dtype):
                 r = getattr(act, field.name)
                 e = getattr(exp, field.name)
                 if not is_user_defined_class(field.type):
+                    if (
+                        not isinstance(e, list)
+                        and not isinstance(r, list)
+                        and pd.isna(e)
+                        and pd.isna(r)
+                    ):
+                        continue
                     assert (
                         r == e
                     ), f"Expected {e}, got {r} for field {field.name} in struct {act}"
                 else:
                     compare_values([r], [e], field.type)
     else:
-        print("Received", received)
-        print("Expected", expected)
         assert (
             list(received) == expected
         ), f"Expected {expected}, got {received} for dtype {dtype}"
@@ -423,8 +428,9 @@ def compare_values(received, expected, dtype):
 
 def check_test_case(test_case: ExprTestCase):
     # Test print test case
-    printer = ExprPrinter()
-    assert printer.print(test_case.expr) == test_case.display
+    if test_case.display:
+        printer = ExprPrinter()
+        assert printer.print(test_case.expr) == test_case.display
 
     # Test FetchReferences test case
     ref_extractor = FetchReferences()
@@ -441,6 +447,10 @@ def check_test_case(test_case: ExprTestCase):
     # Test type inference
     # If it is a dataclass, we check if all fields are present
     if is_user_defined_class(test_case.expected_dtype):
+        if not hasattr(
+            test_case.expr.typeof(test_case.schema), "__annotations__"
+        ):
+            assert False, "Expected a dataclass"
         for field in fields(test_case.expected_dtype):
             assert (
                 field.name
@@ -518,204 +528,294 @@ class Nested:
     c: List[int]
 
 
-def test_parse():
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(int)),
-        df=pd.DataFrame({"a": ["1", "2", "3", "4"]}),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'int'>)",
-        refs={"a"},
-        eval_result=[1, 2, 3, 4],
-        expected_dtype=int,
-        proto_json=None,
-    )
-    check_test_case(test_case)
-
-    # Parse a struct
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(A)),
-        df=pd.DataFrame(
-            {"a": ['{"x": 1, "y": 2, "z": "a"}', '{"x": 2, "y": 3, "z": "b"}']}
-        ),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'fennel.expr.test_expr.A'>)",
-        refs={"a"},
-        eval_result=[A(1, 2, "a"), A(2, 3, "b")],
-        expected_dtype=A,
-        proto_json=None,
-    )
-    check_test_case(test_case)
+@struct
+class OptionalA:
+    w: int
+    x: Optional[int]
+    y: Optional[int]
+    z: Optional[str]
 
-    # Parse a list of integers
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(List[int])),
-        df=pd.DataFrame({"a": ["[1, 2, 3]", "[4, 5, 6]"]}),
-        schema={"a": str},
-        display="PARSE(col('a'), typing.List[int])",
-        refs={"a"},
-        eval_result=[[1, 2, 3], [4, 5, 6]],
-        expected_dtype=List[int],
-        proto_json=None,
-    )
-    check_test_case(test_case)
 
-    # Parse a nested struct
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(Nested)),
-        df=pd.DataFrame(
-            {
-                "a": [
-                    '{"a": {"x": 1, "y": 2, "z": "a"}, "b": {"p": 1, "q": "b"}, "c": [1, 2, 3]}'
-                ]
-            }
+def test_parse():
+    cases = [
+        ExprTestCase(
+            expr=(col("a").str.parse(int)),
+            df=pd.DataFrame({"a": ["1", "2", "3", "4"]}),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'int'>)",
+            refs={"a"},
+            eval_result=[1, 2, 3, 4],
+            expected_dtype=int,
+            proto_json=None,
         ),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'fennel.expr.test_expr.Nested'>)",
-        refs={"a"},
-        eval_result=[Nested(A(1, 2, "a"), B(1, "b"), [1, 2, 3])],
-        expected_dtype=Nested,
-        proto_json=None,
-    )
-    check_test_case(test_case)
-
-    # Parse floats
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(float)),
-        df=pd.DataFrame({"a": ["1.1", "2.2", "3.3", "4.4"]}),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'float'>)",
-        refs={"a"},
-        eval_result=[1.1, 2.2, 3.3, 4.4],
-        expected_dtype=float,
-        proto_json=None,
-    )
-    check_test_case(test_case)
-
-    # Parse bool
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(bool)),
-        df=pd.DataFrame({"a": ["true", "false", "true", "false"]}),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'bool'>)",
-        refs={"a"},
-        eval_result=[True, False, True, False],
-        expected_dtype=bool,
-        proto_json=None,
-    )
-    check_test_case(test_case)
+        # Parse a struct
+        ExprTestCase(
+            expr=(col("a").str.parse(A)),
+            df=pd.DataFrame(
+                {
+                    "a": [
+                        '{"x": 1, "y": 2, "z": "a"}',
+                        '{"x": 2, "y": 3, "z": "b"}',
+                    ]
+                }
+            ),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'fennel.expr.test_expr.A'>)",
+            refs={"a"},
+            eval_result=[A(1, 2, "a"), A(2, 3, "b")],
+            expected_dtype=A,
+            proto_json=None,
+        ),
+        # Parse a list of integers
+        ExprTestCase(
+            expr=(col("a").str.parse(List[int])),
+            df=pd.DataFrame({"a": ["[1, 2, 3]", "[4, 5, 6]"]}),
+            schema={"a": str},
+            display="PARSE(col('a'), typing.List[int])",
+            refs={"a"},
+            eval_result=[[1, 2, 3], [4, 5, 6]],
+            expected_dtype=List[int],
+            proto_json=None,
+        ),
+        # Parse a nested struct
+        ExprTestCase(
+            expr=(col("a").str.parse(Nested)),
+            df=pd.DataFrame(
+                {
+                    "a": [
+                        '{"a": {"x": 1, "y": 2, "z": "a"}, "b": {"p": 1, "q": "b"}, "c": [1, 2, 3]}'
+                    ]
+                }
+            ),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'fennel.expr.test_expr.Nested'>)",
+            refs={"a"},
+            eval_result=[Nested(A(1, 2, "a"), B(1, "b"), [1, 2, 3])],
+            expected_dtype=Nested,
+            proto_json=None,
+        ),
+        # Parse floats
+        ExprTestCase(
+            expr=(col("a").str.parse(float)),
+            df=pd.DataFrame({"a": ["1.1", "2.2", "3.3", "4.4"]}),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'float'>)",
+            refs={"a"},
+            eval_result=[1.1, 2.2, 3.3, 4.4],
+            expected_dtype=float,
+            proto_json=None,
+        ),
+        # Parse bool
+        ExprTestCase(
+            expr=(col("a").str.parse(bool)),
+            df=pd.DataFrame({"a": ["true", "false", "true", "false"]}),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'bool'>)",
+            refs={"a"},
+            eval_result=[True, False, True, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
+        # Parse strings
+        ExprTestCase(
+            expr=(col("a").str.parse(str)),
+            df=pd.DataFrame({"a": ['"a1"', '"b"', '"c"', '"d"']}),
+            schema={"a": str},
+            display="PARSE(col('a'), <class 'str'>)",
+            refs={"a"},
+            eval_result=["a1", "b", "c", "d"],
+            expected_dtype=str,
+            proto_json=None,
+        ),
+        # Parse optional strings to structs
+        ExprTestCase(
+            expr=(
+                (
+                    col("a")
+                    .fillnull('{"x": 12, "y": 21, "z": "rando"}')
+                    .str.parse(A)
+                )
+            ),
+            df=pd.DataFrame(
+                {
+                    "a": [
+                        '{"x": 1, "y": 2, "z": "a"}',
+                        '{"x": 2, "y": 3, "z": "b"}',
+                        None,
+                        None,
+                    ]
+                }
+            ),
+            schema={"a": Optional[str]},
+            display=None,
+            refs={"a"},
+            eval_result=[
+                A(1, 2, "a"),
+                A(2, 3, "b"),
+                A(12, 21, "rando"),
+                A(12, 21, "rando"),
+            ],
+            expected_dtype=A,
+            proto_json={},
+        ),
+        ExprTestCase(
+            expr=(
+                (
+                    col("a")
+                    .fillnull("""{"x": 12, "y": 21, "z": "rando"}""")
+                    .str.parse(A)
+                )
+            ),
+            df=pd.DataFrame(
+                {
+                    "a": [
+                        '{"x": 1, "y": 2, "z": "a"}',
+                        '{"x": 2, "y": 3, "z": "b"}',
+                        None,
+                        None,
+                    ]
+                }
+            ),
+            schema={"a": Optional[str]},
+            display=None,
+            refs={"a"},
+            eval_result=[
+                A(1, 2, "a"),
+                A(2, 3, "b"),
+                A(12, 21, "rando"),
+                A(12, 21, "rando"),
+            ],
+            expected_dtype=A,
+            proto_json={},
+        ),
+        ExprTestCase(
+            expr=((col("a").fillnull("""{"w": 12}""").str.parse(OptionalA))),
+            df=pd.DataFrame(
+                {
+                    "a": [
+                        '{"w": 1, "x": 2, "y": 3, "z": "a"}',
+                        '{"w": 2, "x": 3, "y": 4, "z": "b"}',
+                        None,
+                        None,
+                    ]
+                }
+            ),
+            schema={"a": Optional[str]},
+            display=None,
+            refs={"a"},
+            eval_result=[
+                OptionalA(1, 2, 3, "a"),
+                OptionalA(2, 3, 4, "b"),
+                OptionalA(12, pd.NA, pd.NA, pd.NA),
+                OptionalA(12, pd.NA, pd.NA, pd.NA),
+            ],
+            expected_dtype=OptionalA,
+            proto_json={},
+        ),
+    ]
 
-    # Parse strings
-    test_case = ExprTestCase(
-        expr=(col("a").str.parse(str)),
-        df=pd.DataFrame({"a": ['"a1"', '"b"', '"c"', '"d"']}),
-        schema={"a": str},
-        display="PARSE(col('a'), <class 'str'>)",
-        refs={"a"},
-        eval_result=["a1", "b", "c", "d"],
-        expected_dtype=str,
-        proto_json=None,
-    )
-    check_test_case(test_case)
+    for case in cases:
+        check_test_case(case)
 
 
 def test_list():
     test_cases = [
-        # ExprTestCase(
-        #     expr=(col("a").list.get(0)),
-        #     df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
-        #     schema={"a": List[int]},
-        #     display="col('a')[0]",
-        #     refs={"a"},
-        #     eval_result=[1, 4, 7],
-        #     expected_dtype=Optional[int],
-        #     proto_json=None,
-        # ),
-        # # Get index where index is an expression
-        # ExprTestCase(
-        #     expr=(col("a").list.get(col("b") + col("c"))),
-        #     df=pd.DataFrame(
-        #         {
-        #             "a": [[1, 2, 3, 4], [4, 5, 6, 12], [7, 8, 9, 19]],
-        #             "b": [
-        #                 0,
-        #                 1,
-        #                 2,
-        #             ],
-        #             "c": [1, 2, 0],
-        #         }
-        #     ),
-        #     schema={"a": List[int], "b": int, "c": int},
-        #     display="col('a')[(col('b') + col('c'))]",
-        #     refs={"a", "b", "c"},
-        #     eval_result=[2, 12, 9],
-        #     expected_dtype=Optional[int],
-        #     proto_json=None,
-        # ),
-        # # Out of bounds index
-        # ExprTestCase(
-        #     expr=(col("a").list.get(col("b"))),
-        #     df=pd.DataFrame(
-        #         {
-        #             "a": [[1, 2, 3, 4], [4, 5, 6, 12], [7, 8, 9, 19]],
-        #             "b": [0, 21, 5],
-        #         }
-        #     ),
-        #     schema={"a": List[int], "b": int},
-        #     display="col('a')[col('b')]",
-        #     refs={"a", "b"},
-        #     eval_result=[1, pd.NA, pd.NA],
-        #     expected_dtype=Optional[int],
-        #     proto_json=None,
-        # ),
-        # # List contains
-        # ExprTestCase(
-        #     expr=(col("a").list.contains(3)),
-        #     df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
-        #     schema={"a": List[int]},
-        #     display="CONTAINS(col('a'), 3)",
-        #     refs={"a"},
-        #     eval_result=[True, False, False],
-        #     expected_dtype=bool,
-        #     proto_json=None,
-        # ),
-        # # List contains with expression
-        # ExprTestCase(
-        #     expr=(col("a").list.contains(col("b") * col("c"))),
-        #     df=pd.DataFrame(
-        #         {
-        #             "a": [[1, 2, 3], [4, 15, 6], [7, 8, 9]],
-        #             "b": [1, 5, 10],
-        #             "c": [2, 3, 4],
-        #         }
-        #     ),
-        #     schema={"a": List[int], "b": int, "c": int},
-        #     display="CONTAINS(col('a'), (col('b') * col('c')))",
-        #     refs={"a", "b", "c"},
-        #     eval_result=[True, True, False],
-        #     expected_dtype=bool,
-        #     proto_json=None,
-        # ),
-        # # List contains for list of strings
-        # ExprTestCase(
-        #     expr=(col("a2").list.contains(col("b2"))),
-        #     df=pd.DataFrame(
-        #         {
-        #             "a2": [
-        #                 ["a", "b", "c"],
-        #                 ["d", "e", "f"],
-        #                 ["g", "h", "i"],
-        #                 ["a", "b", "c"],
-        #             ],
-        #             "b2": ["a", "e", "c", "d"],
-        #         }
-        #     ),
-        #     schema={"a2": List[str], "b2": str},
-        #     display="""CONTAINS(col('a2'), col('b2'))""",
-        #     refs={"a2", "b2"},
-        #     eval_result=[True, True, False, False],
-        #     expected_dtype=bool,
-        #     proto_json=None,
-        # ),
+        ExprTestCase(
+            expr=(col("a").list.get(0)),
+            df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
+            schema={"a": List[int]},
+            display="col('a')[0]",
+            refs={"a"},
+            eval_result=[1, 4, 7],
+            expected_dtype=Optional[int],
+            proto_json=None,
+        ),
+        # Get index where index is an expression
+        ExprTestCase(
+            expr=(col("a").list.get(col("b") + col("c"))),
+            df=pd.DataFrame(
+                {
+                    "a": [[1, 2, 3, 4], [4, 5, 6, 12], [7, 8, 9, 19]],
+                    "b": [
+                        0,
+                        1,
+                        2,
+                    ],
+                    "c": [1, 2, 0],
+                }
+            ),
+            schema={"a": List[int], "b": int, "c": int},
+            display="col('a')[(col('b') + col('c'))]",
+            refs={"a", "b", "c"},
+            eval_result=[2, 12, 9],
+            expected_dtype=Optional[int],
+            proto_json=None,
+        ),
+        # Out of bounds index
+        ExprTestCase(
+            expr=(col("a").list.get(col("b"))),
+            df=pd.DataFrame(
+                {
+                    "a": [[1, 2, 3, 4], [4, 5, 6, 12], [7, 8, 9, 19]],
+                    "b": [0, 21, 5],
+                }
+            ),
+            schema={"a": List[int], "b": int},
+            display="col('a')[col('b')]",
+            refs={"a", "b"},
+            eval_result=[1, pd.NA, pd.NA],
+            expected_dtype=Optional[int],
+            proto_json=None,
+        ),
+        # List contains
+        ExprTestCase(
+            expr=(col("a").list.contains(3)),
+            df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
+            schema={"a": List[int]},
+            display="CONTAINS(col('a'), 3)",
+            refs={"a"},
+            eval_result=[True, False, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
+        # List contains with expression
+        ExprTestCase(
+            expr=(col("a").list.contains(col("b") * col("c"))),
+            df=pd.DataFrame(
+                {
+                    "a": [[1, 2, 3], [4, 15, 6], [7, 8, 9]],
+                    "b": [1, 5, 10],
+                    "c": [2, 3, 4],
+                }
+            ),
+            schema={"a": List[int], "b": int, "c": int},
+            display="CONTAINS(col('a'), (col('b') * col('c')))",
+            refs={"a", "b", "c"},
+            eval_result=[True, True, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
+        # List contains for list of strings
+        ExprTestCase(
+            expr=(col("a2").list.contains(col("b2"))),
+            df=pd.DataFrame(
+                {
+                    "a2": [
+                        ["a", "b", "c"],
+                        ["d", "e", "f"],
+                        ["g", "h", "i"],
+                        ["a", "b", "c"],
+                    ],
+                    "b2": ["a", "e", "c", "d"],
+                }
+            ),
+            schema={"a2": List[str], "b2": str},
+            display="""CONTAINS(col('a2'), col('b2'))""",
+            refs={"a2", "b2"},
+            eval_result=[True, True, False, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
         # Support struct inside a list
         ExprTestCase(
             expr=(
@@ -733,40 +833,40 @@ def test_list():
             expected_dtype=bool,
             proto_json=None,
         ),
-        # ExprTestCase(
-        #     expr=(col("a").list.len()),
-        #     df=pd.DataFrame(
-        #         {"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]]}
-        #     ),
-        #     schema={"a": List[A]},
-        #     display="LEN(col('a'))",
-        #     refs={"a"},
-        #     eval_result=[3],
-        #     expected_dtype=int,
-        #     proto_json=None,
-        # ),
-        # # List length
-        # ExprTestCase(
-        #     expr=(col("a").list.len()),
-        #     df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6, 12], [7, 8, 9, 19]]}),
-        #     schema={"a": List[int]},
-        #     display="LEN(col('a'))",
-        #     refs={"a"},
-        #     eval_result=[3, 4, 4],
-        #     expected_dtype=int,
-        #     proto_json=None,
-        # ),
-        # # Empty list length
-        # ExprTestCase(
-        #     expr=(col("a").list.len()),
-        #     df=pd.DataFrame({"a": [[], [4, 5, 6, 12], [7, 8, 9, 19]]}),
-        #     schema={"a": List[int]},
-        #     display="LEN(col('a'))",
-        #     refs={"a"},
-        #     eval_result=[0, 4, 4],
-        #     expected_dtype=int,
-        #     proto_json=None,
-        # ),
+        ExprTestCase(
+            expr=(col("a").list.len()),
+            df=pd.DataFrame(
+                {"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]]}
+            ),
+            schema={"a": List[A]},
+            display="LEN(col('a'))",
+            refs={"a"},
+            eval_result=[3],
+            expected_dtype=int,
+            proto_json=None,
+        ),
+        # List length
+        ExprTestCase(
+            expr=(col("a").list.len()),
+            df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6, 12], [7, 8, 9, 19]]}),
+            schema={"a": List[int]},
+            display="LEN(col('a'))",
+            refs={"a"},
+            eval_result=[3, 4, 4],
+            expected_dtype=int,
+            proto_json=None,
+        ),
+        # Empty list length
+        ExprTestCase(
+            expr=(col("a").list.len()),
+            df=pd.DataFrame({"a": [[], [4, 5, 6, 12], [7, 8, 9, 19]]}),
+            schema={"a": List[int]},
+            display="LEN(col('a'))",
+            refs={"a"},
+            eval_result=[0, 4, 4],
+            expected_dtype=int,
+            proto_json=None,
+        ),
     ]
 
     for test_case in test_cases:
@@ -1170,35 +1270,57 @@ def test_fillnull():
             expected_dtype=datetime,
             proto_json=None,
         ),
-    ]
-    for case in cases:
-        check_test_case(case)
-
-
-def test_isnull():
-    cases = [
+        # TODO(Nikhil): Add support for filling nulls for complex types
+        # Fill null for struct
         # ExprTestCase(
-        #     expr=(col("a").isnull()),
-        #     df=pd.DataFrame({"a": [1, 2, None, 4]}),
-        #     schema={"a": Optional[int]},
-        #     display="IS_NULL(col('a'))",
+        #     expr=(col("a").fillnull(lit(A(1, 2, "a"), A))),
+        #     df=pd.DataFrame({"a": [A(1, 2, "a"), None, A(3, 4, "b")]}),
+        #     schema={"a": Optional[A]},
+        #     display="""FILL_NULL(col('a'), {"x": 1, "y": 2, "z": "a"})""",
         #     refs={"a"},
-        #     eval_result=[False, False, True, False],
-        #     expected_dtype=bool,
+        #     eval_result=[A(1, 2, "a"), A(1, 2, "a"), A(3, 4, "b")],
+        #     expected_dtype=A,
         #     proto_json=None,
         # ),
+        # Fill null for Optional[List[int]]
         # ExprTestCase(
-        #     expr=(col("a").isnull()),
-        #     df=pd.DataFrame({"a": ["a", "b", None, "d"]}),
-        #     schema={"a": Optional[str]},
-        #     display="IS_NULL(col('a'))",
+        #     expr=(col("a").fillnull(lit([1, 2, 3], List[int]))),
+        #     df=pd.DataFrame({"a": [[1, 2, 3], None, [4, 5, 6]]}),
+        #     schema={"a": Optional[List[int]]},
+        #     display="FILL_NULL(col('a'), [1, 2, 3])",
         #     refs={"a"},
-        #     eval_result=[False, False, True, False],
-        #     expected_dtype=bool,
+        #     eval_result=[[1, 2, 3], [1, 2, 3], [4, 5, 6]],
+        #     expected_dtype=List[int],
         #     proto_json=None,
         # ),
+    ]
+    for case in cases:
+        check_test_case(case)
+
+
+def test_isnull():
+    cases = [
+        ExprTestCase(
+            expr=(col("a").isnull()),
+            df=pd.DataFrame({"a": [1, 2, None, 4]}),
+            schema={"a": Optional[int]},
+            display="IS_NULL(col('a'))",
+            refs={"a"},
+            eval_result=[False, False, True, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
+        ExprTestCase(
+            expr=(col("a").isnull()),
+            df=pd.DataFrame({"a": ["a", "b", None, "d"]}),
+            schema={"a": Optional[str]},
+            display="IS_NULL(col('a'))",
+            refs={"a"},
+            eval_result=[False, False, True, False],
+            expected_dtype=bool,
+            proto_json=None,
+        ),
         # Each type is a struct
-        # TODO(Aditya): Fix this test case
         ExprTestCase(
             expr=(col("a").isnull()),
             df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), None]}),
diff --git a/fennel/internal_lib/schema/schema.py b/fennel/internal_lib/schema/schema.py
index 5dd98180f..643cd51ea 100644
--- a/fennel/internal_lib/schema/schema.py
+++ b/fennel/internal_lib/schema/schema.py
@@ -292,17 +292,25 @@ def convert_dtype_to_arrow_type_with_nullable(
         return pa.decimal128(28, dtype.decimal_type.scale)
     elif dtype.HasField("array_type"):
         return pa.list_(
-            value_type=convert_dtype_to_arrow_type_with_nullable(dtype.array_type.of, nullable)
+            value_type=convert_dtype_to_arrow_type_with_nullable(
+                dtype.array_type.of, nullable
+            )
         )
     elif dtype.HasField("map_type"):
-        key_pa_type = convert_dtype_to_arrow_type_with_nullable(dtype.map_type.key, nullable=False)
-        value_pa_type = convert_dtype_to_arrow_type_with_nullable(dtype.map_type.value, nullable)
+        key_pa_type = convert_dtype_to_arrow_type_with_nullable(
+            dtype.map_type.key, nullable=False
+        )
+        value_pa_type = convert_dtype_to_arrow_type_with_nullable(
+            dtype.map_type.value, nullable
+        )
         return pa.map_(key_pa_type, value_pa_type, False)
     elif dtype.HasField("embedding_type"):
         embedding_size = dtype.embedding_type.embedding_size
         return pa.list_(pa.float64(), embedding_size)
     elif dtype.HasField("one_of_type"):
-        return convert_dtype_to_arrow_type_with_nullable(dtype.one_of_type.of, nullable)
+        return convert_dtype_to_arrow_type_with_nullable(
+            dtype.one_of_type.of, nullable
+        )
     elif dtype.HasField("between_type"):
         return convert_dtype_to_arrow_type_with_nullable(
             dtype.between_type.dtype, nullable
@@ -517,9 +525,6 @@ def validate_field_in_df(
     name = field.name
     dtype = field.dtype
     arrow_type = convert_dtype_to_arrow_type(dtype)
-    for col in df.columns:
-        print("Type of column: ", col, df[col].dtype)
-    print("Dtype: ", dtype)
     if df.shape[0] == 0:
         return
     if name not in df.columns:
@@ -856,7 +861,6 @@ def is_hashable(dtype: Any) -> bool:
 def data_schema_check(
     schema: schema_proto.DSSchema, df: pd.DataFrame, dataset_name=""
 ) -> List[ValueError]:
-    print("Checkung schema", df.dtypes) 
     exceptions = []
     fields = []
     for key in schema.keys.fields:
@@ -960,11 +964,10 @@ def cast_col_to_arrow_dtype(
     # Let's convert structs into json, this is done because arrow
     # dtype conversion fails with fennel struct
     # Parse datetime values
-    series = series.apply(lambda x: parse_datetime_in_value(x, dtype))
     if check_dtype_has_struct_type(dtype):
         series = series.apply(lambda x: parse_struct_into_dict(x, dtype))
+    series = series.apply(lambda x: parse_datetime_in_value(x, dtype))
     arrow_type = convert_dtype_to_arrow_type_with_nullable(dtype)
-    print("cast_col_to_arrow_dtype Arrow type: ", arrow_type, "dtype: ", dtype)
     return series.astype(pd.ArrowDtype(arrow_type))
 
 
diff --git a/fennel/internal_lib/utils/utils.py b/fennel/internal_lib/utils/utils.py
index 306b26fad..2c8688d3b 100644
--- a/fennel/internal_lib/utils/utils.py
+++ b/fennel/internal_lib/utils/utils.py
@@ -129,7 +129,9 @@ def cast_col_to_pandas(
         return series.fillna(pd.NA)
 
 
-def parse_struct_into_dict(value: Any, dtype: schema_proto.DataType) -> Optional[Union[dict, list]]:
+def parse_struct_into_dict(
+    value: Any, dtype: schema_proto.DataType
+) -> Optional[Union[dict, list]]:
     """
     This function assumes that there's a struct somewhere in the value that needs to be converted into json.
     """
@@ -143,11 +145,19 @@ def parse_struct_into_dict(value: Any, dtype: schema_proto.DataType) -> Optional
     elif isinstance(value, list) or isinstance(value, np.ndarray):
         return [parse_struct_into_dict(x, dtype.array_type.of) for x in value]
     elif isinstance(value, dict) or isinstance(value, frozendict):
-        return {key: parse_struct_into_dict(val, dtype.map_type.value) for key, val in value.items()}
+        return {
+            key: parse_struct_into_dict(val, dtype.map_type.value)
+            for key, val in value.items()
+        }
     elif value is None or pd.isna(value):
         # If dtype is an optional struct type return, a dict with all fields as None
-        if dtype.HasField("optional_type") and dtype.optional_type.of.HasField("struct_type"):
-            return {field.name: None for field in dtype.optional_type.of.struct_type.fields}
+        if dtype.HasField("optional_type") and dtype.optional_type.of.HasField(
+            "struct_type"
+        ):
+            return {
+                field.name: None
+                for field in dtype.optional_type.of.struct_type.fields
+            }
         else:
             return None
     else:
diff --git a/fennel/testing/executor.py b/fennel/testing/executor.py
index de1d8b7d2..54ada5b29 100644
--- a/fennel/testing/executor.py
+++ b/fennel/testing/executor.py
@@ -814,13 +814,11 @@ def visitAssign(self, obj):
             df = copy.deepcopy(input_df)
             df.reset_index(drop=True, inplace=True)
             for col, typed_expr in obj.output_expressions.items():
-                if col in input_ret.df.columns:
-                    raise Exception(
-                        f"Column `{col}` already present in dataframe"
-                    )
                 input_dsschema = obj.node.dsschema().schema()
                 try:
-                    df[col] = typed_expr.expr.eval(input_df, input_dsschema)
+                    df[col] = typed_expr.expr.eval(
+                        input_df, input_dsschema, typed_expr.dtype, parse=False
+                    )
                 except Exception as e:
                     raise Exception(
                         f"Error in assign node for column `{col}` for pipeline "
diff --git a/poetry.lock b/poetry.lock
index 0cd14396e..872848ae4 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -766,44 +766,44 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc
 
 [[package]]
 name = "fennel-data-lib"
-version = "0.1.15"
+version = "0.1.16"
 description = ""
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:42ad4c903b09bc72486a81a5e4250e648d6d766241509facf0ebe72572e11585"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a48792d49000de3f1d2b8d7f1fc24a9b0a342f410f01ebb092141526d691842e"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3da42b3a7230ebfa0ee087c39c4b04b617bf5f13808b3cc593f3b50220921c59"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ec33813adb02a37d6943ed0014927739f1b098c6d575f59ebf85399c364d2f1"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc35dcee6c0afef19558698bc425ff73954d99ccae0f6d24a7dd0835c21439ef"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:fae1e1a8a9cc925e2ba42a72ba27a070e9a63fd3dc68116d2894477d01b53394"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:21f469a4e6197ac6f64910a98fb6cdea88144dac2d582cc428e60be7f9c6fca8"},
-    {file = "fennel_data_lib-0.1.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:93e90b0d02e37ff879879cb6a76f11870c42267c65a73e21b96e8999e3eba81e"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f8215b50977267cfd4af6c0ad1ae1a7a5e4b73ac73ae089f196e3bfc573288db"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f94fd8c649210c4968aabf9a573f6616bd7cdc0a825672d077ca1e684eee32cc"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05e8deff1ac986f9c9c3da2ed626402b3bcc01f28b1ac25f004c53f00567878d"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eea0f44dfb2f401648a21fe94d8eb446f4e2cd0a1eb89384046906831b429f09"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74c79b4b67e8af8e9d39a7316bf95c228d82c9ac99eabcad2889482c8a6f889a"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:e50ba983c0fa58e606593a961dca099ff5bab92865c3f164b3296caa061c34e4"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:444b275022b2b7f19126787d86ca6265c654199e22a230aaf0b8dd0870822262"},
-    {file = "fennel_data_lib-0.1.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8f74d429b3e6d2c838e2e871809241a160adf39e923628e6a3b548a077a41872"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:0a46a9a094255b3c4984c963dc0c4aaeb63983b0a57966b5c6e85085de6a65dc"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7720e9880248d819145f38aefc8b72d691960b461e29de5e49cf9894ff4ba7a"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9d0e6f3164c21819d3002c10ba3b70a3aed1dfa70369fba872559fa77d69badf"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff7c034981c23904e4f13d9657b3667ce1df386133603cd3ed105719779759a4"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a37af5a8c627d7d003fadfd9cf0430d45aeddaa9d4e788d91e224f9f1e28f94"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:11d4859099357021ee2741b9cee84d481e69d3cb2e37ad5f7bb08e5517b805ea"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c2b129aebdb74b74019ee294cfcaba915f76344310430788792ee46c3d7cd3a1"},
-    {file = "fennel_data_lib-0.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e277852036f79d65734d065bcd9ca0abcdefd6d773f285c528dfa851888451f4"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:631ce0e801a631ec4f15abc6a2b886e47b25076b7a444ae5d0838aee89090fae"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d3abba61270217bece19bc331ad16f55fb058f1aabef4ab3726cd18dc6fce89"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f094498f46803298c932ed521537d02bbdcc464a014b9e83d01ee00e053b193"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa53fd1863c7f277c718bbd24891722223bee90d6b249216523cf7d1c9eefe42"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c058c90921fa7d1f4d178592ba6e054695ab9c619a33e07c935f2415faeb1104"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-manylinux_2_34_x86_64.whl", hash = "sha256:e1aae677fa041a1a4906ff846ed6585a6a9f9dc4c4909683354ad930c1e5d3a1"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d360649a168a9452f5915c307d9dcfd8d2a910c5ae769a5eb7f7be80caddac68"},
-    {file = "fennel_data_lib-0.1.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5048ae590669033cbe6fcd6a88fb905a9c53f4d46611f9aed1510bdfbbde4347"},
-    {file = "fennel_data_lib-0.1.15.tar.gz", hash = "sha256:158ff36a5e732ad48125881d03314dd25eece6ab3515ea672bf4fa49b2dea522"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:11b06ff8bf7787ccec56563da9450fcc412d90f22363581e99fe7e6f35e205eb"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b6c3ef5d05c6e5511ad0d2918c24d090ccd1ce3c4479a8b9bd1bde6873d22c3"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8523aa90d231ea87badf2dd9167d5c932eb38ab5ed9ec793547aef320f12881"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd568b6231fbfbf363cc919dcf06c5baad57f039d4d9d765532106b1c04d4e63"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15b3bd238169b4806d852fcbe700aa53886e3f5e49af7b9eb424c04e4e325528"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:1d3a0616387486406ec7642ec5b1799e04757e7c33b0c274f93a8e7cde7f609e"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0fbeb721d03587acfaf1036553cc3081ec53b32516da1525f964ab19f19a24ee"},
+    {file = "fennel_data_lib-0.1.16-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:518b80c4091491561d8c2c808197694d8d68e453183eba14a9637104dc00700b"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:94c803f77dcdcad9a510614271425710a9c2bdfb402fcb4669002143eab62910"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e57db109a623a2858a6caf17ceb23dc8309a961d4e7d61c451f97615ffd22cd4"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a42224d01267792daf2c1c920396ca9224361e548ae56c2d7ed117194c3e6adf"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d943c614452b7ba3bc8d7deff8a0b3f45b8c43e1a287fe193a67d66969f3c1a6"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a079adfe00a907d5b44d33af62bd556bc5aa45d98bb35ed03be7f25b0627b5b5"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:d3b6fce44931b0bd5ad1ccb012058de956f637035be2ccf83e2f6169c012ee7c"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f64edaefcbe28528815571e1af629456a6a106fd198c71f01bc56e3538278117"},
+    {file = "fennel_data_lib-0.1.16-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6460055fad1e954b2573122a34cae9218328738c5b233b551fe12074f24b958e"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b16bbd97f0ae6aeb78d2a5aaf2071b3fe67d134d945264e8b19afc0fb93ff3fd"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc7adb37bde0671140288e2c25da95488bf1c2d33c66abaa59378b456cd0af63"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1f6ad71152ed0e1b97e37ba3cf83bb657f562625bf21ef3e79a72e2391604a0b"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de4e0034989a3387cd2595725d2c6b0528665c0503294e2d3c18e418d029c2ea"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b58d2e4193afeb83281216be32e77db472273160eeadd9e2f5ae1eb98fa8bc50"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:9530eb18989cc111c5d490de47c309cb86a8a0fc66073b14f15ad9e5f5094d27"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:eea2890d58695f67f3e13aab938d5a59eb0ae635b19a1e686b05e96c895b464f"},
+    {file = "fennel_data_lib-0.1.16-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a6dcbf7226565e328d6ddb5a91f71e30713087f4a0577ca6dc65b9ca11f2ba8"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3b47d6c216de3714507b014ddaaed65832f5279ef9d07d33bf24c36d23b661e1"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94537e21649552f1664c922b8646c227e1e7ca4b22f8d55f728e70dda1fead43"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f36e19036b5541213cfd2957f916a1613c640f0147a5203c3f841d8462d93f3"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f8c705e9aa825326d3f46164659141db4980e19f6bbb09e4c1b57ea46e51c6b"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7718f5f3362c2c40411d0cc96841e28e4902570b23db3fcd4c5803720939332"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-manylinux_2_34_x86_64.whl", hash = "sha256:1f6c0de8cf4fdda0a8353ede190b25aac067316ff027c3ce8b75f0326898007e"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:460fbe7b6e394968f214a933c0b3aff8a5754508cd607753500b53222d5d932f"},
+    {file = "fennel_data_lib-0.1.16-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22a94a261ccfe24678ee98786ef2b35be15041ccb31667df1697fa70d6132e50"},
+    {file = "fennel_data_lib-0.1.16.tar.gz", hash = "sha256:fb0001dfcb995356190d19cf4b33855a133ea072fe5452dd72728ad223320e3c"},
 ]
 
 [[package]]
@@ -1861,13 +1861,13 @@ files = [
 
 [[package]]
 name = "more-itertools"
-version = "10.4.0"
+version = "10.5.0"
 description = "More routines for operating on iterables, beyond itertools"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "more-itertools-10.4.0.tar.gz", hash = "sha256:fe0e63c4ab068eac62410ab05cccca2dc71ec44ba8ef29916a0090df061cf923"},
-    {file = "more_itertools-10.4.0-py3-none-any.whl", hash = "sha256:0f7d9f83a0a8dcfa8a2694a770590d98a67ea943e3d9f5298309a484758c4e27"},
+    {file = "more-itertools-10.5.0.tar.gz", hash = "sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6"},
+    {file = "more_itertools-10.5.0-py3-none-any.whl", hash = "sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef"},
 ]
 
 [[package]]
@@ -3328,13 +3328,13 @@ win32 = ["pywin32"]
 
 [[package]]
 name = "setuptools"
-version = "74.1.1"
+version = "74.1.2"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-74.1.1-py3-none-any.whl", hash = "sha256:fc91b5f89e392ef5b77fe143b17e32f65d3024744fba66dc3afe07201684d766"},
-    {file = "setuptools-74.1.1.tar.gz", hash = "sha256:2353af060c06388be1cecbf5953dcdb1f38362f87a2356c480b6b4d5fcfc8847"},
+    {file = "setuptools-74.1.2-py3-none-any.whl", hash = "sha256:5f4c08aa4d3ebcb57a50c33b1b07e94315d7fc7230f7115e47fc99776c8ce308"},
+    {file = "setuptools-74.1.2.tar.gz", hash = "sha256:95b40ed940a1c67eb70fc099094bd6e99c6ee7c23aa2306f4d2697ba7916f9c6"},
 ]
 
 [package.extras]
@@ -3540,13 +3540,13 @@ files = [
 
 [[package]]
 name = "types-python-dateutil"
-version = "2.9.0.20240821"
+version = "2.9.0.20240906"
 description = "Typing stubs for python-dateutil"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "types-python-dateutil-2.9.0.20240821.tar.gz", hash = "sha256:9649d1dcb6fef1046fb18bebe9ea2aa0028b160918518c34589a46045f6ebd98"},
-    {file = "types_python_dateutil-2.9.0.20240821-py3-none-any.whl", hash = "sha256:f5889fcb4e63ed4aaa379b44f93c32593d50b9a94c9a60a0c854d8cc3511cd57"},
+    {file = "types-python-dateutil-2.9.0.20240906.tar.gz", hash = "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e"},
+    {file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"},
 ]
 
 [[package]]
@@ -3714,4 +3714,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "38238318d918ada7382d0d335dc4ad3e8c16c55d7ce0650c685acfafe5c72489"
+content-hash = "d3fbd04d20d5dfb6f1bd354348f847db347d3b9273dfae5e5b4364c5e83424b0"
diff --git a/pyproject.toml b/pyproject.toml
index 7cf8bced6..edd57735a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fennel-ai"
-version = "1.5.17"
+version = "1.5.18"
 description = "The modern realtime feature engineering platform"
 authors = ["Fennel AI <developers@fennel.ai>"]
 packages = [{ include = "fennel" }]
@@ -20,7 +20,7 @@ pytest = "7.1.3"
 pytest-rerunfailures = "^13.0"
 sortedcontainers = "^2.4.0"
 typing-extensions = "^4.12.0"
-fennel-data-lib = "0.1.15"
+fennel-data-lib = "0.1.16"
 pyarrow = "^14.0.2"
 
 [tool.poetry.dev-dependencies]
@@ -43,20 +43,20 @@ pyyaml = "^6.0.1"
 
 # For production, we use poetry to build the python package
 
-# [build-system]
-# requires = ["poetry-core>=1.0.0"]
-# build-backend = "poetry.core.masonry.api"
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
 
 # For local development, we use maturin to build the rust library
 
-[build-system]
-requires = ["maturin", "setuptools>=42", "wheel"]
-build-backend = "maturin"
+# [build-system]
+# requires = ["maturin", "setuptools>=42", "wheel"]
+# build-backend = "maturin"
 
-[tool.maturin]
-name = "fennel_data_lib"
-sdist-directory = "python_package"
-manifest-path = "../server/fennel_data_lib/Cargo.toml"
+# [tool.maturin]
+# name = "fennel_data_lib"
+# sdist-directory = "python_package"
+# manifest-path = "../server/fennel_data_lib/Cargo.toml"
 
 
 # inspired from - https://github.com/pypa/pip/blob/main/pyproject.toml