Skip to content

Commit

Permalink
Move now_col_name to context
Browse files Browse the repository at this point in the history
  • Loading branch information
nonibansal committed Sep 27, 2024
1 parent e150326 commit 1070ca5
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 145 deletions.
54 changes: 52 additions & 2 deletions fennel/client_tests/test_expr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from typing import Optional

import pandas as pd
Expand Down Expand Up @@ -47,6 +47,7 @@ class UserInfoFeatures:
assert response.status_code == requests.codes.OK, response.json()

now = datetime.now(timezone.utc)
now_1y = now - timedelta(days=365)
df = pd.DataFrame(
{
"user_id": [1, 2, 3, 4, 5],
Expand All @@ -59,7 +60,7 @@ class UserInfoFeatures:
datetime(2001, 1, 21, tzinfo=timezone.utc),
],
"country": ["India", "USA", "Africa", "UK", "Chile"],
"ts": [now, now, now, now, now],
"ts": [now_1y, now_1y, now_1y, now_1y, now_1y],
}
)
response = client.log("fennel_webhook", "UserInfoDataset", df)
Expand Down Expand Up @@ -97,3 +98,52 @@ class UserInfoFeatures:
"Chile",
pd.NA,
]

if not client.is_integration_client():
df = client.query_offline(
inputs=[UserInfoFeatures.user_id],
outputs=[
UserInfoFeatures.name,
UserInfoFeatures.age,
UserInfoFeatures.country,
],
input_dataframe=pd.DataFrame(
{
"UserInfoFeatures.user_id": [1, 2, 3, 4, 5, 6],
"timestamp": [
now_1y,
now_1y,
now_1y,
now_1y,
now_1y,
now_1y,
],
}
),
timestamp_column="timestamp",
)
assert df.shape == (6, 4)
assert df["UserInfoFeatures.name"].tolist() == [
"Ross",
"Monica",
"Chandler",
"Joey",
"Rachel",
pd.NA,
]
assert df["UserInfoFeatures.age"].tolist() == [
53,
43,
33,
25,
22,
pd.NA,
]
assert df["UserInfoFeatures.country"].tolist() == [
"India",
"USA",
"Africa",
"UK",
"Chile",
pd.NA,
]
13 changes: 11 additions & 2 deletions fennel/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any, Callable, Dict, Type, Optional
import pandas as pd

from fennel._vendor.pydantic import BaseModel # type: ignore
from fennel.dtypes.dtypes import FENNEL_STRUCT, FENNEL_STRUCT_SRC_CODE
from fennel.internal_lib.schema.schema import (
convert_dtype_to_arrow_type_with_nullable,
Expand All @@ -17,6 +18,7 @@
from fennel_data_lib import assign, type_of, matches

import fennel.gen.schema_pb2 as schema_proto
import fennel.gen.expr_pb2 as expr_proto
from fennel.internal_lib.schema import (
get_datatype,
cast_col_to_arrow_dtype,
Expand All @@ -26,7 +28,9 @@
is_user_defined_class,
)

NOW_COL_NAME = "__fennel_ts_col__"

class ExprContext(BaseModel):
now_col_name: Optional[str] = None


class InvalidExprException(Exception):
Expand Down Expand Up @@ -362,6 +366,7 @@ def eval(
schema: Dict,
output_dtype: Optional[Type] = None,
parse=True,
context: Optional[ExprContext] = None
) -> pd.Series:
from fennel.expr.serializer import ExprSerializer

Expand Down Expand Up @@ -412,8 +417,12 @@ def pa_to_pd(pa_data, ret_type, parse=True):
ret_type = output_dtype

serialized_ret_type = get_datatype(ret_type).SerializeToString()
if context is None:
serialized_context = expr_proto.ExprContext().SerializeToString()
else:
serialized_context = expr_proto.ExprContext(**context.dict()).SerializeToString()
arrow_col = assign(
proto_bytes, df_pa, proto_schema, serialized_ret_type
proto_bytes, df_pa, proto_schema, serialized_ret_type, serialized_context
)
return pa_to_pd(arrow_col, ret_type, parse)

Expand Down
17 changes: 5 additions & 12 deletions fennel/expr/test_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
TimeUnit,
from_epoch,
make_struct,
NOW_COL_NAME,
)
from fennel.expr.visitor import ExprPrinter, FetchReferences
from fennel.expr.serializer import ExprSerializer
Expand Down Expand Up @@ -1359,23 +1358,17 @@ def test_now():
df=pd.DataFrame(
{
"birthdate": [
datetime(2023, 1, 1, tzinfo=timezone.utc),
datetime(2023, 1, 2, tzinfo=timezone.utc),
datetime.now(timezone.utc),
datetime.now(timezone.utc),
None,
datetime(2023, 1, 3, tzinfo=timezone.utc),
],
NOW_COL_NAME: [
datetime(2023, 1, 31, tzinfo=timezone.utc),
datetime(2023, 1, 31, tzinfo=timezone.utc),
datetime(2023, 1, 31, tzinfo=timezone.utc),
datetime(2023, 1, 31, tzinfo=timezone.utc),
datetime.now(timezone.utc),
],
}
),
schema={"birthdate": Optional[datetime], NOW_COL_NAME: datetime},
schema={"birthdate": Optional[datetime]},
display='SINCE(NOW(), col("birthdate"), unit=TimeUnit.DAY)',
refs={"birthdate"},
eval_result=[30, 29, pd.NA, 28],
eval_result=[0, 0, pd.NA, 0],
expected_dtype=Optional[int],
proto_json=None,
),
Expand Down
236 changes: 119 additions & 117 deletions fennel/gen/expr_pb2.py

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions fennel/gen/expr_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,23 @@ MICROSECOND: TimeUnit.ValueType # 8
MILLISECOND: TimeUnit.ValueType # 9
global___TimeUnit = TimeUnit

@typing_extensions.final
class ExprContext(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor

NOW_COL_NAME_FIELD_NUMBER: builtins.int
now_col_name: builtins.str
def __init__(
self,
*,
now_col_name: builtins.str | None = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["_now_col_name", b"_now_col_name", "now_col_name", b"now_col_name"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["_now_col_name", b"_now_col_name", "now_col_name", b"now_col_name"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions.Literal["_now_col_name", b"_now_col_name"]) -> typing_extensions.Literal["now_col_name"] | None: ...

global___ExprContext = ExprContext

@typing_extensions.final
class Expr(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
Expand Down
6 changes: 4 additions & 2 deletions fennel/testing/query_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import fennel.datasets.datasets
import fennel.gen.schema_pb2 as schema_proto
from fennel.expr.expr import NOW_COL_NAME
from fennel.expr.expr import ExprContext
from fennel.featuresets import Extractor, Feature, Featureset, is_valid_feature
from fennel.gen.featureset_pb2 import (
ExtractorType as ProtoExtractorType,
Expand All @@ -23,6 +23,8 @@
cast_df_to_arrow_dtype,
)

NOW_COL_NAME = "__ts_col_name__"


class QueryEngine:
"""
Expand Down Expand Up @@ -376,7 +378,7 @@ def _compute_expr_extractor(
input_schema[NOW_COL_NAME] = datetime
df = pd.DataFrame(input_features)
df[NOW_COL_NAME] = timestamps
res = expr.eval(df, input_schema) # type: ignore
res = expr.eval(df, input_schema, context=ExprContext(now_col_name=NOW_COL_NAME)) # type: ignore
output_dtype = expr.typeof(input_schema) # type: ignore
# Cast to correct arrow dtype after evaluating the expression
res = cast_col_to_arrow_dtype(res, get_datatype(output_dtype))
Expand Down
20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,20 @@ pyyaml = "^6.0.1"

# For production, we use poetry to build the python package

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
#[build-system]
#requires = ["poetry-core>=1.0.0"]
#build-backend = "poetry.core.masonry.api"

# For local development, we use maturin to build the rust library

# [build-system]
# requires = ["maturin", "setuptools>=42", "wheel"]
# build-backend = "maturin"
[build-system]
requires = ["maturin", "setuptools>=42", "wheel"]
build-backend = "maturin"

# [tool.maturin]
# name = "fennel_data_lib"
# sdist-directory = "python_package"
# manifest-path = "../server/fennel_data_lib/Cargo.toml"
[tool.maturin]
name = "fennel_data_lib"
sdist-directory = "python_package"
manifest-path = "../server/fennel_data_lib/Cargo.toml"


# inspired from - https://github.com/pypa/pip/blob/main/pyproject.toml
Expand Down

0 comments on commit 1070ca5

Please sign in to comment.