Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
Utilize Polars.DataFrame for performance in ModelbitComponent (#80)
Browse files Browse the repository at this point in the history
* Add polars dependency

* Use data frame to gather features

* Add ddtrace to feature retrieval

* Bump version

* Set retrieved features to request context

* Handle case when identifier type isn't in feature map

* Remove FeatureMap and FeatureData

* Bump version

* Use class str method for key generation, fix crucial bug in build_requests

* Check if there are RT features before merging DFs

* Initialize RT feature df with empty df

* Cast all f32 columns to f64 before concat

* Remove feature map

* Fix some bugs during testing

* Update tests, fix concat vs join bugs

* Update feature_store_main.py

* Fix logging to use original identifiers instead of primary identifiers

* Don't replace : with __ for requests

* Add a null check

* Keep replacing only 1 feature name separator

* Change component.get_feature return type
  • Loading branch information
ykeremy authored Oct 16, 2023
1 parent 99571b8 commit 3c4a9e6
Show file tree
Hide file tree
Showing 22 changed files with 537 additions and 395 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ repos:
- types-tqdm
- nest-asyncio
- aiohttp
- polars
exclude: "^tests/"

# Check for spelling
Expand Down
24 changes: 18 additions & 6 deletions examples/feature_store_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import typer
from pydantic import BaseModel

from wyvern import Identifier
from wyvern.components.api_route_component import APIRouteComponent
from wyvern.components.features.feature_store import (
FeatureStoreRetrievalRequest,
Expand Down Expand Up @@ -33,13 +34,24 @@ async def execute(
self, input: FeatureStoreRetrievalRequest, **kwargs
) -> FeatureStoreResponse:
logger.info(f"Executing input {input}")
feature_map = await feature_store_retrieval_component.execute(input)

feature_df = await feature_store_retrieval_component.execute(input)
feature_dicts = feature_df.df.to_dicts()
feature_data: Dict[str, FeatureData] = {
str(feature_dict["IDENTIFIER"]): FeatureData(
identifier=Identifier(
identifier_type=feature_dict["IDENTIFIER"].split("::")[0],
identifier=feature_dict["IDENTIFIER"].split("::")[1],
),
features={
feature_name: feature_value
for feature_name, feature_value in feature_dict.items()
if feature_name != "IDENTIFIER"
},
)
for feature_dict in feature_dicts
}
return FeatureStoreResponse(
feature_data={
identifier.identifier: feature_map.feature_map[identifier]
for identifier in feature_map.feature_map.keys()
},
feature_data=feature_data,
)


Expand Down
22 changes: 17 additions & 5 deletions examples/real_time_features_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,27 @@ async def execute(
)

time_start = time()
feature_map = await self.feature_retrieval_pipeline.execute(request)
feature_df = await self.feature_retrieval_pipeline.execute(request)
logger.info(f"operation feature_retrieval took:{time()-time_start:2.4f} sec")
profiler.stop()
profiler.print()
feature_dicts = feature_df.df.to_dicts()
feature_data: Dict[str, FeatureData] = {
str(feature_dict["IDENTIFIER"]): FeatureData(
identifier=Identifier(
identifier_type=feature_dict["IDENTIFIER"].split("::")[0],
identifier=feature_dict["IDENTIFIER"].split("::")[1],
),
features={
feature_name: feature_value
for feature_name, feature_value in feature_dict.items()
if feature_name != "IDENTIFIER"
},
)
for feature_dict in feature_dicts
}
return FeatureStoreResponse(
feature_data={
str(identifier): feature_map.feature_map[identifier]
for identifier in feature_map.feature_map.keys()
},
feature_data=feature_data,
)


Expand Down
62 changes: 37 additions & 25 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "wyvern-ai"
version = "0.0.26"
version = "0.0.27"
description = ""
authors = ["Wyvern AI <info@wyvern.ai>"]
readme = "README.md"
Expand Down Expand Up @@ -33,6 +33,7 @@ aiohttp = {extras = ["speedups"], version = "^3.8.5"}
requests = "^2.31.0"
platformdirs = "^3.8"
posthog = "^3.0.2"
polars = "^0.19.6"


[tool.poetry.group.dev.dependencies]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
from typing import Dict, List

import pytest
Expand All @@ -13,7 +14,7 @@
PinningBusinessLogicComponent,
)
from wyvern.entities.candidate_entities import ScoredCandidate
from wyvern.entities.feature_entities import FeatureMap
from wyvern.entities.feature_entities import FeatureDataFrame
from wyvern.entities.identifier_entities import ProductEntity
from wyvern.entities.request import BaseWyvernRequest
from wyvern.wyvern_request import WyvernRequest
Expand Down Expand Up @@ -65,7 +66,8 @@ def __init__(self):
headers={},
entity_store={},
events=[],
feature_map=FeatureMap(feature_map={}),
feature_df=FeatureDataFrame(),
feature_orig_identifiers=defaultdict(dict),
model_output_map={},
),
)
Expand Down
Loading

0 comments on commit 3c4a9e6

Please sign in to comment.