Skip to content

Commit

Permalink
added backfill param to commit operation (#603)
Browse files Browse the repository at this point in the history
* added backfill param to commit operation

* spelling check

* addressed comments

* updated fennel version in pyproject.toml

---------

Co-authored-by: Hemanth Kannekanti <hemanth@fennel.ai>
  • Loading branch information
hemanthk269 and Hemanth Kannekanti authored Nov 12, 2024
1 parent 1c56f84 commit 4eb02be
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 4 deletions.
40 changes: 40 additions & 0 deletions docs/examples/api-reference/client/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,43 @@ def some_fn(cls, ts, amount: pd.Series):
)
# docsnip-highlight end
# /docsnip


@mock
def test_backfill(client):
    # Docs example for the `backfill` argument of `client.commit`: the same
    # Transaction dataset is committed twice, first on the "test_backfill"
    # branch with backfill=True and then on "main" with backfill=False.
    # NOTE(review): `datetime` is not imported inside this snippet; presumably
    # it is imported at the top of the file - confirm.
    # docsnip backfill
    from fennel.datasets import dataset, field
    from fennel.connectors import source, Webhook

    webhook = Webhook(name="some_webhook")

    @source(webhook.endpoint("endpoint"), disorder="14d", cdc="upsert")
    @dataset(index=True)
    class Transaction:
        txid: int = field(key=True)
        amount: int
        timestamp: datetime

    client.checkout("test_backfill", init=True)
    client.commit(
        message="transaction: add transaction dataset",
        datasets=[Transaction],
        backfill=True,  # backfill defaults to True, so passing it here is optional
    )

    # Same dataset definition as above, declared again before it is committed
    # to the "main" branch.
    @source(webhook.endpoint("endpoint"), disorder="14d", cdc="upsert")
    @dataset(index=True)
    class Transaction:
        txid: int = field(key=True)
        amount: int
        timestamp: datetime

    # docsnip-highlight start
    client.checkout("main", init=True)
    client.commit(
        message="adding transaction dataset to main",
        datasets=[Transaction],
        backfill=False,  # set backfill to False to prevent an accidental backfill of the Transaction dataset
    )
    # docsnip-highlight end
    # /docsnip
10 changes: 10 additions & 0 deletions docs/pages/api-reference/client/commit.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,20 @@ those with matching values. Rules of selection:
or same as `env` or is `~x` for some other x
</Expandable>

<Expandable title="backfill" type="bool" defaultVal="True">
When `backfill` is set to `False`, the commit returns an error if applying it would trigger a backfill of any dataset or pipeline.
A backfill is triggered when no existing dataset is isomorphic to the new dataset.
Setting `backfill` to `False` therefore guards against accidental backfills by ensuring that only datasets matching an existing structure are committed.
</Expandable>

<pre snippet="api-reference/client/commit#basic" status="success"
message="Silver source and no extractor are committed">
</pre>

<pre snippet="api-reference/client/commit#incremental" status="success"
message="Second commit adds a featureset & leaves dataset unchanged">
</pre>

<pre snippet="api-reference/client/commit#backfill" status="success"
message="Setting backfill to False prevents a backfill of the Transaction dataset when committing to the main branch">
</pre>
3 changes: 3 additions & 0 deletions fennel/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## [1.5.54] - 2024-11-12
- Add backfill parameter to commit operation

## [1.5.53] - 2024-11-12
- Add support for workflow parameter in offline query

Expand Down
4 changes: 3 additions & 1 deletion fennel/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def commit(
preview=False,
env: Optional[str] = None,
incremental: bool = False,
backfill: bool = True,
):
"""Commit the changes to the branch pointed to by the client.
Expand All @@ -112,6 +113,7 @@ def commit(
env (Optional[str]): The environment to register the datasets and featuresets in.
incremental (bool): If the commit is only used for adding datasets and featuresets and not changing
anything existing.
backfill (bool): If False, the commit returns an error when it would result in a backfill. Defaults to True.
Returns:
----------
Expand Down Expand Up @@ -140,7 +142,7 @@ def commit(
self.add(featureset)
sync_request = self._get_sync_request_proto(message, env)
response = self._post_bytes(
f"{V1_API}/commit?preview={str(preview).lower()}&incremental={str(incremental).lower()}",
f"{V1_API}/commit?preview={str(preview).lower()}&incremental={str(incremental).lower()}&backfill={str(backfill).lower()}",
sync_request.SerializeToString(),
False,
300,
Expand Down
1 change: 1 addition & 0 deletions fennel/testing/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def commit(
preview=False,
incremental=False,
env: Optional[str] = None,
backfill=True,
):
if not incremental:
self._reset()
Expand Down
3 changes: 2 additions & 1 deletion fennel/testing/integration_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ def commit(
preview=False,
env: Optional[str] = None,
incremental: bool = False,
backfill: bool = True,
):
resp = super().commit(
message, datasets, featuresets, preview, env, incremental
message, datasets, featuresets, preview, env, incremental, backfill
)
# It takes a while to setup the server
time.sleep(10)
Expand Down
3 changes: 2 additions & 1 deletion fennel/testing/mock_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def commit(
preview=False,
env: Optional[str] = None,
incremental: bool = False,
backfill: bool = True,
):
def is_superset_featureset(featureset_new, featureset_old):
features_new = set(
Expand Down Expand Up @@ -247,7 +248,7 @@ def is_new_dataset_eligible(dataset_new, dataset_old):
# Run all validation for converting them to protos
_ = to_sync_request_proto(self.to_register_objects, message, env)
return self._get_branch().commit(
datasets, featuresets, preview, incremental, env
datasets, featuresets, preview, incremental, env, backfill
)

def query(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "1.5.53"
version = "1.5.54"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <developers@fennel.ai>"]
packages = [{ include = "fennel" }]
Expand Down

0 comments on commit 4eb02be

Please sign in to comment.