Skip to content

Commit

Permalink
Merge pull request #313 from google/drop-operator
Browse files Browse the repository at this point in the history
Added drop operator
  • Loading branch information
javiber authored Nov 21, 2023
2 parents bd330ff + 0264b3b commit 3a41e32
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_notebooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- tutorials/anomaly_detection_unsupervised
- tutorials/bank_fraud_detection_with_tfdf
- tutorials/heart_rate_analysis
- tutorials/loan_outcomes_prediction
# - tutorials/loan_outcomes_prediction # TODO: re-enable this notebook
- tutorials/m5_competition
steps:
- name: Checkout
Expand Down
3 changes: 2 additions & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"recommendations": [
"esbenp.prettier-vscode"
"esbenp.prettier-vscode",
"ms-python.pylint"
]
}
1 change: 1 addition & 0 deletions docs/src/reference/temporian/operators/drop.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: temporian.EventSet.drop
49 changes: 49 additions & 0 deletions temporian/core/event_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4333,3 +4333,52 @@ def where(
from temporian.core.operators.where import where

return where(self, on_true, on_false)

def drop(
    self: EventSetOrNode,
    feature_names: Union[str, List[str]],
) -> EventSetOrNode:
    """Removes a subset of features from an [`EventSet`][temporian.EventSet].

    Usage example:
        ```python
        >>> a = tp.event_set(
        ...     timestamps=[1, 2],
        ...     features={"A": [1, 2], "B": ['s', 'm'], "C": [5.0, 5.5]},
        ... )

        >>> # Drop single feature
        >>> bc = a.drop('A')
        >>> bc
        indexes: []
        features: [('B', str_), ('C', float64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'B': [b's' b'm']
                'C': [5. 5.5]
        ...

        >>> # Drop multiple features
        >>> c = a.drop(['A', 'B'])
        >>> c
        indexes: []
        features: [('C', float64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'C': [5. 5.5]
        ...

        ```

    Args:
        feature_names: Name or list of names of the features to drop from
            the input.

    Returns:
        EventSet containing all features except the ones dropped.

    Raises:
        TypeError: If some of the given names are not features of the
            input.
    """
    # Imported inside the method rather than at module level, like the
    # other operator methods of this class.
    from temporian.core.operators.select import drop

    return drop(self, feature_names=feature_names)
2 changes: 1 addition & 1 deletion temporian/core/operators/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -448,4 +448,4 @@ py_library(
"//temporian/core/data:schema",
"//temporian/proto:core_py_proto",
],
)
)
24 changes: 24 additions & 0 deletions temporian/core/operators/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,27 @@ def select(
)

return SelectOperator(input, feature_names).outputs["output"]


@compile
def drop(
    input: EventSetOrNode, feature_names: Union[str, List[str]]
) -> EventSetOrNode:
    """Removes a subset of features from the input.

    This is the complement of `select`: every input feature that is not
    listed in `feature_names` is kept, in its original order.

    Args:
        input: EventSet or node to remove features from.
        feature_names: Name or list of names of the features to drop.

    Returns:
        The output of `select` applied to the remaining features.

    Raises:
        TypeError: If at least one of `feature_names` is not a feature of
            the input.
    """
    # NOTE(review): presumably `@compile` has already turned an EventSet
    # argument into a node by the time this runs — confirm.
    assert isinstance(input, EventSetNode)

    # A single name is accepted as shorthand for a one-element list.
    if isinstance(feature_names, str):
        feature_names = [feature_names]

    input_features = input.schema.feature_names()

    # Collect the unknown names once; the list serves both as the validity
    # check and as the content of the error message.
    missing = [fn for fn in feature_names if fn not in input_features]
    if missing:
        raise TypeError(f"Features {missing} are not present in the input")

    return select(
        input=input,
        feature_names=[fn for fn in input_features if fn not in feature_names],
    )
10 changes: 10 additions & 0 deletions temporian/core/operators/test/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,13 @@ py_test(
"//temporian/test:utils",
],
)

# Test target for the `drop` operator (sources in test_drop.py).
py_test(
    name = "test_drop",
    srcs = ["test_drop.py"],
    srcs_version = "PY3",
    deps = [
        "//temporian/implementation/numpy/data:io",
        "//temporian/test:utils",
    ],
)
72 changes: 72 additions & 0 deletions temporian/core/operators/test/test_drop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from absl.testing import absltest, parameterized

from temporian.implementation.numpy.data.io import event_set
from temporian.test.utils import assertOperatorResult


class DropTest(parameterized.TestCase):
    """Tests `EventSet.drop` on a three-event EventSet indexed by "c"."""

    @staticmethod
    def _build(features, **extra):
        """Creates an EventSet at timestamps 1, 2, 3 indexed by "c"."""
        return event_set(
            timestamps=[1, 2, 3],
            features=features,
            indexes=["c"],
            **extra,
        )

    def test_drop_str(self):
        """A single feature can be dropped by passing its name as a str."""
        source = self._build(
            dict(a=[1.0, 2.0, 3.0], b=[5, 6, 7], c=["A", "A", "B"])
        )
        dropped = source.drop("a")

        wanted = self._build(
            dict(b=[5, 6, 7], c=["A", "A", "B"]),
            same_sampling_as=source,
        )

        assertOperatorResult(self, dropped, wanted)

    def test_drop_list(self):
        """Several features can be dropped by passing a list of names."""
        source = self._build(
            dict(a=[1.0, 2.0, 3.0], b=[5, 6, 7], c=["A", "A", "B"])
        )
        dropped = source.drop(["a", "b"])

        wanted = self._build(
            dict(c=["A", "A", "B"]),
            same_sampling_as=source,
        )

        assertOperatorResult(self, dropped, wanted)


# Run the test cases with absltest when this file is executed as a script.
if __name__ == "__main__":
    absltest.main()

0 comments on commit 3a41e32

Please sign in to comment.