From 8c85888c530cfa858ef994fa3e1d07ed5e78fbdf Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 14 Sep 2023 14:30:09 +0200 Subject: [PATCH 1/3] Fix dependencies --- temporian/beam/BUILD | 7 +++++-- temporian/beam/io/BUILD | 8 +++----- temporian/beam/operators/BUILD | 10 +++++----- temporian/beam/operators/window/BUILD | 2 +- temporian/beam/test/BUILD | 1 - temporian/core/BUILD | 1 + temporian/core/operators/BUILD | 5 ++++- temporian/core/operators/window/BUILD | 6 ------ temporian/implementation/numpy/data/BUILD | 2 ++ temporian/implementation/numpy/operators/BUILD | 6 +++--- temporian/implementation/numpy/operators/window/BUILD | 3 +++ temporian/io/BUILD | 2 ++ temporian/utils/BUILD | 3 +++ tools/create_operator.py | 7 +++++-- 14 files changed, 37 insertions(+), 26 deletions(-) diff --git a/temporian/beam/BUILD b/temporian/beam/BUILD index fda8c79e7..a43214ce4 100644 --- a/temporian/beam/BUILD +++ b/temporian/beam/BUILD @@ -25,7 +25,6 @@ py_library( deps = [ # already_there/apache_beam "//temporian/core:evaluation", - "//temporian/beam/io:dict", ":implementation_lib", "//temporian/beam/operators", "//temporian/core/data:node", @@ -43,5 +42,9 @@ py_library( name = "typing", srcs = ["typing.py"], srcs_version = "PY3", - deps = ["//temporian/core:typing"], + deps = [ + # already_there/apache_beam + # already_there/numpy + "//temporian/core:typing", + ], ) diff --git a/temporian/beam/io/BUILD b/temporian/beam/io/BUILD index 44113b2e1..f1caffa16 100644 --- a/temporian/beam/io/BUILD +++ b/temporian/beam/io/BUILD @@ -13,7 +13,6 @@ py_library( deps = [ # already_there/apache_beam # already_there/numpy - "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy/data:dtype_normalization", "//temporian/core/data:dtype", "//temporian/core/data:node", @@ -28,9 +27,7 @@ py_library( srcs_version = "PY3", deps = [ # already_there/apache_beam - # already_there/numpy - "//temporian/implementation/numpy/data:event_set", - "//temporian/core/data:dtype", + "//temporian/beam/io:dict", "//temporian/core/data:node", "//temporian/beam:typing", ], @@ -43,7 +40,8 @@ py_library( deps = [ # already_there/apache_beam # already_there/numpy - "//temporian/implementation/numpy/data:event_set", + "//temporian/io:tensorflow", + "//temporian/beam/io:dict", "//temporian/implementation/numpy/data:dtype_normalization", "//temporian/core/data:dtype", "//temporian/core/data:node", diff --git a/temporian/beam/operators/BUILD b/temporian/beam/operators/BUILD index 7544ba6b6..1335f64cb 100644 --- a/temporian/beam/operators/BUILD +++ b/temporian/beam/operators/BUILD @@ -11,8 +11,9 @@ py_library( srcs = ["base.py"], srcs_version = "PY3", deps = [ - "//temporian/beam/io:dict", + # already_there/apache_beam "//temporian/core/operators:base", + "//temporian/beam:typing", ], ) @@ -32,11 +33,10 @@ py_library( srcs = ["select.py"], srcs_version = "PY3", deps = [ - # already_there/apache_beam - "//temporian/beam/io:dict", + "//temporian/beam:implementation_lib", + "//temporian/beam:typing", "//temporian/beam/operators:base", "//temporian/core/operators:select", - "//temporian/beam:implementation_lib", ], ) @@ -47,8 +47,8 @@ py_library( deps = [ # already_there/apache_beam # already_there/numpy + "//temporian/beam:typing", "//temporian/beam:implementation_lib", - "//temporian/beam/io:dict", "//temporian/beam/operators:base", "//temporian/core/operators:add_index", ], diff --git a/temporian/beam/operators/window/BUILD b/temporian/beam/operators/window/BUILD index 093aab93d..7d6839691 100644 --- a/temporian/beam/operators/window/BUILD +++ b/temporian/beam/operators/window/BUILD @@ -12,11 +12,11 @@ py_library( srcs_version = "PY3", deps = [ # already_there/apache_beam + "//temporian/beam:typing", "//temporian/beam:implementation_lib", "//temporian/beam/operators:base", "//temporian/core/operators/window:moving_sum", "//temporian/implementation/numpy/operators/window:moving_sum", - "//temporian/beam/io:dict", "//temporian/implementation/numpy/operators:base", ], ) diff --git a/temporian/beam/test/BUILD b/temporian/beam/test/BUILD index 59b1b38f2..07ead5b08 100644 --- a/temporian/beam/test/BUILD +++ b/temporian/beam/test/BUILD @@ -30,7 +30,6 @@ py_library( # already_there/absl/testing:absltest # already_there/apache_beam "//temporian/beam/io:csv", - "//temporian/beam/io:dict", "//temporian/beam:evaluation", "//temporian/io:csv", "//temporian/core/data:node", diff --git a/temporian/core/BUILD b/temporian/core/BUILD index e7bd5cb9c..5bf1bbc61 100644 --- a/temporian/core/BUILD +++ b/temporian/core/BUILD @@ -92,6 +92,7 @@ py_library( deps = [ "//temporian/core:event_set_ops", "//temporian/core/data:dtype", + "//temporian/core/data:duration", "//temporian/core/data:node", "//temporian/implementation/numpy/data:event_set", ], diff --git a/temporian/core/operators/BUILD b/temporian/core/operators/BUILD index 58bbd4cfa..ad44205cb 100644 --- a/temporian/core/operators/BUILD +++ b/temporian/core/operators/BUILD @@ -368,9 +368,12 @@ py_library( srcs_version = "PY3", deps = [ ":base", + "//temporian/core:compilation", "//temporian/core:operator_lib", + "//temporian/core:typing", "//temporian/core/data:node", - "//temporian/core/data:schema", + "//temporian/implementation/numpy/data:dtype_normalization", "//temporian/proto:core_py_proto", + "//temporian/utils:typecheck", ], ) diff --git a/temporian/core/operators/window/BUILD b/temporian/core/operators/window/BUILD index d1db507f9..cd42102cd 100644 --- a/temporian/core/operators/window/BUILD +++ b/temporian/core/operators/window/BUILD @@ -45,7 +45,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core:typing", "//temporian/core/data:dtype", - "//temporian/core/data:duration_utils", "//temporian/core/data:node", "//temporian/core/data:schema", "//temporian/utils:typecheck", @@ -62,7 +61,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core:typing", "//temporian/core/data:dtype", - "//temporian/core/data:duration_utils", "//temporian/core/data:node", "//temporian/core/data:schema", ], @@ -78,7 +76,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core/data:dtype", "//temporian/core:typing", - "//temporian/core/data:duration_utils", "//temporian/core:compilation", "//temporian/core/data:node", "//temporian/core/data:schema", @@ -95,7 +92,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core:typing", "//temporian/core/data:dtype", - "//temporian/core/data:duration_utils", "//temporian/core/data:node", "//temporian/core/data:schema", ], @@ -111,7 +107,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core:typing", "//temporian/core/data:dtype", - "//temporian/core/data:duration_utils", "//temporian/core/data:node", "//temporian/core/data:schema", ], @@ -127,7 +122,6 @@ py_library( "//temporian/core:operator_lib", "//temporian/core:typing", "//temporian/core/data:dtype", - "//temporian/core/data:duration_utils", "//temporian/core/data:node", "//temporian/core/data:schema", ], diff --git a/temporian/implementation/numpy/data/BUILD b/temporian/implementation/numpy/data/BUILD index fb11975e9..9875e6099 100644 --- a/temporian/implementation/numpy/data/BUILD +++ b/temporian/implementation/numpy/data/BUILD @@ -27,6 +27,8 @@ py_library( "//temporian/core/data:schema", "//temporian/utils:config", "//temporian/core:event_set_ops", + "//temporian/utils:string", + "//temporian/core/data:duration_utils", ], ) diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index 3da09c757..2fbb88fd5 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -206,6 +206,7 @@ py_library( "//temporian/implementation/numpy:implementation_lib", "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy_cc/operators:operators_cc", + "//temporian/implementation/numpy/data:dtype_normalization", ], ) @@ -234,6 +235,7 @@ py_library( "//temporian/core/operators:cast", "//temporian/implementation/numpy:implementation_lib", "//temporian/implementation/numpy/data:event_set", + "//temporian/implementation/numpy/data:dtype_normalization", ], ) @@ -312,6 +314,7 @@ py_library( "//temporian/implementation/numpy:implementation_lib", "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy_cc/operators:operators_cc", + "//temporian/implementation/numpy/data:dtype_normalization", ], ) @@ -372,12 +375,9 @@ py_library( srcs = ["select_index_values.py"], srcs_version = "PY3", deps = [ - # already_there/numpy ":base", - "//temporian/core/data:duration_utils", "//temporian/core/operators:select_index_values", "//temporian/implementation/numpy:implementation_lib", - "//temporian/implementation/numpy:utils", "//temporian/implementation/numpy/data:event_set", ], ) diff --git a/temporian/implementation/numpy/operators/window/BUILD b/temporian/implementation/numpy/operators/window/BUILD index 00f50e561..6f95e15f4 100644 --- a/temporian/implementation/numpy/operators/window/BUILD +++ b/temporian/implementation/numpy/operators/window/BUILD @@ -21,6 +21,8 @@ py_library( "//temporian/core/operators/window:base", "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy/operators:base", + "//temporian/core/data:duration_utils", + "//temporian/implementation/numpy/data:dtype_normalization", ], ) @@ -70,6 +72,7 @@ py_library( "//temporian/core/operators/window:moving_count", "//temporian/implementation/numpy:implementation_lib", "//temporian/implementation/numpy_cc/operators:operators_cc", + "//temporian/core/data:duration_utils", ], ) diff --git a/temporian/io/BUILD b/temporian/io/BUILD index 8da3b41b0..1f694c545 100644 --- a/temporian/io/BUILD +++ b/temporian/io/BUILD @@ -47,6 +47,8 @@ py_library( srcs_version = "PY3", deps = [ # force/tensorflow + # already_there/numpy + "//temporian/core/data:dtype", ":format", "//temporian/implementation/numpy/data:event_set", "//temporian/implementation/numpy/data:dtype_normalization", diff --git a/temporian/utils/BUILD b/temporian/utils/BUILD index 985fa4145..2df2b177d 100644 --- a/temporian/utils/BUILD +++ b/temporian/utils/BUILD @@ -43,4 +43,7 @@ py_library( name = "golden", srcs = ["golden.py"], srcs_version = "PY3", + deps = [ + # already_there/absl + ], ) diff --git a/tools/create_operator.py b/tools/create_operator.py index 5e04e15c2..673446672 100755 --- a/tools/create_operator.py +++ b/tools/create_operator.py @@ -286,7 +286,8 @@ def __call__( encoding="utf-8", ) as file: file.write( - f"""{license_content()} + f"""\ +{license_content()} from absl.testing import absltest @@ -369,7 +370,8 @@ def test_base(self): ) print( - """Don't forget to update the following code: + """\ +Don't forget to update the following code: - The imports in the top-level init file temporian/__init__.py (if global) - The EventSetOperations class in temporian/core/event_set_ops.py (if not global) - Move the docstring from the operator's .py file to the EventSetOperations class (if not global) @@ -382,6 +384,7 @@ def test_base(self): - The docs API ref's home page docs/reference/index.md - The tests in temporian/core/test/event_set_ops_test.py - The unit test in temporian/core/operators/test +- Once your op is implemented, run `python tools/build_cleaner.py` and fix Bazel dependencies. """ ) From c1bc01ea63f66f69dadbae87cb08cb7b4e238c76 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 14 Sep 2023 14:59:08 +0200 Subject: [PATCH 2/3] wip --- temporian/implementation/numpy/BUILD | 6 ------ temporian/implementation/numpy/utils.py | 0 temporian/utils/BUILD | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 temporian/implementation/numpy/utils.py diff --git a/temporian/implementation/numpy/BUILD b/temporian/implementation/numpy/BUILD index 16b3cffca..d1cb04191 100644 --- a/temporian/implementation/numpy/BUILD +++ b/temporian/implementation/numpy/BUILD @@ -25,12 +25,6 @@ py_library( ], ) -py_library( - name = "utils", - srcs = ["utils.py"], - srcs_version = "PY3", -) - py_library( name = "implementation_lib", srcs = ["implementation_lib.py"], diff --git a/temporian/implementation/numpy/utils.py b/temporian/implementation/numpy/utils.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/temporian/utils/BUILD b/temporian/utils/BUILD index 2df2b177d..60e0c6666 100644 --- a/temporian/utils/BUILD +++ b/temporian/utils/BUILD @@ -44,6 +44,6 @@ py_library( srcs = ["golden.py"], srcs_version = "PY3", deps = [ - # already_there/absl + # already_there/absl/flags ], ) From 584d50168b77345f094aaa6d444bb62e97c8c47e Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 14 Sep 2023 15:21:30 +0200 Subject: [PATCH 3/3] wip --- temporian/beam/io/tensorflow.py | 8 +- temporian/io/test/tensorflow_test.py | 162 +++++++++------------------ 2 files changed, 53 insertions(+), 117 deletions(-) diff --git a/temporian/beam/io/tensorflow.py b/temporian/beam/io/tensorflow.py index cc1496a0a..ebec7c5c3 100644 --- a/temporian/beam/io/tensorflow.py +++ b/temporian/beam/io/tensorflow.py @@ -24,9 +24,7 @@ def __init__(self, schema: Schema, timestamp_key: str): self._schema = schema self._timestamp_key = timestamp_key - def process( - self, example: "example_pb2.Example" - ) -> Iterator[Dict[str, Any]]: + def process(self, example: "tf.train.Example") -> Iterator[Dict[str, Any]]: dict_example = {} def get_value(key): @@ -81,10 +79,10 @@ def __init__(self, schema: Schema, timestamp_key: str): def process( self, dict_example: Dict[str, Any] - ) -> Iterator["example_pb2.Example"]: + ) -> Iterator["tensorflow.train.Example"]: ex = self._tf.train.Example() - def f(example: "tf.train.Example", key: str): + def f(example: "tensorflow.train.Example", key: str): return example.features.feature[key] # Timestamps diff --git a/temporian/io/test/tensorflow_test.py b/temporian/io/test/tensorflow_test.py index f2dfebb3e..65c700127 100644 --- a/temporian/io/test/tensorflow_test.py +++ b/temporian/io/test/tensorflow_test.py @@ -18,7 +18,6 @@ from absl.testing import absltest from numpy.testing import assert_array_equal import tensorflow as tf -from tensorflow.core.example import example_pb2 from temporian.implementation.numpy.operators.test.test_util import ( assertEqualEventSet, ) @@ -76,119 +75,58 @@ def test_to_tensorflow_record_grouped_by_index(self) -> None: to_tensorflow_record(evset, path=tmp_file, format="grouped_by_index") - self.assertEqual( - str(_extract_tfrecord(tmp_file)), - """[features { - feature { - key: "timestamp" - value { - float_list { - value: 3 - value: 4 - } - } - } - feature { - key: "i2" - value { - bytes_list { - value: "y" - } - } - } - feature { - key: "i1" - value { - int64_list { - value: 2 - } - } - } - feature { - key: "f3" - value { - bytes_list { - value: "c" - value: "d" - } - } - } - feature { - key: "f2" - value { - float_list { - value: 0.3 - value: 0.4 - } - } - } - feature { - key: "f1" - value { - int64_list { - value: 12 - value: 13 - } - } - } -} -, features { - feature { - key: "timestamp" - value { - float_list { - value: 1 - value: 2 - } - } - } - feature { - key: "i2" - value { - bytes_list { - value: "x" - } - } - } - feature { - key: "i1" - value { - int64_list { - value: 1 - } - } - } - feature { - key: "f3" - value { - bytes_list { - value: "a" - value: "b" - } - } - } - feature { - key: "f2" - value { - float_list { - value: 0.1 - value: 0.2 - } - } - } - feature { - key: "f1" - value { - int64_list { - value: 10 - value: 11 - } - } - } -} -]""", + expected_1 = tf.train.Example( + features=tf.train.Features( + feature={ + "timestamp": tf.train.Feature( + float_list=tf.train.FloatList(value=[3, 4]) + ), + "i1": tf.train.Feature( + int64_list=tf.train.Int64List(value=[2]) + ), + "i2": tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b"y"]) + ), + "f1": tf.train.Feature( + int64_list=tf.train.Int64List(value=[12, 13]) + ), + "f2": tf.train.Feature( + float_list=tf.train.FloatList(value=[0.3, 0.4]) + ), + "f3": tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b"c", b"d"]) + ), + } + ) ) + expected_2 = tf.train.Example( + features=tf.train.Features( + feature={ + "timestamp": tf.train.Feature( + float_list=tf.train.FloatList(value=[1, 2]) + ), + "i1": tf.train.Feature( + int64_list=tf.train.Int64List(value=[1]) + ), + "i2": tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b"x"]) + ), + "f1": tf.train.Feature( + int64_list=tf.train.Int64List(value=[10, 11]) + ), + "f2": tf.train.Feature( + float_list=tf.train.FloatList(value=[0.1, 0.2]) + ), + "f3": tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b"a", b"b"]) + ), + } + ) + ) + + self.assertEqual(_extract_tfrecord(tmp_file), [expected_1, expected_2]) + def test_from_tensorflow_record(self) -> None: data_dict = { "f1": [10, 11, 12, 13],