From 2be896e17bab36ef8999360c781457f86f8c2802 Mon Sep 17 00:00:00 2001
From: Matthew Laurence Chen
Date: Mon, 19 Aug 2024 17:48:50 +0000
Subject: [PATCH 1/2] feat: recover struct columns from exploded Series

---

Review notes (free text after the "---" separator; not part of the commit
message and not part of the diff):

* Purpose: adds `struct(value)` to bigframes/bigquery/__init__.py. It takes a
  DataFrame and returns a Series with one dict per row, keyed by column name,
  plus a system test that round-trips a struct Series through
  `Series.struct.explode()` and `bbq.struct()`.
* `struct()` has no docstring; the other public helpers in this module are
  presumably documented -- TODO confirm and add one before merging.
* `struct()` sizes `data` by iterating `value.index`, then fills it with a
  nested Python loop (one pass over every column, positional via
  `enumerate`). NOTE(review): iterating a column presumably materializes it
  locally, once per column -- confirm, and consider a single server-side
  operation instead.
* The result is built with `bigframes.series.Series(data)` only; `value`'s
  index labels are not passed to the constructor, so the original index is
  not carried over.
* The return annotation is written `series.Series` while the body uses
  `bigframes.series.Series`, and the parameter uses
  `bigframes.dataframe.DataFrame`. NOTE(review): confirm all three names are
  importable at module scope in this file.
* The typing import in this revision reads `Any, Dict,Literal, List, ...`
  (missing space after the comma, names not alphabetized). PATCH 2/2 carries
  the sorted form.
* The new test imports numpy, pytest, bigframes.dataframe and
  bigframes.pandas but uses none of them. PATCH 2/2 drops them.
* The test compares with `check_index_type=False` and `check_dtype=False`,
  so dtype or index-type differences between the two Series are not
  asserted.

 bigframes/bigquery/__init__.py             | 11 ++++++-
 tests/system/small/bigquery/test_struct.py | 38 ++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 tests/system/small/bigquery/test_struct.py

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index fb9503dc72..70e29418e3 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -21,7 +21,7 @@
 from __future__ import annotations
 
 import typing
-from typing import Literal, Optional, Union
+from typing import Any, Dict,Literal, List, Optional, Union
 
 import bigframes.constants as constants
 import bigframes.core.groupby as groupby
@@ -239,6 +239,15 @@ def json_extract(
     return series._apply_unary_op(ops.JSONExtract(json_path=json_path))
 
 
+def struct(value: bigframes.dataframe.DataFrame) -> series.Series:
+    data: List[Dict[str, Any]] = [{} for _ in value.index]
+    for col_name in value.columns:
+        col = value[col_name]
+        for i, val in enumerate(col):
+            data[i][col_name] = val
+    return bigframes.series.Series(data)
+
+
 # Search functions defined from
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions
 
diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py
new file mode 100644
index 0000000000..714bd99cda
--- /dev/null
+++ b/tests/system/small/bigquery/test_struct.py
@@ -0,0 +1,38 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import bigframes.bigquery as bbq
+import bigframes.dataframe
+import bigframes.pandas as bpd
+import bigframes.series as series
+
+
+def test_struct_from_dataframe():
+    srs = series.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "pandas"},
+            {"version": 1, "project": "numpy"},
+        ],
+    )
+    pd.testing.assert_series_equal(
+        srs.to_pandas(),
+        bbq.struct(srs.struct.explode()).to_pandas(),
+        check_index_type=False,
+        check_dtype=False,
+    )

From 69e6273e102b20d3afd4f8bbf216eddf69a9c4c8 Mon Sep 17 00:00:00 2001
From: Matthew Laurence Chen
Date: Mon, 19 Aug 2024 17:48:50 +0000
Subject: [PATCH 2/2] feat: recover struct columns from exploded Series

---

Review notes (free text after the "---" separator; not part of the commit
message and not part of the diff):

* This message has the same subject and date as PATCH 1/2, starts from the
  same base blob for bigframes/bigquery/__init__.py (fb9503dc72), and also
  creates tests/system/small/bigquery/test_struct.py from /dev/null.
  NOTE(review): it therefore looks like a revised version of 1/2 rather than
  a change stacked on top of it; applying both in sequence would presumably
  conflict -- confirm which revision is intended.
* Relative to 1/2, the typing import is spaced and alphabetized
  (`Any, Dict, List, Literal, Optional, Union`), and the test imports only
  what it uses (pandas, bigframes.bigquery, bigframes.series). The test file
  is 34 lines instead of 38.
* The `struct()` body is the same as in 1/2: no docstring, row dicts built
  with a nested Python loop over `value.columns`, and the result constructed
  from `data` alone, so `value`'s index labels are not passed through.
* The return annotation is still written `series.Series` while the body uses
  `bigframes.series.Series`. NOTE(review): confirm both spellings resolve at
  module scope.
* The test still passes `check_index_type=False` and `check_dtype=False`, so
  dtype and index-type equality are not asserted.

 bigframes/bigquery/__init__.py             | 11 ++++++-
 tests/system/small/bigquery/test_struct.py | 34 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 tests/system/small/bigquery/test_struct.py

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index fb9503dc72..fd287eca29 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -21,7 +21,7 @@
 from __future__ import annotations
 
 import typing
-from typing import Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 import bigframes.constants as constants
 import bigframes.core.groupby as groupby
@@ -239,6 +239,15 @@ def json_extract(
     return series._apply_unary_op(ops.JSONExtract(json_path=json_path))
 
 
+def struct(value: bigframes.dataframe.DataFrame) -> series.Series:
+    data: List[Dict[str, Any]] = [{} for _ in value.index]
+    for col_name in value.columns:
+        col = value[col_name]
+        for i, val in enumerate(col):
+            data[i][col_name] = val
+    return bigframes.series.Series(data)
+
+
 # Search functions defined from
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions
 
diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py
new file mode 100644
index 0000000000..aa8fad4b89
--- /dev/null
+++ b/tests/system/small/bigquery/test_struct.py
@@ -0,0 +1,34 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+
+import bigframes.bigquery as bbq
+import bigframes.series as series
+
+
+def test_struct_from_dataframe():
+    srs = series.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "pandas"},
+            {"version": 1, "project": "numpy"},
+        ],
+    )
+    pd.testing.assert_series_equal(
+        srs.to_pandas(),
+        bbq.struct(srs.struct.explode()).to_pandas(),
+        check_index_type=False,
+        check_dtype=False,
+    )