Skip to content

Commit 6d2a84e

Browse files
committed
add tests for multiple columns and custom separator
1 parent c4b5b7f commit 6d2a84e

File tree

5 files changed

+75
-3
lines changed

5 files changed

+75
-3
lines changed

bigframes/operations/structs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class StructFrameAccessor(vendoracessors.StructFrameAccessor):
6666
def __init__(self, data: bigframes.dataframe.DataFrame) -> None:
6767
self._parent = data
6868

69-
def explode(self, column, separator: str = ".") -> bigframes.dataframe.DataFrame:
69+
def explode(self, column, *, separator: str = ".") -> bigframes.dataframe.DataFrame:
7070
df = self._parent
7171
column_labels = bigframes.core.explode.check_column(column)
7272

tests/data/nested.jsonl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{"rowindex":0,"customer_id":"jkl","day":"2023-12-18","flag":1,"event_sequence":[{"category":"B","timestamp":"2023-12-18 03:43:58","data":[{"key":"x","value":20.2533015856},{"key":"y","value":42.8363462389}]},{"category":"D","timestamp":"2023-12-18 07:15:37","data":[{"key":"x","value":62.0762664928},{"key":"z","value":83.6655402432}]}]}
1+
{"rowindex":0,"customer_id":"jkl","day":"2023-12-18","flag":1,"label":{"key": "my-key","value":"my-value"},"event_sequence":[{"category":"B","timestamp":"2023-12-18 03:43:58","data":[{"key":"x","value":20.2533015856},{"key":"y","value":42.8363462389}]},{"category":"D","timestamp":"2023-12-18 07:15:37","data":[{"key":"x","value":62.0762664928},{"key":"z","value":83.6655402432}]}],"address":{"street":"123 Test Lane","city":"Testerchon"}}
22
{"rowindex":1,"customer_id":"def","day":"2023-12-18","flag":2,"event_sequence":[{"category":"D","timestamp":"2023-12-18 23:11:11","data":[{"key":"w","value":36.1388065179}]},{"category":"B","timestamp":"2023-12-18 07:12:50","data":[{"key":"z","value":68.7673488304}]},{"category":"D","timestamp":"2023-12-18 09:09:03","data":[{"key":"x","value":57.4139647019}]},{"category":"C","timestamp":"2023-12-18 13:05:30","data":[{"key":"z","value":36.087871201}]}]}
33
{"rowindex":2,"customer_id":"abc","day":"2023-12-6","flag":0,"event_sequence":[{"category":"C","timestamp":"2023-12-06 10:37:11","data":[]},{"category":"A","timestamp":"2023-12-06 03:35:44","data":[]},{"category":"D","timestamp":"2023-12-06 13:10:57","data":[{"key":"z","value":21.8487807658}]},{"category":"B","timestamp":"2023-12-06 01:39:16","data":[{"key":"y","value":1.6380505139}]}]}
44
{"rowindex":3,"customer_id":"mno","day":"2023-12-16","flag":2,"event_sequence":[]}

tests/data/nested_schema.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@
1919
"name": "flag",
2020
"type": "INTEGER"
2121
},
22+
{
23+
"fields": [
24+
{
25+
"name": "key",
26+
"type": "STRING"
27+
},
28+
{
29+
"name": "value",
30+
"type": "STRING"
31+
}
32+
],
33+
"name": "label",
34+
"type": "RECORD"
35+
},
2236
{
2337
"fields": [
2438
{
@@ -52,5 +66,19 @@
5266
"mode": "REPEATED",
5367
"name": "event_sequence",
5468
"type": "RECORD"
69+
},
70+
{
71+
"fields": [
72+
{
73+
"name": "street",
74+
"type": "STRING"
75+
},
76+
{
77+
"name": "city",
78+
"type": "STRING"
79+
}
80+
],
81+
"name": "address",
82+
"type": "RECORD"
5583
}
5684
]
(new Python test file — its path was not captured in this view)

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_dataframe_struct_explode_multiple_columns(nested_df):
    """Check the column labels after exploding two struct columns at once.

    ``label`` and ``address`` are passed together as a list. The test
    expects each of them to be replaced, at its original position, by one
    column per child field named ``<column>.<field>`` (the default ``"."``
    separator), while every other column is left untouched.
    """
    expected_columns = [
        "customer_id",
        "day",
        "flag",
        "label.key",
        "label.value",
        "event_sequence",
        "address.street",
        "address.city",
    ]

    exploded = nested_df.struct.explode(["label", "address"])

    assert exploded.columns.to_list() == expected_columns
28+
29+
30+
def test_dataframe_struct_explode_separator(nested_df):
    """Check that a custom ``separator`` is used when naming exploded columns.

    Only ``label`` is exploded, with ``separator`` passed by keyword. The
    test expects its child fields to appear as ``label__sep__key`` and
    ``label__sep__value`` in place of ``label``, and the ``address`` struct
    column, which was not requested, to remain a single column.
    """
    expected_columns = [
        "customer_id",
        "day",
        "flag",
        "label__sep__key",
        "label__sep__value",
        "event_sequence",
        "address",
    ]

    exploded = nested_df.struct.explode("label", separator="__sep__")

    assert exploded.columns.to_list() == expected_columns

third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ def dtypes(self):
124124

125125

126126
class StructFrameAccessor:
127-
def explode(self, column, separator: str = "."):
127+
"""
128+
Accessor object for structured data properties of the DataFrame values.
129+
"""
130+
131+
def explode(self, column, *, separator: str = "."):
128132
"""
129133
Extract all child fields of struct column(s) and add to the DataFrame.
130134

0 commit comments

Comments
 (0)