Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions _unittests/ut_df/data/classic.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[{"name":"cudaGetDeviceCount",
"ph":"X",
"cat":"cuda",
"ts":1634290065724226794,
"dur":800,
"tid":"Thread 2080429824: Runtime API",
"pid":"[89792] Process",
"args":{}},
{"name":"_Z25full_reduce_tensor_kernelIfLi256ELi1ELi1ELi256EL21cudnnReduceTensorOp_t0EL21cudnnNanPropagation_t0EEv17cudnnTensorStructPjS3_PT_S5_S4_bii",
"ph":"X",
"cat":"cuda",
"ts":1634290112071305413,
"dur":1888,
"tid":"_Z25full_reduce_tensor_kernelIfLi256ELi1ELi1ELi256EL21cudnnReduceTensorOp_t0EL21cudnnNanPropagation_t0EEv17cudnnTensorStructPjS3_PT_S5_S4_bii",
"pid":"[0:1] Compute",
"args":{"Grid size":"[ 1, 1, 1 ]",
"Block size":"[ 256, 1, 1 ]"}},
{"name":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
"ph":"X",
"cat":"cuda",
"ts":1634290112071308133,
"dur":1440,
"tid":"Compute",
"pid":"[0:1] Overview",
"args":{"Grid size":"[ 1, 1, 1 ]",
"Block size":"[ 1, 256, 1 ]"}},
{"name":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
"ph":"X",
"cat":"cuda",
"ts":1634290112071308133,
"dur":1440,
"tid":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
"pid":"[0:1] Compute",
"args":{"Grid size":"[ 1, 1, 1 ]",
"Block size":"[ 1, 256, 1 ]"}}]
45 changes: 45 additions & 0 deletions _unittests/ut_df/test_dataframe_io_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,51 @@ def test_bug_documentation(self):
items.append(item)
self.assertEqual(len(items), 2)

def test_read_json_classic(self):
    """Check that ``StreamingDataFrame.read_json`` on a records-oriented
    JSON file agrees with eager ``pandas.read_json``, including a column
    derived with ``apply``, and that the stream can be consumed twice."""
    path = self.abs_path_join(__file__, 'data', 'classic.json')
    # Eager reference result computed with pandas.
    expected = pandas.read_json(path, orient='records')
    expected['ts2'] = expected['ts'].apply(lambda t: t / 1e9)
    self.assertEqual(expected.shape[1], 9)
    self.assertGreater(expected.shape[0], 2)
    # Streaming counterpart: the derived column is registered lazily.
    sdf = StreamingDataFrame.read_json(path)
    sdf['ts2'] = sdf['ts'].apply(lambda t: t / 1e9)
    first = sdf.to_df()
    # A second materialisation must produce the identical frame.
    second = sdf.to_df()
    self.assertNotEmpty(first)
    self.assertNotEmpty(second)
    self.assertEqualDataFrame(first, second)
    self.assertEqual(first.shape[1], 9)

def test_read_json_classic_file(self):
    """Check that ``StreamingDataFrame.read_json`` accepts an already
    opened file object (not just a path) and yields the same columns as
    eager ``pandas.read_json``, reproducibly across two iterations."""
    path = self.abs_path_join(__file__, 'data', 'classic.json')
    # Eager reference result computed with pandas.
    expected = pandas.read_json(path, orient='records')
    self.assertEqual(expected.shape[1], 8)
    self.assertGreater(expected.shape[0], 2)
    # Streaming read from a file handle rather than a filename.
    with open(path, "r", encoding="utf-8") as handle:
        sdf = StreamingDataFrame.read_json(handle, orient='records')
        first = sdf.to_df()
        # A second materialisation must produce the identical frame.
        second = sdf.to_df()
    self.assertNotEmpty(first)
    self.assertNotEmpty(second)
    self.assertEqualDataFrame(first, second)
    self.assertEqual(first.shape[1], 8)

def test_read_json_classic_file_formula(self):
    """Check that a lazily added column (``apply``) works when the
    streaming dataframe is built from an open file object, and that the
    result matches eager pandas and is stable across two iterations."""
    path = self.abs_path_join(__file__, 'data', 'classic.json')
    # Eager reference result with the derived column added up front.
    expected = pandas.read_json(path, orient='records')
    expected['ts2'] = expected['ts'].apply(lambda t: t / 1e9)
    self.assertEqual(expected.shape[1], 9)
    self.assertGreater(expected.shape[0], 2)
    # Streaming read from a file handle; the formula is applied lazily.
    with open(path, "r", encoding="utf-8") as handle:
        sdf = StreamingDataFrame.read_json(handle)
        sdf['ts2'] = sdf['ts'].apply(lambda t: t / 1e9)
        first = sdf.to_df()
        # A second materialisation must produce the identical frame.
        second = sdf.to_df()
    self.assertNotEmpty(first)
    self.assertNotEmpty(second)
    self.assertEqualDataFrame(first, second)
    self.assertEqual(first.shape[1], 9)


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions pandas_streaming/df/dataframe_io_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
encoding=encoding, lines=False, flatten=flatten, fLOG=fLOG):
yield el
else:
if hasattr(filename, 'seek'):
filename.seek(0)
parser = ijson.parse(filename)
current = None
curkey = None
Expand Down