Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions _unittests/ut_df/test_dataframe_io_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ def test_enumerate_json_items(self):
items = list(enumerate_json_items(
BytesIO(TestDataFrameIOHelpers.text_json)))
self.assertEqual(TestDataFrameIOHelpers.text_json_exp, items)
items = list(enumerate_json_items(
BytesIO(TestDataFrameIOHelpers.text_json)))
self.assertEqual(TestDataFrameIOHelpers.text_json_exp, items)

def test_read_json_raw(self):
data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
Expand All @@ -133,6 +136,15 @@ def test_read_json_raw(self):
js_exp = loads(exp)
self.assertEqual(js_exp, js_read)

def test_read_json_raw_head(self):
    """
    Checks that ``head()`` can be called twice on a streaming dataframe
    built from in-memory JSON records and returns equal dataframes.

    The records are deliberately heterogeneous (nested ``name`` objects
    with different keys, a missing ``id``, a plain string ``name``) and
    are read with ``flatten=True`` and ``chunksize=1``.
    """
    records = [
        {'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
        {'name': {'given': 'Mose', 'family': 'Regner'}},
        {'id': 2, 'name': 'FayeRaker'},
    ]
    stream = StreamingDataFrame.read_json(
        records, flatten=True, chunksize=1)
    # Two consecutive calls on the same object: the second one must see
    # the same rows as the first one.
    first_head, second_head = stream.head(), stream.head()
    self.assertEqualDataFrame(first_head, second_head)

def test_pandas_json_chunksize(self):
jsonl = '''{"a": 1, "b": 2}
{"a": 3, "b": 4}'''
Expand Down Expand Up @@ -161,6 +173,18 @@ def test_read_json_rows2(self):
js = dfs[0].to_json(orient='records')
self.assertEqual('[{"a":1,"b":2},{"a":3,"b":4}]', js)

def test_read_json_rows2_head(self):
    """
    Checks that ``head()`` is repeatable on a streaming dataframe read
    from a JSON-lines byte buffer with ``lines="stream"``.

    Both calls must return a non-empty dataframe and the two results
    must be equal.
    """
    payload = b'''{"a": 1, "b": 2}
{"a": 3, "b": 4}'''
    # Sanity check with plain pandas: two records, two columns.
    reference = pandas.read_json(BytesIO(payload), lines=True)
    self.assertEqual(reference.shape, (2, 2))

    stream = StreamingDataFrame.read_json(BytesIO(payload), lines="stream")
    heads = [stream.head(), stream.head()]
    for head in heads:
        self.assertNotEmpty(head)
    self.assertEqualDataFrame(heads[0], heads[1])

def test_read_json_ijson(self):
it = StreamingDataFrame.read_json(
BytesIO(TestDataFrameIOHelpers.text_json))
Expand Down
34 changes: 33 additions & 1 deletion _unittests/ut_df/test_streaming_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,6 @@ def test_getitem(self):
df1 = sdf.to_df()
df2 = sdf2.to_df()
self.assertEqualDataFrame(df1[["cint"]], df2)
self.assertRaise(lambda: sdf["cint"], NotImplementedError)
self.assertRaise(lambda: sdf[:, "cint"], NotImplementedError)

def test_read_csv_names(self):
Expand Down Expand Up @@ -523,6 +522,39 @@ def test_describe(self):
self.assertEqualArray(desc.loc['std', :], numpy.array(
[2.886795e-01, 28867.946472]), decimal=4)

def test_set_item(self):
    """
    Checks item assignment (``sdf[key] = value``) on a streaming
    dataframe.

    Expected behaviour exercised here:

    * building a ``StreamingDataFrame`` directly from a
      ``pandas.DataFrame`` raises ``TypeError``,
    * assigning with a list of columns as key raises ``ValueError``,
    * assigning a list as value raises ``NotImplementedError``,
    * assigning a scalar adds a constant column,
    * assigning an expression built from another column adds the
      computed column.
    """
    df = pandas.DataFrame(data=dict(a=[4.5], b=[6], c=[7]))
    self.assertRaise(lambda: StreamingDataFrame(df), TypeError)
    sdf = StreamingDataFrame.read_df(df)

    def set_with_list_key():
        sdf[['a']] = 10
    self.assertRaise(set_with_list_key, ValueError)

    def set_with_list_value():
        sdf['a'] = [10]
    self.assertRaise(set_with_list_value, NotImplementedError)

    # Constant column.
    sdf['aa'] = 10
    df = sdf.to_df()
    ddf = pandas.DataFrame(data=dict(a=[4.5], b=[6], c=[7], aa=[10]))
    self.assertEqualDataFrame(df, ddf)

    # Column computed from an existing one: 6 + 10 == 16.
    sdf['bb'] = sdf['b'] + 10
    df = sdf.to_df()
    ddf = pandas.DataFrame(
        data=dict(a=[4.5], b=[6], c=[7], aa=[10], bb=[16]))
    self.assertEqualDataFrame(df, ddf)

def test_set_item_function(self):
    """
    Checks that a column obtained by applying a function to an existing
    column (``sdf['b'].apply(...)``) can be assigned as a new column of
    a streaming dataframe.
    """
    df = pandas.DataFrame(data=dict(a=[4.5], b=[6], c=[7]))
    self.assertRaise(lambda: StreamingDataFrame(df), TypeError)
    sdf = StreamingDataFrame.read_df(df)

    # 6 + 11 == 17 is the value expected in the new column.
    sdf['bb'] = sdf['b'].apply(lambda x: x + 11)
    df = sdf.to_df()
    ddf = pandas.DataFrame(
        data=dict(a=[4.5], b=[6], c=[7], bb=[17]))
    self.assertEqualDataFrame(df, ddf)


if __name__ == "__main__":
# TestStreamingDataFrame().test_describe()
Expand Down
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ jobs:
vmImage: 'ubuntu-latest'
strategy:
matrix:
Python37-Linux:
Python39-Linux:
python.version: '3.9'
maxParallel: 3

Expand Down Expand Up @@ -51,7 +51,7 @@ jobs:
vmImage: 'macOS-latest'
strategy:
matrix:
Python37-Mac:
Python39-Mac:
python.version: '3.9'
maxParallel: 3

Expand Down
4 changes: 2 additions & 2 deletions pandas_streaming/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def check(log=False):
It raises an exception.
If you want to disable the logs:

@param log if True, display information, otherwise
@return 0 or exception
:param log: if True, display information, otherwise none
:return: 0 or exception
"""
return True

Expand Down
10 changes: 5 additions & 5 deletions pandas_streaming/data/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
Returns a dummy streaming dataframe
mostly for unit test purposes.

@param n number of rows
@param chunksize chunk size
@param asfloat use random float and not random int
@param cols additional columns
@return a @see cl StreamingDataFrame
:param n: number of rows
:param chunksize: chunk size
:param asfloat: use random float and not random int
:param cols: additional columns
:return: a @see cl StreamingDataFrame
"""
if asfloat:
df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
Expand Down
Loading