Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix column drop issue when first row has missing value #353

Merged
merged 17 commits into from
May 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autovizwidget/autovizwidget/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.11.3'
__version__ = '0.11.4'
2 changes: 1 addition & 1 deletion hdijupyterutils/hdijupyterutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.11.3'
__version__ = '0.11.4'
2 changes: 1 addition & 1 deletion sparkmagic/sparkmagic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.11.3'
__version__ = '0.11.4'
from sparkmagic.serverextension.handlers import load_jupyter_server_extension


Expand Down
23 changes: 22 additions & 1 deletion sparkmagic/sparkmagic/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from IPython.core.error import UsageError
from mock import MagicMock
from nose.tools import assert_equals, assert_is
import pandas as pd
from pandas.util.testing import assert_frame_equal

from sparkmagic.livyclientlib.exceptions import BadUserDataException
from sparkmagic.utils.utils import parse_argstring_or_throw
from sparkmagic.utils.utils import parse_argstring_or_throw, records_to_dataframe
from sparkmagic.utils.constants import SESSION_KIND_PYSPARK


def test_parse_argstring_or_throw():
Expand All @@ -20,3 +23,21 @@ def test_parse_argstring_or_throw():
assert False
except ValueError as e:
assert_is(e, parse_argstring.side_effect)


def test_records_to_dataframe_missing_value_first():
    """Columns absent from the FIRST record must still appear in the frame.

    Regression test for the column-drop bug: when the leading JSON record
    lacks a key ("nullv" here), the resulting DataFrame must still carry
    that column, filled with a missing value for the short row.
    """
    result = """{"z":100, "y":50}
{"z":25, "nullv":1.0, "y":10}"""

    expected = pd.DataFrame(
        [{'z': 100, "nullv": None, 'y': 50},
         {'z': 25, "nullv": 1, 'y': 10}],
        columns=['z', "nullv", 'y'])
    actual = records_to_dataframe(result, SESSION_KIND_PYSPARK)
    assert_frame_equal(expected, actual)


def test_records_to_dataframe_missing_value_later():
    """Columns absent from a LATER record must be null-filled, not dropped.

    Companion to the first-row case: here the second JSON record is the one
    missing "nullv"; the column must survive with a missing value in that row.
    """
    result = """{"z":25, "nullv":1.0, "y":10}
{"z":100, "y":50}"""

    expected = pd.DataFrame(
        [{'z': 25, "nullv": 1, 'y': 10},
         {'z': 100, "nullv": None, 'y': 50}],
        columns=['z', "nullv", 'y'])
    actual = records_to_dataframe(result, SESSION_KIND_PYSPARK)
    assert_frame_equal(expected, actual)
17 changes: 13 additions & 4 deletions sparkmagic/sparkmagic/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,19 @@ def records_to_dataframe(records_text, kind):
if kind == constants.SESSION_KIND_SPARKR and len(data_array) > 0:
data_array = data_array[0]

if len(data_array) > 0:
df = pd.DataFrame(data_array, columns=data_array[0].keys())
else:
df = pd.DataFrame(data_array)
df = pd.DataFrame(data_array)

if len(data_array) > 0:
# This will assign the columns in the right order. If we simply did
# df = pd.DataFrame(data_array, columns=data_array[0].keys())
# in the code defining df, above, we could get an issue where the first element
# has some columns as null, and thus would drop the columns from the df altogether.
# Refer to https://github.com/jupyter-incubator/sparkmagic/issues/346 for
# more details.
for data in data_array:
if len(data.keys()) == len(df.columns):
df = df[list(data.keys())]
break

coerce_pandas_df_to_numeric_datetime(df)
return df
Expand Down