-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Get dummies #4458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Get dummies #4458
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,13 +7,15 @@ | |
|
||
import nose | ||
|
||
from pandas import DataFrame | ||
from pandas import DataFrame, Series | ||
import pandas as pd | ||
|
||
from numpy import nan | ||
import numpy as np | ||
|
||
from pandas.core.reshape import melt, convert_dummies, lreshape | ||
from pandas.util.testing import assert_frame_equal | ||
|
||
from pandas.core.reshape import melt, convert_dummies, lreshape, get_dummies | ||
import pandas.util.testing as tm | ||
from pandas.compat import StringIO, cPickle, range | ||
|
||
|
@@ -145,6 +147,60 @@ def test_multiindex(self): | |
self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) | ||
|
||
|
||
class TestGetDummies(unittest.TestCase):
    """Tests for ``get_dummies`` on plain lists and on Series inputs."""

    def test_basic(self):
        """A list, a Series and an indexed Series all yield the same dummies.

        The result carries the index of the input: a default integer index
        for the list and the plain Series, and ``['A', 'B', 'C']`` for the
        explicitly indexed Series.
        """
        s_list = list('abc')
        s_series = Series(s_list)
        s_series_index = Series(s_list, list('ABC'))

        expected = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0},
                              'b': {0: 0.0, 1: 1.0, 2: 0.0},
                              'c': {0: 0.0, 1: 0.0, 2: 1.0}})
        assert_frame_equal(get_dummies(s_list), expected)
        assert_frame_equal(get_dummies(s_series), expected)

        expected.index = list('ABC')
        assert_frame_equal(get_dummies(s_series_index), expected)

    def test_just_na(self):
        """An all-NaN input yields an empty frame that keeps the input index."""
        just_na_list = [np.nan]
        just_na_series = Series(just_na_list)
        just_na_series_index = Series(just_na_list, index=['A'])

        res_list = get_dummies(just_na_list)
        res_series = get_dummies(just_na_series)
        res_series_index = get_dummies(just_na_series_index)

        # No dummy columns are produced for NaN by default.
        self.assertTrue(res_list.empty)
        self.assertTrue(res_series.empty)
        self.assertTrue(res_series_index.empty)

        # The row index is still taken from the input.
        self.assertEqual(res_list.index.tolist(), [0])
        self.assertEqual(res_series.index.tolist(), [0])
        self.assertEqual(res_series_index.index.tolist(), ['A'])

    def test_include_na(self):
        """NaN is skipped by default and gets a column with ``dummy_na=True``."""
        s = ['a', 'b', np.nan]
        res = get_dummies(s)
        exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0},
                         'b': {0: 0.0, 1: 1.0, 2: 0.0}})
        assert_frame_equal(res, exp)

        res_na = get_dummies(s, dummy_na=True)
        # Build the expected frame row-wise with an explicit column list.
        # Constructing it from a dict with mixed NaN/str keys and then
        # selecting columns by position (``.iloc[:, [1, 2, 0]]``) relies on
        # whatever column order the dict constructor produces, which is not
        # dependable when the keys cannot be ordered against each other
        # (float vs str comparison raises on Python 3).
        exp_na = DataFrame([[1.0, 0.0, 0.0],
                            [0.0, 1.0, 0.0],
                            [0.0, 0.0, 1.0]],
                           columns=['a', 'b', nan])
        # HACK: work around NaN handling in assert_index_equal by reusing the
        # result's column index on the expected frame.
        exp_na.columns = res_na.columns
        assert_frame_equal(res_na, exp_na)

        res_just_na = get_dummies([nan], dummy_na=True)
        exp_just_na = DataFrame({nan: {0: 1.0}})
        # HACK: same NaN-in-index workaround as above.
        exp_just_na.columns = res_just_na.columns
        assert_frame_equal(res_just_na, exp_just_na)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback the weird assert_frame_equal bug is here (if you remove the hack, this fails, and I can't repro it outside of this)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh... I see,
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm... that's not it... let me look further
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if you read my comment above: #4458 (comment) (I blame numpy)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@hayd I actually think this is a more general issue; your hack is ok for now....
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The funny thing is I cannot repro this, e.g. while in your example, the same is False!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know! It's really weird... it's the
||
|
||
|
||
class TestConvertDummies(unittest.TestCase): | ||
def test_convert_dummies(self): | ||
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ha! I was just looking at that test before I saw it failed and thinking "hmmm does that work in python 3" - doh!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
obviously should be using
exp_na.reindex_axis(['a', 'b', np.nan], 1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pushed fix to master