Skip to content

BUG: Preserve index order when constructing DataFrame from dict #26113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ Conversion
^^^^^^^^^^

- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was ignored. (:issue:`25905`)
-
- Bug in :class:`DataFrame` construction from dict, where the index would be sorted instead of using dict insertion order. (:issue:`24859`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make clear how this affects Python 3.5 and Python 3.6+.

Let's move this to the "Other API changes" section, as it's not a bug fix but rather an API change.

-

Strings
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6304,11 +6304,11 @@ def _gotitem(self,

Different aggregations per column.

>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
A B
max NaN 8.0
min 1.0 2.0
sum 12.0 NaN
>>> df.agg({'A' : ['max', 'min'], 'B' : ['min', 'sum']})
A B
max 7.0 NaN
min 1.0 2.0
sum NaN 15.0

Aggregate over the columns.

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ def _union_indexes(indexes, sort=True):
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
result = Index(sorted(result))
result = sorted(result) if sort else result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this branch be removed altogether, or does that break Python 3.5 compatibility?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it breaks Python 3.5 compatibility.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just move the compat check from the construction module to here, then? That would be simpler.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a little more complicated here, since we would have to do another compat check in the `len(indexes) > 1` case.

result = Index(result)
return result

indexes, kind = _sanitize_and_check(indexes)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency
from pandas.compat import lmap, lrange, raise_with_traceback
from pandas.compat import PY36, lmap, lrange, raise_with_traceback

from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na,
Expand Down Expand Up @@ -301,7 +301,7 @@ def extract_index(data):
' an index')

if have_series or have_dicts:
index = _union_indexes(indexes)
index = _union_indexes(indexes, sort=not PY36)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a comment explaining why `sort=not PY36` is passed here.


if have_raw_arrays:
lengths = list(set(raw_lengths))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,10 +620,9 @@ def test_rename(self, float_frame):

# index
data = {
'A': {'foo': 0, 'bar': 1}
'A': {'bar': 0, 'foo': 1}
}

# gets sorted alphabetical
df = DataFrame(data)
renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'})
tm.assert_index_equal(renamed.index, Index(['foo', 'bar']))
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,8 @@ def test_constructor_subclass_dict(self):
dct.update(v.to_dict())
data[k] = dct
frame = DataFrame(data)
tm.assert_frame_equal(self.frame.sort_index(), frame)
old_frame = self.frame if PY36 else self.frame.sort_index()
tm.assert_frame_equal(old_frame, frame)

def test_constructor_dict_block(self):
expected = np.array([[4., 3., 2., 1.]])
Expand Down Expand Up @@ -1110,7 +1111,8 @@ def test_constructor_list_of_series(self):

sdict = OrderedDict(zip(['x', 'Unnamed 0'], data))
expected = DataFrame.from_dict(sdict, orient='index')
tm.assert_frame_equal(result.sort_index(), expected)
result = result if PY36 else result.sort_index()
tm.assert_frame_equal(result, expected)

# none named
data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
Expand Down Expand Up @@ -1245,7 +1247,7 @@ def test_constructor_list_of_namedtuples(self):
def test_constructor_orient(self):
data_dict = self.mixed_frame.T._series
recons = DataFrame.from_dict(data_dict, orient='index')
expected = self.mixed_frame.sort_index()
expected = self.mixed_frame if PY36 else self.mixed_frame.sort_index()
tm.assert_frame_equal(recons, expected)

# dict of sequence
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ def test_multiindex_setitem(self):
# http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
df_orig = DataFrame.from_dict({'price': {
('DE', 'Coal', 'Stock'): 2,
('DE', 'Gas', 'Stock'): 4,
('DE', 'Elec', 'Demand'): 1,
('DE', 'Gas', 'Stock'): 4,
('FR', 'Gas', 'Stock'): 5,
('FR', 'Solar', 'SupIm'): 0,
('FR', 'Wind', 'SupIm'): 0
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1588,7 +1588,7 @@ def test_concat_series_axis1(self, sort=sort):
s = Series(randn(3), index=['c', 'a', 'b'], name='A')
s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
result = concat([s, s2], axis=1, sort=sort)
expected = DataFrame({'A': s, 'B': s2})
expected = DataFrame({'A': s, 'B': s2}).sort_index()
assert_frame_equal(result, expected)

def test_concat_series_axis1_names_applied(self):
Expand Down