diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d436385ba61ce..977fac518e863 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9,7 +9,7 @@ labeling information """ import collections -from collections import OrderedDict, abc +from collections import abc from io import StringIO import itertools import sys @@ -8189,10 +8189,10 @@ def isin(self, values): def _from_nested_dict(data): # TODO: this should be seriously cythonized - new_data = OrderedDict() + new_data = {} for index, s in data.items(): for col, v in s.items(): - new_data[col] = new_data.get(col, OrderedDict()) + new_data[col] = new_data.get(col, {}) new_data[col][index] = v return new_data diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c442f0d9bf66c..44254f54cbc7a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,5 +1,4 @@ import abc -from collections import OrderedDict from datetime import date, datetime, timedelta from io import BytesIO import os @@ -429,9 +428,9 @@ def parse( sheets = [sheet_name] # handle same-type duplicates. 
- sheets = list(OrderedDict.fromkeys(sheets).keys()) + sheets = list(dict.fromkeys(sheets)) - output = OrderedDict() + output = {} for asheetname in sheets: if verbose: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index bd5e215730397..1f90bb12e11a3 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -10,7 +10,6 @@ http://www.statsmodels.org/devel/ """ -from collections import OrderedDict import datetime from io import BytesIO import os @@ -1677,7 +1676,7 @@ def read( else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_dict(OrderedDict(data_formatted)) + data = DataFrame.from_dict(dict(data_formatted)) del data_formatted data = self._do_convert_missing(data, convert_missing) @@ -1716,7 +1715,7 @@ def any_startswith(x: str) -> bool: convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_dict(OrderedDict(retyped_data)) + data = DataFrame.from_dict(dict(retyped_data)) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1846,7 +1845,7 @@ def _do_convert_categoricals( cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_dict(OrderedDict(cat_converted_data)) + data = DataFrame.from_dict(dict(cat_converted_data)) return data @property @@ -2195,7 +2194,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_dict(OrderedDict(data_formatted)) + return DataFrame.from_dict(dict(data_formatted)) def _replace_nans(self, data): # return data @@ -2674,7 +2673,7 @@ def __init__(self, df, columns, version=117, byteorder=None): self.df = df self.columns = columns - self._gso_table = OrderedDict((("", (0, 0)),)) + self._gso_table = {"": (0, 0)} if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) @@ -2704,7 +2703,7 @@ def generate_table(self): Returns 
------- - gso_table : OrderedDict + gso_table : dict Ordered dictionary using the string found as keys and their lookup position (v,o) as values gso_df : DataFrame @@ -2762,7 +2761,7 @@ def generate_blob(self, gso_table): Parameters ---------- - gso_table : OrderedDict + gso_table : dict Ordered dictionary (str, vo) Returns @@ -2992,7 +2991,7 @@ def _write_map(self): the map with 0s. The second call writes the final map locations when all blocks have been written.""" if self._map is None: - self._map = OrderedDict( + self._map = dict( ( ("stata_data", 0), ("map", self._file.tell()), diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index e4de2147586f5..9543c9d5b59de 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,7 +1,6 @@ """ test .agg behavior / note that .apply is tested generally in test_groupby.py """ -from collections import OrderedDict import functools import numpy as np @@ -175,18 +174,14 @@ def test_aggregate_str_func(tsframe, groupbyfunc): tm.assert_frame_equal(result, expected) # group frame by function dict - result = grouped.agg( - OrderedDict([["A", "var"], ["B", "std"], ["C", "mean"], ["D", "sem"]]) - ) + result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"}) expected = DataFrame( - OrderedDict( - [ - ["A", grouped["A"].var()], - ["B", grouped["B"].std()], - ["C", grouped["C"].mean()], - ["D", grouped["D"].sem()], - ] - ) + { + "A": grouped["A"].var(), + "B": grouped["B"].std(), + "C": grouped["C"].mean(), + "D": grouped["D"].sem(), + } ) tm.assert_frame_equal(result, expected) @@ -261,22 +256,20 @@ def test_multiple_functions_tuples_and_non_tuples(df): def test_more_flexible_frame_multi_function(df): grouped = df.groupby("A") - exmean = grouped.agg(OrderedDict([["C", np.mean], ["D", np.mean]])) - exstd = grouped.agg(OrderedDict([["C", np.std], ["D", np.std]])) + exmean = grouped.agg({"C": np.mean, 
"D": np.mean}) + exstd = grouped.agg({"C": np.std, "D": np.std}) expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - d = OrderedDict([["C", [np.mean, np.std]], ["D", [np.mean, np.std]]]) + d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) # be careful - result = grouped.aggregate(OrderedDict([["C", np.mean], ["D", [np.mean, np.std]]])) - expected = grouped.aggregate( - OrderedDict([["C", np.mean], ["D", [np.mean, np.std]]]) - ) + result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) tm.assert_frame_equal(result, expected) def foo(x): @@ -288,13 +281,11 @@ def bar(x): # this uses column selection & renaming msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): - d = OrderedDict( - [["C", np.mean], ["D", OrderedDict([["foo", np.mean], ["bar", np.std]])]] - ) + d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}} grouped.aggregate(d) # But without renaming, these functions are OK - d = OrderedDict([["C", [np.mean]], ["D", [foo, bar]]]) + d = {"C": [np.mean], "D": [foo, bar]} grouped.aggregate(d) @@ -303,26 +294,20 @@ def test_multi_function_flexible_mix(df): grouped = df.groupby("A") # Expected - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", {"sum": "sum"}]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}} # this uses column selection & renaming msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): grouped.aggregate(d) # Test 1 - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", "sum"]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} # this uses column selection & renaming with pytest.raises(SpecificationError, match=msg): 
grouped.aggregate(d) # Test 2 - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", ["sum"]]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": ["sum"]} # this uses column selection & renaming with pytest.raises(SpecificationError, match=msg): grouped.aggregate(d) @@ -642,9 +627,7 @@ def test_maybe_mangle_lambdas_args(self): assert func["A"][0](0, 2, b=3) == (0, 2, 3) def test_maybe_mangle_lambdas_named(self): - func = OrderedDict( - [("C", np.mean), ("D", OrderedDict([("foo", np.mean), ("bar", np.mean)]))] - ) + func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} result = _maybe_mangle_lambdas(func) assert result == func diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index f14384928b979..765bc3bab5d4a 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -2,7 +2,6 @@ test all other .agg behavior """ -from collections import OrderedDict import datetime as dt from functools import partial @@ -96,8 +95,7 @@ def test_agg_period_index(): index = period_range(start="1999-01", periods=5, freq="M") s1 = Series(np.random.rand(len(index)), index=index) s2 = Series(np.random.rand(len(index)), index=index) - series = [("s1", s1), ("s2", s2)] - df = DataFrame.from_dict(OrderedDict(series)) + df = DataFrame.from_dict({"s1": s1, "s2": s2}) grouped = df.groupby(df.index.month) list(grouped) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 5f78e4860f1e9..89ffcd9ee313e 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from datetime import datetime import numpy as np @@ -1204,7 +1203,7 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): # GH 24880 expected = Series(data=data, index=index, name="C") result = df_cat.groupby(["A", "B"], 
observed=observed)["C"].apply( - lambda x: OrderedDict([("min", x.min()), ("max", x.max())]) + lambda x: {"min": x.min(), "max": x.max()} ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a6b9b0e35f865..3a16642641fca 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from datetime import datetime from decimal import Decimal from io import StringIO @@ -598,7 +597,7 @@ def test_groupby_as_index_agg(df): expected = grouped.mean() tm.assert_frame_equal(result, expected) - result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) + result2 = grouped.agg({"C": np.mean, "D": np.sum}) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -617,7 +616,7 @@ def test_groupby_as_index_agg(df): expected = grouped.mean() tm.assert_frame_equal(result, expected) - result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) + result2 = grouped.agg({"C": np.mean, "D": np.sum}) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index c0ec889d170d6..90e993a807bd2 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pytest @@ -654,14 +652,12 @@ def test_from_frame_error(non_frame): def test_from_frame_dtype_fidelity(): # GH 22420 df = pd.DataFrame( - OrderedDict( - [ - ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")), - ("a", [1, 1, 1, 2, 2, 2]), - ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)), - ("c", ["x", "x", "y", "z", "x", "y"]), - ] - ) + { + "dates": pd.date_range("19910905", periods=6, 
tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } ) original_dtypes = df.dtypes.to_dict() diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index a0b17ae8924b7..fab4f72dc153b 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pytest @@ -107,14 +105,12 @@ def test_to_frame_dtype_fidelity(): original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} expected_df = pd.DataFrame( - OrderedDict( - [ - ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")), - ("a", [1, 1, 1, 2, 2, 2]), - ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)), - ("c", ["x", "x", "y", "z", "x", "y"]), - ] - ) + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } ) df = mi.to_frame(index=False) df_dtypes = df.dtypes.to_dict() diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d5b23653e8a72..a7e2363ec422e 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -2,7 +2,6 @@ The tests in this package are to ensure the proper resultant dtypes of set operations. 
""" -from collections import OrderedDict import itertools as it import numpy as np @@ -16,14 +15,12 @@ from pandas.tests.indexes.conftest import indices_dict import pandas.util.testing as tm -COMPATIBLE_INCONSISTENT_PAIRS = OrderedDict( - [ - ((Int64Index, RangeIndex), (tm.makeIntIndex, tm.makeRangeIndex)), - ((Float64Index, Int64Index), (tm.makeFloatIndex, tm.makeIntIndex)), - ((Float64Index, RangeIndex), (tm.makeFloatIndex, tm.makeIntIndex)), - ((Float64Index, UInt64Index), (tm.makeFloatIndex, tm.makeUIntIndex)), - ] -) +COMPATIBLE_INCONSISTENT_PAIRS = { + (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex), + (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex), +} @pytest.fixture(params=it.combinations(indices_dict, 2), ids="-".join) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 2cc80a6e5565d..4203d0b0241ff 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1,4 +1,3 @@ -from collections import OrderedDict import datetime as dt from datetime import datetime import gzip @@ -1029,7 +1028,7 @@ def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame.from_dict(OrderedDict(cols)) + expected = DataFrame.from_dict(dict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file)