From aca7a0827050754d2c636d9c3ed74f2f29663c1c Mon Sep 17 00:00:00 2001 From: Guillaume Gay Date: Sun, 30 Aug 2015 12:39:58 +0200 Subject: [PATCH 1/2] updating docs for the new sorting mechanisms - GH #10886 --- doc/source/10min.rst | 4 ++-- doc/source/advanced.rst | 18 +++++++++--------- doc/source/api.rst | 1 - doc/source/basics.rst | 10 +++++----- doc/source/cookbook.rst | 6 +++--- doc/source/reshaping.rst | 6 +++--- 6 files changed, 22 insertions(+), 23 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 1714e00030026..359ec76533520 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -157,7 +157,7 @@ Sorting by values .. ipython:: python - df.sort(columns='B') + df.sort_values(by='B') Selection --------- @@ -680,7 +680,7 @@ Sorting is per order in the categories, not lexical order. .. ipython:: python - df.sort("grade") + df.sort_values(by="grade") Grouping by a categorical column shows also empty categories. diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 850f59c2713eb..973674fe62745 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -286,7 +286,7 @@ As usual, **both sides** of the slicers are included as this is label indexing. names=['lvl0', 'lvl1']) dfmi = pd.DataFrame(np.arange(len(miindex)*len(micolumns)).reshape((len(miindex),len(micolumns))), index=miindex, - columns=micolumns).sortlevel().sortlevel(axis=1) + columns=micolumns).sort_index().sort_index(axis=1) dfmi Basic multi-index slicing using slices, lists, and labels. @@ -458,7 +458,7 @@ correctly. You can think about breaking the axis into unique groups, where at the hierarchical level of interest, each distinct group shares a label, but no two have the same label. However, the ``MultiIndex`` does not enforce this: **you are responsible for ensuring that things are properly sorted**. 
There is -an important new method ``sortlevel`` to sort an axis within a ``MultiIndex`` +an important new method ``sort_index`` to sort an axis within a ``MultiIndex`` so that its labels are grouped and sorted by the original ordering of the associated factor at that level. Note that this does not necessarily mean the labels will be sorted lexicographically! @@ -468,19 +468,19 @@ labels will be sorted lexicographically! import random; random.shuffle(tuples) s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples)) s - s.sortlevel(0) - s.sortlevel(1) + s.sort_index(level=0) + s.sort_index(level=1) .. _advanced.sortlevel_byname: -Note, you may also pass a level name to ``sortlevel`` if the MultiIndex levels +Note, you may also pass a level name to ``sort_index`` if the MultiIndex levels are named. .. ipython:: python s.index.set_names(['L1', 'L2'], inplace=True) - s.sortlevel(level='L1') - s.sortlevel(level='L2') + s.sort_index(level='L1') + s.sort_index(level='L2') Some indexing will work even if the data are not sorted, but will be rather inefficient and will also return a copy of the data rather than a view: @@ -488,14 +488,14 @@ inefficient and will also return a copy of the data rather than a view: .. ipython:: python s['qux'] - s.sortlevel(1)['qux'] + s.sort_index(level=1)['qux'] On higher dimensional objects, you can sort any of the other axes by level if they have a MultiIndex: .. ipython:: python - df.T.sortlevel(1, axis=1) + df.T.sort_index(level=1, axis=1) The ``MultiIndex`` object has code to **explicitly check the sort depth**.
Thus, if you try to index at a depth at which the index is not sorted, it will raise diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f4fd860f270a..5fc2790d1e68c 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -437,7 +437,6 @@ Reshaping, sorting Series.reorder_levels Series.sort_values Series.sort_index - Series.sortlevel Series.swaplevel Series.unstack Series.searchsorted diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 956c90ae63034..3ea90447dd44f 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -328,7 +328,7 @@ equality to be True: df1 = pd.DataFrame({'col':['foo', 0, np.nan]}) df2 = pd.DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) df1.equals(df2) - df1.equals(df2.sort()) + df1.equals(df2.sort_index()) Comparing array-like objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1489,7 +1489,7 @@ The ``by`` argument can take a list of column names, e.g.: .. ipython:: python - df1[['one', 'two', 'three']].sort_index(by=['one','two']) + df1[['one', 'two', 'three']].sort_values(by=['one','two']) These methods have special treatment of NA values via the ``na_position`` argument: @@ -1497,8 +1497,8 @@ argument: .. ipython:: python s[2] = np.nan - s.order() - s.order(na_position='first') + s.sort_values() + s.sort_values(na_position='first') .. _basics.searchsorted: @@ -1564,7 +1564,7 @@ all levels to ``by``. .. ipython:: python df1.columns = pd.MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')]) - df1.sort_index(by=('a','two')) + df1.sort_values(by=('a','two')) Copying diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 9e7b9ad0b7582..0b05f062f5fce 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -309,7 +309,7 @@ Method 2 : sort then take first of each .. ipython:: python - df.sort("BBB").groupby("AAA", as_index=False).first() + df.sort_values(by="BBB").groupby("AAA", as_index=False).first() Notice the same results, with the exception of the index. 
@@ -410,7 +410,7 @@ Sorting .. ipython:: python - df.sort(('Labs', 'II'), ascending=False) + df.sort_values(by=('Labs', 'II'), ascending=False) `Partial Selection, the need for sortedness; `__ @@ -547,7 +547,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to code_groups = df.groupby('code') - agg_n_sort_order = code_groups[['data']].transform(sum).sort('data') + agg_n_sort_order = code_groups[['data']].transform(sum).sort_values(by='data') sorted_df = df.ix[agg_n_sort_order.index] diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 26aaf9c2be69d..dbf3b838593a9 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -164,9 +164,9 @@ will result in a **sorted** copy of the original DataFrame or Series: index = pd.MultiIndex.from_product([[2,1], ['a', 'b']]) df = pd.DataFrame(np.random.randn(4), index=index, columns=['A']) df - all(df.unstack().stack() == df.sort()) + all(df.unstack().stack() == df.sort_index()) -while the above code will raise a ``TypeError`` if the call to ``sort`` is +while the above code will raise a ``TypeError`` if the call to ``sort_index`` is removed. .. _reshaping.stack_multiple: @@ -206,7 +206,7 @@ Missing Data These functions are intelligent about handling missing data and do not expect each subgroup within the hierarchical index to have the same set of labels. They also can handle the index being unsorted (but you can make it sorted by -calling ``sortlevel``, of course). Here is a more complex example: +calling ``sort_index``, of course). Here is a more complex example: .. 
ipython:: python From 586c77ec7aa79237b529569fbc8c95ca5751ae16 Mon Sep 17 00:00:00 2001 From: Guillaume Gay Date: Sun, 30 Aug 2015 14:47:22 +0200 Subject: [PATCH 2/2] shut down warnings in the whats new files --- doc/source/api.rst | 1 + doc/source/categorical.rst | 12 ++++++------ doc/source/whatsnew/v0.13.1.txt | 3 ++- doc/source/whatsnew/v0.15.0.txt | 3 ++- doc/source/whatsnew/v0.7.3.txt | 2 +- doc/source/whatsnew/v0.9.1.txt | 3 ++- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 5fc2790d1e68c..2f4fd860f270a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -437,6 +437,7 @@ Reshaping, sorting Series.reorder_levels Series.sort_values Series.sort_index + Series.sortlevel Series.swaplevel Series.unstack Series.searchsorted diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 0c63759201517..3c9b538caa555 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -280,9 +280,9 @@ meaning and certain operations are possible. If the categorical is unordered, `` .. ipython:: python s = pd.Series(pd.Categorical(["a","b","c","a"], ordered=False)) - s.sort() + s.sort_values(inplace=True) s = pd.Series(["a","b","c","a"]).astype('category', ordered=True) - s.sort() + s.sort_values(inplace=True) s s.min(), s.max() @@ -302,7 +302,7 @@ This is even true for strings and numeric data: s = pd.Series([1,2,3,1], dtype="category") s = s.cat.set_categories([2,3,1], ordered=True) s - s.sort() + s.sort_values(inplace=True) s s.min(), s.max() @@ -320,7 +320,7 @@ necessarily make the sort order the same as the categories order. 
s = pd.Series([1,2,3,1], dtype="category") s = s.cat.reorder_categories([2,3,1], ordered=True) s - s.sort() + s.sort_values(inplace=True) s s.min(), s.max() @@ -349,14 +349,14 @@ The ordering of the categorical is determined by the ``categories`` of that colu dfs = pd.DataFrame({'A' : pd.Categorical(list('bbeebbaa'), categories=['e','a','b'], ordered=True), 'B' : [1,2,1,2,2,1,2,1] }) - dfs.sort(['A', 'B']) + dfs.sort_values(by=['A', 'B']) Reordering the ``categories`` changes a future sort. .. ipython:: python dfs['A'] = dfs['A'].cat.reorder_categories(['a','b','e']) - dfs.sort(['A','B']) + dfs.sort_values(by=['A','B']) Comparisons ----------- diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.txt index 64ca1612f00c1..349acf508bbf3 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.txt @@ -120,7 +120,8 @@ API changes equal. (:issue:`5283`) See also :ref:`the docs` for a motivating example. .. ipython:: python - + :okwarning: + df = DataFrame({'col':['foo', 0, np.nan]}) df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) df.equals(df2) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index 01dc8bb080726..a33e0f19961ab 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -67,7 +67,8 @@ For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python - + :okwarning: + df = DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) df["grade"] = df["raw_grade"].astype("category") diff --git a/doc/source/whatsnew/v0.7.3.txt b/doc/source/whatsnew/v0.7.3.txt index afb4b8faac2cc..21aa16e5fcb06 100644 --- a/doc/source/whatsnew/v0.7.3.txt +++ b/doc/source/whatsnew/v0.7.3.txt @@ -83,6 +83,7 @@ When calling ``apply`` on a grouped Series, the return value will also be a Series, to be more consistent with the ``groupby`` behavior with DataFrame: .. 
ipython:: python + :okwarning: df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], @@ -93,4 +94,3 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: grouped = df.groupby('A')['C'] grouped.describe() grouped.apply(lambda x: x.order()[-2:]) # top 2 values - diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index 6718a049a0ab9..ce7439b8ecd92 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -21,6 +21,7 @@ New features specified in a per-column manner to support multiple sort orders (:issue:`928`) .. ipython:: python + :okwarning: df = DataFrame(np.random.randint(0, 2, (6, 3)), columns=['A', 'B', 'C']) @@ -66,7 +67,7 @@ New features .. ipython:: python df[df>0] - + df.where(df>0) df.where(df>0,-df)