From ce2499a855b18eb8db79ce4f497ba94dee462b8e Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Fri, 22 Sep 2017 01:22:46 -0400 Subject: [PATCH 1/6] BUG: coerce pd.wide_to_long suffixes to numeric --- doc/source/whatsnew/v0.21.0.txt | 1 - pandas/core/frame.py | 44 +++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 89e2d3006696c..4911ecbb161a5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1172,4 +1172,3 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9ce6b6148be56..19aa50640b688 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4587,10 +4587,10 @@ def unstack(self, level=-1, fill_value=None): leaving identifier variables set. This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. + or more columns are identifier variables (`id_vars`), while other groups of + columns, considered measured variables (`value_vars`), are "unpivoted" so + that each group consists of two new columns, a 'variable', labeled by + `var_name`, and its corresponding 'value', labeled by `value_name`. %(versionadded)s Parameters @@ -4599,13 +4599,14 @@ def unstack(self, level=-1, fill_value=None): id_vars : tuple, list, or ndarray, optional Column(s) to use as identifier variables. value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. 
- var_name : scalar - Name to use for the 'variable' column. If None it uses + Column(s) to unpivot. If list of lists, simultaneously unpivot + each sublist into its own variable column. If not specified, uses all + columns that are not set as `id_vars`. + var_name : scalar or list + Name(s) to use for the 'variable' column(s). If None it uses ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. + value_name : scalar or list, default 'value' + Name(s) to use for the 'value' column(s). col_level : int or string, optional If columns are a MultiIndex then use this level to melt. @@ -4673,6 +4674,29 @@ def unstack(self, level=-1, fill_value=None): 1 b B E 3 2 c B E 5 + Simultaneously melt multiple groups of columns: + + >>> df2 = pd.DataFrame({'City': ['Houston', 'Miami'], + 'Mango':[4, 10], + 'Orange': [10, 8], + 'Gin':[16, 200], + 'Vodka':[20, 33]}, + columns=['City','Mango', 'Orange', 'Gin', 'Vodka']) + >>> df2 + City Mango Orange Gin Vodka + 0 Houston 4 10 16 20 + 1 Miami 10 8 200 33 + + >>> %(caller)sid_vars='City', + value_vars=[['Mango', 'Orange'], ['Gin', 'Vodka']], + var_name=['Fruit', 'Drink'], + value_name=['Pounds', 'Ounces']) + City Fruit Pounds Drink Ounces + 0 Houston Mango 4 Gin 16 + 1 Miami Mango 10 Gin 200 + 2 Houston Orange 10 Vodka 20 + 3 Miami Orange 8 Vodka 33 + """) @Appender(_shared_docs['melt'] % From b4f3a30f18cef6d1f09d699adb030d62de9c35ec Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Mon, 25 Sep 2017 19:27:08 -0400 Subject: [PATCH 2/6] ENH: simultaneous melting --- doc/source/reshaping.rst | 30 +++++++++++++++++++++++------- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/frame.py | 2 ++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 1b81d83bb76c7..2d1850a07817e 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -276,13 +276,29 @@ For instance, .. 
ipython:: python - cheese = pd.DataFrame({'first' : ['John', 'Mary'], - 'last' : ['Doe', 'Bo'], - 'height' : [5.5, 6.0], - 'weight' : [130, 150]}) - cheese - cheese.melt(id_vars=['first', 'last']) - cheese.melt(id_vars=['first', 'last'], var_name='quantity') + df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'], + 'State': ['Texas', 'Texas', 'Alabama'], + 'Mango':[4, 10, 90], + 'Orange': [10, 8, 14], + 'Watermelon':[40, 99, 43], + 'Gin':[16, 200, 34], + 'Vodka':[20, 33, 18]}, + columns=['City', 'State', 'Mango', 'Orange', + 'Watermelon', 'Gin', 'Vodka']) + df.melt(id_vars=['City', 'State'], value_vars=['Mango', 'Orange'], + var_name='Fruit', value_name='Pounds') + +.. versionadded:: 0.22.0 + +Passing a list of lists to `value_vars` allows you to simultaneously melt +independent variable groups. The groups need not be the same size. + +.. ipython:: python + + df.melt(id_vars=['City', 'State'], + value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']], + var_name=['Fruit', 'Drink'], + value_name=['Pounds', 'Ounces']) Another way to transform is to use the ``wide_to_long`` panel data convenience function. diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 93fd218bd7743..c463a90ebe09b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -13,7 +13,7 @@ version. New features ~~~~~~~~~~~~ -- +- Simultaneous melting of independent groups of columns is now possible with ``melt``. - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 19aa50640b688..3085327f63bb9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4674,6 +4674,8 @@ def unstack(self, level=-1, fill_value=None): 1 b B E 3 2 c B E 5 + .. 
versionadded:: 0.22.0 + Simultaneously melt multiple groups of columns: >>> df2 = pd.DataFrame({'City': ['Houston', 'Miami'], From d570a715baa60d0329de29480963a7fc1a4a5778 Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Tue, 26 Sep 2017 12:18:14 -0400 Subject: [PATCH 3/6] added lots of comments --- pandas/core/reshape/melt.py | 299 +++++++++++++++++++++++++++++------- 1 file changed, 246 insertions(+), 53 deletions(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 46edc0b96b7c2..42733112bfc5a 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -6,7 +6,8 @@ from pandas import compat from pandas.core.categorical import Categorical -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.frame import DataFrame +from pandas.core.index import MultiIndex from pandas.core.frame import _shared_docs from pandas.util._decorators import Appender @@ -16,72 +17,264 @@ from pandas.core.tools.numeric import to_numeric -@Appender(_shared_docs['melt'] % - dict(caller='pd.melt(df, ', - versionadded="", - other='DataFrame.melt')) -def melt(frame, id_vars=None, value_vars=None, var_name=None, - value_name='value', col_level=None): - # TODO: what about the existing index? 
- if id_vars is not None: - if not is_list_like(id_vars): - id_vars = [id_vars] - elif (isinstance(frame.columns, ABCMultiIndex) and - not isinstance(id_vars, list)): - raise ValueError('id_vars must be a list of tuples when columns' - ' are a MultiIndex') +# ensure that the correct tuple/list is used when selecting +# from a MultiIndex/Index frame +def check_vars(frame, var, var_string, num_col_levels): + for v in var: + if num_col_levels > 1: + if not isinstance(v, tuple): + raise ValueError('{} must be a list of tuples' + ' when columns are a MultiIndex' + .format(var_string)) + elif len(v) != num_col_levels: + raise ValueError('all tuples in {} must be length {}' + .format(var_string, + frame.columns.nlevels)) else: - id_vars = list(id_vars) - else: - id_vars = [] - - if value_vars is not None: - if not is_list_like(value_vars): - value_vars = [value_vars] - elif (isinstance(frame.columns, ABCMultiIndex) and - not isinstance(value_vars, list)): - raise ValueError('value_vars must be a list of tuples when' - ' columns are a MultiIndex') - else: - value_vars = list(value_vars) - frame = frame.loc[:, id_vars + value_vars] + if is_list_like(v) and len(v) > 1: + raise ValueError('DataFrame has only a single level of ' + 'columns. {} is not a column'.format(v)) + + +def melt_one(frame, id_vars=None, value_vars=None, var_name=None, + value_name='value', col_level=None, extra_group=0, + var_end=None): + """ + melts exactly one group. Parameters are a single list not + a list of lists like the main melt function + """ + + # TODO: what about the existing index? + # Assume all column levels used when none given + if len(col_level) == 0: + num_col_levels = frame.columns.nlevels else: - frame = frame.copy() + num_col_levels = len(col_level) - if col_level is not None: # allow list or other?
- # frame is a copy - frame.columns = frame.columns.get_level_values(col_level) + check_vars(frame, id_vars, 'id_vars', num_col_levels) + check_vars(frame, value_vars, 'value_vars', num_col_levels) - if var_name is None: - if isinstance(frame.columns, ABCMultiIndex): - if len(frame.columns.names) == len(set(frame.columns.names)): - var_name = frame.columns.names + if var_name != [] and len(var_name) != num_col_levels: + raise ValueError('Length of var_name must match effective number of ' + 'column levels.') + + # allow both integer location and label for column levels + if col_level != []: + droplevels = list(range(frame.columns.nlevels)) + for level in col_level: + if isinstance(level, int): + droplevels.remove(level) + else: + droplevels.remove(frame.columns.names.index(level)) + if droplevels != []: + frame = frame.copy() + frame.columns = frame.columns.droplevel(droplevels) + + for iv in id_vars: + if iv not in frame.columns: + raise KeyError('{} not in columns'.format(iv)) + + if value_vars != []: + for vv in value_vars: + if vv not in frame.columns: + raise KeyError('{} not in columns'.format(vv)) + + # use column level names if available, if not auto-name them + if var_name == []: + names = list(frame.columns.names) + if len(names) == 1: + if names[0] is None: + var_name.append('variable') else: - var_name = ['variable_{i}'.format(i=i) - for i in range(len(frame.columns.names))] + var_name.append(names[0]) + elif names.count(None) == 1: + names[names.index(None)] = 'variable' + var_name = names + else: + # small API break - use column level names when available + missing_name_count = 0 + for name in names: + if name is None: + var_name.append('variable_{}'.format(missing_name_count)) + missing_name_count += 1 + else: + var_name.append(name) + + # when using default var_name, append int to make unique + if var_end is not None: + var_name = [vn + '_' + str(var_end) for vn in var_name] + + N = len(frame) + + # find integer location of all the melted columns +
non_id_ilocs = [] + if value_vars != []: + for v in value_vars: + for i, v1 in enumerate(frame.columns): + if v == v1: + non_id_ilocs.append(i) + else: + if id_vars == []: + non_id_ilocs = list(range(frame.shape[1])) else: - var_name = [frame.columns.name if frame.columns.name is not None - else 'variable'] - if isinstance(var_name, compat.string_types): - var_name = [var_name] + for i, v in enumerate(frame.columns): + if v not in id_vars: + non_id_ilocs.append(i) - N, K = frame.shape - K -= len(id_vars) + K = len(non_id_ilocs) mdata = {} + mcolumns = [] + + # id_vars do not get melted, but need to repeat for each + # column in melted group. extra_group is used for cases + # when first group is not the longest for col in id_vars: - mdata[col] = np.tile(frame.pop(col).values, K) + pandas_obj = frame[col] + if isinstance(pandas_obj, DataFrame): + for i in range(pandas_obj.shape[1]): + col_name = col + '_id_' + str(i) + mdata[col_name] = np.tile(pandas_obj.iloc[:, i].values, + K + extra_group) + mcolumns.append(col_name) + else: + mdata[col] = np.tile(pandas_obj, K + extra_group) + mcolumns.append(col) - mcolumns = id_vars + var_name + [value_name] + # melt all the columns into one long array + values = np.concatenate([frame.iloc[:, i] for i in non_id_ilocs]) + if extra_group > 0: + values = np.concatenate((values, np.full([N * extra_group], np.nan))) + mdata[value_name[0]] = values - mdata[value_name] = frame.values.ravel('F') + # the column names of the melted groups need to repeat for i, col in enumerate(var_name): - # asanyarray will keep the columns as an Index - mdata[col] = np.asanyarray(frame.columns - ._get_level_values(i)).repeat(N) + values = frame.columns[non_id_ilocs]._get_level_values(i) - from pandas import DataFrame - return DataFrame(mdata, columns=mcolumns) + if isinstance(values, MultiIndex): + # asanyarray will keep the columns as an Index + values = np.asanyarray(values).repeat(N) + else: + # faster to use lists than np.repeat? 
+ data_list = [] + for v in values.tolist(): + data_list.extend([v] * N) + values = data_list + + # Append missing values for any groups shorter than largest + if extra_group > 0: + values = np.concatenate((values, + np.full([N * extra_group], np.nan))) + mdata[col] = values + mcolumns += var_name + value_name + + return mdata, mcolumns + + +def convert_to_list(val): + if val is None: + return [] + elif isinstance(val, np.ndarray): + return val.tolist() + elif not is_list_like(val): + return [val] + else: + return list(val) + + +@Appender(_shared_docs['melt'] % + dict(caller='pd.melt(df, ', + versionadded="", + other='DataFrame.melt')) +def melt(frame, id_vars=None, value_vars=None, var_name=None, + value_name='value', col_level=None): + + # much easier to handle parameters when they are all lists + id_vars = convert_to_list(id_vars) + value_vars = convert_to_list(value_vars) + var_name = convert_to_list(var_name) + value_name = convert_to_list(value_name) + col_level = convert_to_list(col_level) + + # When a list of lists is passed, assume multiple melt groups + if value_vars != [] and isinstance(value_vars[0], list): + if var_name != []: + if len(value_vars) != len(var_name): + raise ValueError('Number of inner lists of value_vars must ' + 'equal length of var_name ' + '{} != {}'.format(len(value_vars), + len(var_name))) + else: + # for consistency, when the default var_name is used + var_name = [[]] * len(value_vars) + + if len(value_name) > 1: + if len(value_vars) != len(value_name): + raise ValueError('Number of inner lists of value_vars must ' + 'equal length of value_name ' + '{} != {}'.format(len(value_vars), + len(value_name))) + # allow for value_name to be a single item and attach int to it + else: + value_name = [value_name[0] + '_' + str(i) + for i in range(len(value_vars))] + + # get the total number of columns in each melt group + # This is not just the length of the list because this function + # handles columns with the same names, which is common
when + # using multiindex frames + value_vars_length = [] + for vv in value_vars: + count = 0 + for col in frame.columns.values: + if col in vv: + count += 1 + value_vars_length.append(count) + + # Need the max number of columns for all the melt groups to + # correctly append NaNs to end of unbalanced melt groups + max_group_len = max(value_vars_length) + + # store each melted group as a dictionary in a list + mdata_list = [] + + # store columns from each melted group in a list of lists + mcolumns_list = [] + + # individually melt each group + vars_zipped = zip(value_vars, var_name, value_name, value_vars_length) + for i, (val_v, var_n, val_n, vvl) in enumerate(vars_zipped): + var_n = convert_to_list(var_n) + val_n = convert_to_list(val_n) + + # only melt the id_vars for the first group + id_vars_ = [] if i > 0 else id_vars + + # append int at end of var_name to make unique + var_end = i if var_n == [] else None + + md, mc = melt_one(frame, id_vars=id_vars_, value_vars=val_v, + var_name=var_n, value_name=val_n, + col_level=col_level, + extra_group=max_group_len - vvl, + var_end=var_end) + + mdata_list.append(md) + mcolumns_list.append(mc) + + # make one large dictionary with all data for constructor + mdata = {} + for d in mdata_list: + mdata.update(d) + + mcolumns = [e for lst in mcolumns_list for e in lst] + return DataFrame(mdata, columns=mcolumns) + + else: + mdata, mcolumns = melt_one(frame, id_vars=id_vars, + value_vars=value_vars, var_name=var_name, + value_name=value_name, col_level=col_level) + return DataFrame(mdata, columns=mcolumns) def lreshape(data, groups, dropna=True, label=None): From 755c3db960fa34db66efbc0c3787b64acfc60a92 Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Thu, 2 Nov 2017 15:22:17 -0400 Subject: [PATCH 4/6] updated melt docs and put melt in own module --- doc/source/reshaping.rst | 57 +++++++++++++------- doc/source/whatsnew/v0.22.0.txt | 40 +++++++++++++- pandas/tests/reshape/test_melt.py | 86 ++++++++++++++++++++++++++++++- 3 files 
changed, 162 insertions(+), 21 deletions(-) diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 2d1850a07817e..2a5fc732ccbb4 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -265,40 +265,59 @@ the right thing: Reshaping by Melt ----------------- -The top-level :func:`melt` and :func:`~DataFrame.melt` functions are useful to +The top-level :func:`melt` function and the equivalent :func:`DataFrame.melt` method are useful to massage a DataFrame into a format where one or more columns are identifier variables, while all other columns, considered measured variables, are "unpivoted" to the -row axis, leaving just two non-identifier columns, "variable" and "value". The -names of those columns can be customized by supplying the ``var_name`` and -``value_name`` parameters. +row axis, leaving just two non-identifier columns, "variable" and "value". -For instance, +For instance, it is possible to unpivot the fruit columns (``Mango``, ``Orange``, and ``Watermelon``) into a single column +with their corresponding values in another. .. ipython:: python - df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'], - 'State': ['Texas', 'Texas', 'Alabama'], + df = pd.DataFrame({'State': ['Texas', 'Florida', 'Alabama'], 'Mango':[4, 10, 90], 'Orange': [10, 8, 14], - 'Watermelon':[40, 99, 43], - 'Gin':[16, 200, 34], - 'Vodka':[20, 33, 18]}, - columns=['City', 'State', 'Mango', 'Orange', - 'Watermelon', 'Gin', 'Vodka']) - df.melt(id_vars=['City', 'State'], value_vars=['Mango', 'Orange'], - var_name='Fruit', value_name='Pounds') + 'Watermelon':[40, 99, 43]}, + columns=['State', 'Mango', 'Orange', 'Watermelon']) + + df + + df.melt(id_vars='State', value_vars=['Mango', 'Orange', 'Watermelon']) + +The resulting names of the unpivoted columns can be customized by supplying strings to the ``var_name`` and +``value_name`` parameters. + +.. 
ipython:: python + + df.melt(id_vars='State', value_vars=['Mango', 'Orange', 'Watermelon'], + var_name='Fruit', value_name='Pounds') .. versionadded:: 0.22.0 Passing a list of lists to `value_vars` allows you to simultaneously melt -independent variable groups. The groups need not be the same size. +independent column groups. The following DataFrame contains an additional column grouping of drinks (``Gin`` and ``Vodka``) +that may be unpivoted along with the fruit columns. The groups need not be the same size. Additionally, +the ``var_name`` and ``value_name`` parameters may be passed a list of strings to name each of the returned +variable and value columns. .. ipython:: python - df.melt(id_vars=['City', 'State'], - value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']], - var_name=['Fruit', 'Drink'], - value_name=['Pounds', 'Ounces']) + df = pd.DataFrame({'State': ['Texas', 'Florida', 'Alabama'], + 'Mango':[4, 10, 90], + 'Orange': [10, 8, 14], + 'Watermelon':[40, 99, 43], + 'Gin':[16, 200, 34], + 'Vodka':[20, 33, 18]}, + columns=['State', 'Mango', 'Orange', 'Watermelon', + 'Gin', 'Vodka']) + + df + + df.melt(id_vars='State', + value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']], + var_name=['Fruit', 'Drink'], + value_name=['Pounds', 'Ounces']) Another way to transform is to use the ``wide_to_long`` panel data convenience function. diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index c463a90ebe09b..1a540167ed28d 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -8,12 +8,50 @@ deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. +Highlights include: + +- The :meth:`DataFrame.melt` method and top-level :func:`melt` function can now simultaneously unpivot independent groups of columns, see :ref:`here <whatsnew_0220.enhancements.melt>`. + +..
contents:: What's new in v0.22.0 + :local: + :backlinks: none + :depth: 2 + .. _whatsnew_0220.enhancements: New features ~~~~~~~~~~~~ -- Simultaneous melting of independent groups of columns is now possible with ``melt``. +.. _whatsnew_0220.enhancements.melt: + +Simultaneous unpivoting of independent groups of columns with ``melt`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, ``melt`` was only able to unpivot a single group of columns. This was done by passing all the column names in the group as a list to the ``value_vars`` parameter. + +In the following DataFrame, there are two groups, fruits (``Mango``, ``Orange``, ``Watermelon``) and drinks (``Gin``, ``Vodka``) that can each be unpivoted into their own column. Previously, ``melt`` could only unpivot a single column grouping: + +.. ipython:: python + + df = pd.DataFrame({'State': ['Texas', 'Florida', 'Alabama'], + 'Mango':[4, 10, 90], + 'Orange': [10, 8, 14], + 'Watermelon':[40, 99, 43], + 'Gin':[16, 200, 34], + 'Vodka':[20, 33, 18]}, + columns=['State', 'Mango', 'Orange', + 'Watermelon', 'Gin', 'Vodka']) + + df.melt(id_vars='State', value_vars=['Mango', 'Orange', 'Watermelon'], + var_name='Fruit', value_name='Pounds') + +Now, ``melt`` can unpivot any number of column groups by passing a list of lists to the ``value_vars`` parameter. The resulting unpivoted columns can be named by passing a list to ``var_name``. The corresponding values of each group may also be named by passing a list to ``value_name``. Notice that the column groups need not be equal in length: + +.. 
ipython:: python + + df.melt(id_vars='State', + value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']], + var_name=['Fruit', 'Drink'], + value_name=['Pounds', 'Ounces']) - - diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index b7422dfd7e911..42e87f9791011 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -30,6 +30,33 @@ def setup_method(self, method): self.df1.columns = [list('ABC'), list('abc')] self.df1.columns.names = ['CAP', 'low'] + self.df2 = DataFrame( + {'City': ['Houston', 'Austin', 'Hoover'], + 'State': ['Texas', 'Texas', 'Alabama'], + 'Name': ['Aria', 'Penelope', 'Niko'], + 'Mango': [4, 10, 90], + 'Orange': [10, 8, 14], + 'Watermelon': [40, 99, 43], + 'Gin': [16, 200, 34], + 'Vodka': [20, 33, 18]}, columns=['City', 'State', 'Name', 'Mango', + 'Orange', 'Watermelon', 'Gin', + 'Vodka']) + + self.df3 = DataFrame( + {'group': ['a', 'b', 'c'], + 'exp_1': [4, 10, -9], + 'exp_2': [10, 8, 14], + 'res_1': [8, 5, 4], + 'res_3': [11, 0, 7]}, columns=['group', 'exp_1', 'exp_2', + 'res_1', 'res_3']) + + self.df4 = self.df2.copy() + self.df4.columns = pd.MultiIndex.from_arrays([list('aabbcccd'), + list('ffffgggg'), + self.df4.columns], + names=[None, None, + 'some vars']) + def test_top_level_method(self): result = melt(self.df) assert result.columns.tolist() == ['variable', 'value'] @@ -212,6 +239,63 @@ def test_multiindex(self): res = self.df1.melt() assert res.columns.tolist() == ['CAP', 'low', 'value'] + def test_simultaneous_melt(self): + data = {'City': ['Houston', 'Austin', 'Hoover', 'Houston', 'Austin', + 'Hoover', 'Houston', 'Austin', 'Hoover'], + 'State': ['Texas', 'Texas', 'Alabama', 'Texas', 'Texas', + 'Alabama', 'Texas', 'Texas', 'Alabama'], + 'Fruit': ['Mango', 'Mango', 'Mango', 'Orange', 'Orange', + 'Orange', 'Watermelon', 'Watermelon', 'Watermelon'], + 'Pounds': [4, 10, 90, 10, 8, 14, 40, 99, 43], + 'Drink': ['Gin', 'Gin', 'Gin', 'Vodka', 'Vodka', 'Vodka', + 'nan', 
'nan', 'nan'], + 'Ounces': [16.0, 200.0, 34.0, 20.0, 33.0, 18.0, nan, + nan, nan]} + expected1 = DataFrame(data, columns=['City', 'State', 'Fruit', + 'Pounds', 'Drink', 'Ounces']) + result1 = self.df2.melt(id_vars=['City', 'State'], + value_vars=[['Mango', 'Orange', 'Watermelon'], + ['Gin', 'Vodka']], + var_name=['Fruit', 'Drink'], + value_name=['Pounds', 'Ounces']) + tm.assert_frame_equal(result1, expected1) + + # single item groups + result2 = self.df2.melt(id_vars='State', + value_vars=[['Mango'], ['Vodka']], + var_name=['Fruit', 'Drink']) + + data = {'Drink': ['Vodka', 'Vodka', 'Vodka'], + 'Fruit': ['Mango', 'Mango', 'Mango'], + 'State': ['Texas', 'Texas', 'Alabama'], + 'value_0': [4, 10, 90], + 'value_1': [20, 33, 18]} + expected2 = DataFrame(data, columns=['State', 'Fruit', 'value_0', + 'Drink', 'value_1']) + tm.assert_frame_equal(result2, expected2) + + with pytest.raises(ValueError): + self.df2.melt(id_vars='State', + value_vars=[['Vodka'], ['Mango', 'Name'], + ['Orange', 'Watermelon']], + var_name=['Fruit', 'Drink']) + + def test_melt_multiindex(self): + data = {('a', 'f', 'State'): ['Texas', 'Texas', 'Alabama', + 'Texas', 'Texas', 'Alabama'], + 'variable_0': ['b', 'b', 'b', 'c', 'c', 'c'], + 'variable_1': ['f', 'f', 'f', 'g', 'g', 'g'], + 'some vars': ['Name', 'Name', 'Name', 'Watermelon', + 'Watermelon', 'Watermelon'], + 'value': ['Aria', 'Penelope', 'Niko', 40, 99, 43]} + expected = DataFrame(data, columns=[('a', 'f', 'State'), 'variable_0', + 'variable_1', 'some vars', + 'value']) + result = self.df4.melt(id_vars=[('a', 'f', 'State')], + value_vars=[('b', 'f', 'Name'), + ('c', 'g', 'Watermelon')]) + tm.assert_frame_equal(expected, result) + class TestLreshape(object): @@ -618,4 +702,4 @@ def test_float_suffix(self): expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) \ No 
newline at end of file From 68e55d947c62da9c58543429a75cbf255fa5cd91 Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Sun, 10 Dec 2017 17:50:27 -0500 Subject: [PATCH 5/6] added newline --- pandas/tests/reshape/test_melt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 42e87f9791011..14d60fcda9d4f 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -702,4 +702,4 @@ def test_float_suffix(self): expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From 614fc019d1f021ea61e8d5d170e1c6bfb418280f Mon Sep 17 00:00:00 2001 From: tdpetrou Date: Sun, 10 Dec 2017 17:57:23 -0500 Subject: [PATCH 6/6] 0.21 whatsnew back to original --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4911ecbb161a5..89e2d3006696c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1172,3 +1172,4 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +