DOC, CI: Correct wide_to_long docstring and add reshape/melt to CI (#26273)

vandenn · WillAyd · commit f46ab96fc3f6 · 2019-05-03T07:52:14.000-07:00
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -245,6 +245,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         pandas/core/reshape/pivot.py \
         pandas/core/reshape/reshape.py \
         pandas/core/reshape/tile.py \
+        pandas/core/reshape/melt.py \
         -k"-crosstab -pivot_table -cut"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
@@ -270,15 +270,15 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
     ...     'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
     ... })
     >>> df
-       birth  famid  ht1  ht2
+       famid  birth  ht1  ht2
     0      1      1  2.8  3.4
-    1      2      1  2.9  3.8
-    2      3      1  2.2  2.9
-    3      1      2  2.0  3.2
+    1      1      2  2.9  3.8
+    2      1      3  2.2  2.9
+    3      2      1  2.0  3.2
     4      2      2  1.8  2.8
-    5      3      2  1.9  2.4
-    6      1      3  2.2  3.3
-    7      2      3  2.3  3.4
+    5      2      3  1.9  2.4
+    6      3      1  2.2  3.3
+    7      3      2  2.3  3.4
     8      3      3  2.1  2.9
     >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
     >>> l
@@ -323,33 +323,29 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
     Less wieldy column names are also handled
 
     >>> np.random.seed(0)
-    >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
-    ...                    'A(quarterly)-2011': np.random.rand(3),
-    ...                    'B(quarterly)-2010': np.random.rand(3),
-    ...                    'B(quarterly)-2011': np.random.rand(3),
+    >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3),
+    ...                    'A(weekly)-2011': np.random.rand(3),
+    ...                    'B(weekly)-2010': np.random.rand(3),
+    ...                    'B(weekly)-2011': np.random.rand(3),
     ...                    'X' : np.random.randint(3, size=3)})
     >>> df['id'] = df.index
     >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-       A(quarterly)-2010  A(quarterly)-2011  B(quarterly)-2010  ...
-    0           0.548814           0.544883           0.437587  ...
-    1           0.715189           0.423655           0.891773  ...
-    2           0.602763           0.645894           0.963663  ...
-       X  id
-    0  0   0
-    1  1   1
-    2  1   2
-
-    >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
+       A(weekly)-2010  A(weekly)-2011  B(weekly)-2010  B(weekly)-2011  X  id
+    0        0.548814        0.544883        0.437587        0.383442  0   0
+    1        0.715189        0.423655        0.891773        0.791725  1   1
+    2        0.602763        0.645894        0.963663        0.528895  1   2
+
+    >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id',
     ...                 j='year', sep='-')
     ... # doctest: +NORMALIZE_WHITESPACE
-             X  A(quarterly)  B(quarterly)
+             X  A(weekly)  B(weekly)
     id year
-    0  2010  0      0.548814     0.437587
-    1  2010  1      0.715189     0.891773
-    2  2010  1      0.602763     0.963663
-    0  2011  0      0.544883     0.383442
-    1  2011  1      0.423655     0.791725
-    2  2011  1      0.645894     0.528895
+    0  2010  0   0.548814   0.437587
+    1  2010  1   0.715189   0.891773
+    2  2010  1   0.602763   0.963663
+    0  2011  0   0.544883   0.383442
+    1  2011  1   0.423655   0.791725
+    2  2011  1   0.645894   0.528895
 
     If we have many columns, we could also use a regex to find our
     stubnames and pass that list on to wide_to_long
@@ -359,7 +355,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
     ...         r'[A-B]\(.*\)').values if match != [] ])
     ... )
     >>> list(stubnames)
-    ['A(quarterly)', 'B(quarterly)']
+    ['A(weekly)', 'B(weekly)']
 
     All of the above examples have integers as suffixes. It is possible to
     have non-integers as suffixes.
@@ -371,19 +367,19 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
     ...     'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
     ... })
     >>> df
-       birth  famid  ht_one  ht_two
+       famid  birth  ht_one  ht_two
     0      1      1     2.8     3.4
-    1      2      1     2.9     3.8
-    2      3      1     2.2     2.9
-    3      1      2     2.0     3.2
+    1      1      2     2.9     3.8
+    2      1      3     2.2     2.9
+    3      2      1     2.0     3.2
     4      2      2     1.8     2.8
-    5      3      2     1.9     2.4
-    6      1      3     2.2     3.3
-    7      2      3     2.3     3.4
+    5      2      3     1.9     2.4
+    6      3      1     2.2     3.3
+    7      3      2     2.3     3.4
     8      3      3     2.1     2.9
 
     >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age',
-                            sep='_', suffix='\w')
+    ...                     sep='_', suffix=r'\w+')
     >>> l
     ... # doctest: +NORMALIZE_WHITESPACE
                       ht