From 5f8890c1e6aeeda41e9818ce0c46b259efaa7f2f Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 5 Sep 2018 02:51:16 +0200
Subject: [PATCH 1/9] CLN: str.cat internals

---
 pandas/core/strings.py       | 165 +++++++++++------------------------
 pandas/tests/test_strings.py |  49 +----------
 2 files changed, 53 insertions(+), 161 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 5a23951145cb4..490e048053719 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -3,7 +3,7 @@
 
 from pandas.compat import zip
 from pandas.core.dtypes.generic import ABCSeries, ABCIndex
-from pandas.core.dtypes.missing import isna, notna
+from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_categorical_dtype,
@@ -36,114 +36,28 @@
 _shared_docs = dict()
 
 
-def _get_array_list(arr, others):
-    """
-    Auxiliary function for :func:`str_cat`
-
-    Parameters
-    ----------
-    arr : ndarray
-        The left-most ndarray of the concatenation
-    others : list, ndarray, Series
-        The rest of the content to concatenate. If list of list-likes,
-        all elements must be passable to ``np.asarray``.
-
-    Returns
-    -------
-    list
-        List of all necessary arrays
-    """
-    from pandas.core.series import Series
-
-    if len(others) and isinstance(com.values_from_object(others)[0],
-                                  (list, np.ndarray, Series)):
-        arrays = [arr] + list(others)
-    else:
-        arrays = [arr, others]
-
-    return [np.asarray(x, dtype=object) for x in arrays]
-
-
-def str_cat(arr, others=None, sep=None, na_rep=None):
-    """
+def interleave_sep(all_cols, sep):
+    '''
     Auxiliary function for :meth:`str.cat`
 
-    If `others` is specified, this function concatenates the Series/Index
-    and elements of `others` element-wise.
-    If `others` is not being passed then all values in the Series are
-    concatenated in a single string with a given `sep`.
-
     Parameters
     ----------
-    others : list-like, or list of list-likes, optional
-        List-likes (or a list of them) of the same length as calling object.
-        If None, returns str concatenating strings of the Series.
-    sep : string or None, default None
-        If None, concatenates without any separator.
-    na_rep : string or None, default None
-        If None, NA in the series are ignored.
+    all_cols : list of numpy arrays
+        List of arrays to be concatenated with sep
+    sep : string
+        The separator string for concatenating the columns
 
     Returns
     -------
-    concat
-        ndarray containing concatenated results (if `others is not None`)
-        or str (if `others is None`)
-    """
-    if sep is None:
-        sep = ''
-
-    if others is not None:
-        arrays = _get_array_list(arr, others)
-
-        n = _length_check(arrays)
-        masks = np.array([isna(x) for x in arrays])
-        cats = None
-
-        if na_rep is None:
-            na_mask = np.logical_or.reduce(masks, axis=0)
-
-            result = np.empty(n, dtype=object)
-            np.putmask(result, na_mask, np.nan)
-
-            notmask = ~na_mask
-
-            tuples = zip(*[x[notmask] for x in arrays])
-            cats = [sep.join(tup) for tup in tuples]
-
-            result[notmask] = cats
-        else:
-            for i, x in enumerate(arrays):
-                x = np.where(masks[i], na_rep, x)
-                if cats is None:
-                    cats = x
-                else:
-                    cats = cats + sep + x
-
-            result = cats
-
-        return result
-    else:
-        arr = np.asarray(arr, dtype=object)
-        mask = isna(arr)
-        if na_rep is None and mask.any():
-            if sep == '':
-                na_rep = ''
-            else:
-                return sep.join(arr[notna(arr)])
-        return sep.join(np.where(mask, na_rep, arr))
-
-
-def _length_check(others):
-    n = None
-    for x in others:
-        try:
-            if n is None:
-                n = len(x)
-            elif len(x) != n:
-                raise ValueError('All arrays must be same length')
-        except TypeError:
-            raise ValueError('Must pass arrays containing strings to str_cat')
-    return n
+    list
+        The list of arrays interleaved with sep; to be fed to np.sum
+    '''
+    if sep == '':
+        # no need to add empty strings
+        return all_cols
+    result = [sep] * (2 * len(all_cols) - 1)
+    result[::2] = all_cols
+    return result
 
 
 def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2283,6 +2197,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
 
         if isinstance(others, compat.string_types):
             raise ValueError("Did you mean to supply a `sep` keyword?")
+        if sep is None:
+            sep = ''
 
         if isinstance(self._orig, Index):
             data = Series(self._orig, index=self._orig)
@@ -2291,9 +2207,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
 
         # concatenate Series/Index with itself if no "others"
         if others is None:
-            result = str_cat(data, others=others, sep=sep, na_rep=na_rep)
-            return self._wrap_result(result,
-                                     use_codes=(not self._is_categorical))
+            data = data.astype(object).values
+            mask = isna(data)
+            if mask.any():
+                if na_rep is None:
+                    return sep.join(data[~mask])
+                return sep.join(np.where(mask, na_rep, data))
+            return sep.join(data)
 
         try:
             # turn anything in "others" into lists of Series
@@ -2320,23 +2240,42 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
                           "'outer'|'inner'|'right'`. The future default will "
                           "be `join='left'`.", FutureWarning, stacklevel=2)
 
-        # align if required
-        if join is not None:
+        # if join is None, _get_series_list already aligned indexes
+        join = 'left' if join is None else join
+
+        if any(not data.index.equals(x.index) for x in others):
             # Need to add keys for uniqueness in case of duplicate columns
             others = concat(others, axis=1,
                             join=(join if join == 'inner' else 'outer'),
-                            keys=range(len(others)))
+                            keys=range(len(others)), copy=False)
             data, others = data.align(others, join=join)
             others = [others[x] for x in others]  # again list of Series
 
-        # str_cat discards index
-        res = str_cat(data, others=others, sep=sep, na_rep=na_rep)
+        all_cols = [x.astype(object).values for x in [data] + others]
+        masks = np.array([isna(x) for x in all_cols])
+        union_mask = np.logical_or.reduce(masks, axis=0)
+
+        if na_rep is None and union_mask.any():
+            result = np.empty(len(data), dtype=object)
+            np.putmask(result, union_mask, np.nan)
+
+            not_masked = ~union_mask
+            all_cols = interleave_sep([x[not_masked] for x in all_cols], sep)
+
+            result[not_masked] = np.sum(all_cols, axis=0)
+        elif na_rep is not None and union_mask.any():
+            # fill NaNs
+            all_cols = [np.where(masks[i], na_rep, all_cols[i])
+                        for i in range(len(all_cols))]
+            result = np.sum(interleave_sep(all_cols, sep), axis=0)
+        else:  # no NaNs
+            result = np.sum(interleave_sep(all_cols, sep), axis=0)
 
         if isinstance(self._orig, Index):
-            res = Index(res, name=self._orig.name)
+            result = Index(result, name=self._orig.name)
         else:  # Series
-            res = Series(res, index=data.index, name=self._orig.name)
-        return res
+            result = Series(result, index=data.index, name=self._orig.name)
+        return result
 
     _shared_docs['str_split'] = ("""
     Split strings around given separator/delimiter.
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index bd450cdcf8054..b9cc153d67c93 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -97,53 +97,6 @@ def test_iter_object_try_string(self):
         assert i == 100
         assert s == 'h'
 
-    def test_cat(self):
-        one = np.array(['a', 'a', 'b', 'b', 'c', NA], dtype=np.object_)
-        two = np.array(['a', NA, 'b', 'd', 'foo', NA], dtype=np.object_)
-
-        # single array
-        result = strings.str_cat(one)
-        exp = 'aabbc'
-        assert result == exp
-
-        result = strings.str_cat(one, na_rep='NA')
-        exp = 'aabbcNA'
-        assert result == exp
-
-        result = strings.str_cat(one, na_rep='-')
-        exp = 'aabbc-'
-        assert result == exp
-
-        result = strings.str_cat(one, sep='_', na_rep='NA')
-        exp = 'a_a_b_b_c_NA'
-        assert result == exp
-
-        result = strings.str_cat(two, sep='-')
-        exp = 'a-b-d-foo'
-        assert result == exp
-
-        # Multiple arrays
-        result = strings.str_cat(one, [two], na_rep='NA')
-        exp = np.array(['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA'],
-                       dtype=np.object_)
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = strings.str_cat(one, two)
-        exp = np.array(['aa', NA, 'bb', 'bd', 'cfoo', NA], dtype=np.object_)
-        tm.assert_almost_equal(result, exp)
-
-        # error for incorrect lengths
-        rgx = 'All arrays must be same length'
-        three = Series(['1', '2', '3'])
-
-        with tm.assert_raises_regex(ValueError, rgx):
-            strings.str_cat(one, three)
-
-        # error for incorrect type
-        rgx = "Must pass arrays containing strings to str_cat"
-        with tm.assert_raises_regex(ValueError, rgx):
-            strings.str_cat(one, 'three')
-
     @pytest.mark.parametrize('box', [Series, Index])
     @pytest.mark.parametrize('other', [None, Series, Index])
     def test_str_cat_name(self, box, other):
@@ -3136,7 +3089,7 @@ def test_method_on_bytes(self):
         lhs = Series(np.array(list('abc'), 'S1').astype(object))
         rhs = Series(np.array(list('def'), 'S1').astype(object))
         if compat.PY3:
-            pytest.raises(TypeError, lhs.str.cat, rhs)
+            pytest.raises(TypeError, lhs.str.cat, rhs, sep=',')
         else:
             result = lhs.str.cat(rhs)
             expected = Series(np.array(

From 285a1f7b8c382e9d36f1971e4d48efe887da815b Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 23 Sep 2018 19:07:36 +0200
Subject: [PATCH 2/9] Review (jreback)

---
 pandas/core/strings.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 490e048053719..8622a5d705176 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -5,6 +5,7 @@
 from pandas.core.dtypes.generic import ABCSeries, ABCIndex
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.common import (
+    ensure_object,
     is_bool_dtype,
     is_categorical_dtype,
     is_object_dtype,
@@ -36,13 +37,13 @@
 _shared_docs = dict()
 
 
-def interleave_sep(all_cols, sep):
-    '''
+def interleave_sep(list_of_columns, sep):
+    """
     Auxiliary function for :meth:`str.cat`
 
     Parameters
     ----------
-    all_cols : list of numpy arrays
+    list_of_columns : list of numpy arrays
         List of arrays to be concatenated with sep
     sep : string
         The separator string for concatenating the columns
@@ -51,12 +52,12 @@ def interleave_sep(all_cols, sep):
     -------
     list
         The list of arrays interleaved with sep; to be fed to np.sum
-    '''
+    """
     if sep == '':
         # no need to add empty strings
-        return all_cols
-    result = [sep] * (2 * len(all_cols) - 1)
-    result[::2] = all_cols
+        return list_of_columns
+    result = [sep] * (2 * len(list_of_columns) - 1)
+    result[::2] = list_of_columns
     return result
 
 
@@ -2207,12 +2208,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
 
         # concatenate Series/Index with itself if no "others"
         if others is None:
-            data = data.astype(object).values
+            data = ensure_object(data)
             mask = isna(data)
-            if mask.any():
-                if na_rep is None:
-                    return sep.join(data[~mask])
-                return sep.join(np.where(mask, na_rep, data))
+            if na_rep is None and mask.any():
+                data = data[~mask]
+            elif na_rep is not None and mask.any():
+                data = np.where(mask, na_rep, data)
             return sep.join(data)
 
         try:
@@ -2251,11 +2252,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             data, others = data.align(others, join=join)
             others = [others[x] for x in others]  # again list of Series
 
-        all_cols = [x.astype(object).values for x in [data] + others]
+        all_cols = [ensure_object(x) for x in [data] + others]
         masks = np.array([isna(x) for x in all_cols])
         union_mask = np.logical_or.reduce(masks, axis=0)
 
         if na_rep is None and union_mask.any():
+            # no na_rep means NaNs for all rows where any column has a NaN
+            # only necessary if there are actually any NaNs
             result = np.empty(len(data), dtype=object)
             np.putmask(result, union_mask, np.nan)
 
@@ -2264,11 +2267,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
 
             result[not_masked] = np.sum(all_cols, axis=0)
         elif na_rep is not None and union_mask.any():
-            # fill NaNs
-            all_cols = [np.where(masks[i], na_rep, all_cols[i])
-                        for i in range(len(all_cols))]
+            # fill NaNs with na_rep in case there are actually any NaNs
+            all_cols = [np.where(mask, na_rep, col)
+                        for mask, col in zip(masks, all_cols)]
             result = np.sum(interleave_sep(all_cols, sep), axis=0)
-        else:  # no NaNs
+        else:
+            # no NaNs - can just concatenate
             result = np.sum(interleave_sep(all_cols, sep), axis=0)
 
         if isinstance(self._orig, Index):

From 28e78594e91640c1ea8928e1a6ef083cc225f228 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 23 Sep 2018 19:14:05 +0200
Subject: [PATCH 3/9] Refactor interleave_sep to use np.sum

---
 pandas/core/strings.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 8622a5d705176..3e7abfe56e9d7 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -37,28 +37,29 @@
 _shared_docs = dict()
 
 
-def interleave_sep(list_of_columns, sep):
+def cat_core(list_of_columns, sep):
     """
     Auxiliary function for :meth:`str.cat`
 
     Parameters
     ----------
     list_of_columns : list of numpy arrays
-        List of arrays to be concatenated with sep
+        List of arrays to be concatenated with sep;
+        these arrays may not contain NaNs!
     sep : string
         The separator string for concatenating the columns
 
     Returns
     -------
-    list
-        The list of arrays interleaved with sep; to be fed to np.sum
+    nd.array
+        The concatenation of list_of_columns with sep
     """
     if sep == '':
         # no need to add empty strings
-        return list_of_columns
-    result = [sep] * (2 * len(list_of_columns) - 1)
-    result[::2] = list_of_columns
-    return result
+        return np.sum(list_of_columns, axis=0)
+    list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
+    list_with_sep[::2] = list_of_columns
+    return np.sum(list_with_sep, axis=0)
 
 
 def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2263,17 +2264,16 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             np.putmask(result, union_mask, np.nan)
 
             not_masked = ~union_mask
-            all_cols = interleave_sep([x[not_masked] for x in all_cols], sep)
-
-            result[not_masked] = np.sum(all_cols, axis=0)
+            result[not_masked] = cat_core([x[not_masked] for x in all_cols],
+                                          sep)
         elif na_rep is not None and union_mask.any():
             # fill NaNs with na_rep in case there are actually any NaNs
             all_cols = [np.where(mask, na_rep, col)
                         for mask, col in zip(masks, all_cols)]
-            result = np.sum(interleave_sep(all_cols, sep), axis=0)
+            result = cat_core(all_cols, sep)
         else:
             # no NaNs - can just concatenate
-            result = np.sum(interleave_sep(all_cols, sep), axis=0)
+            result = cat_core(all_cols, sep)
 
         if isinstance(self._orig, Index):
             result = Index(result, name=self._orig.name)

From 807f18e8047ec32e5f76268fdd4461ed0f68a857 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 23 Sep 2018 20:01:49 +0200
Subject: [PATCH 4/9] Lint; more consistent naming for masks

---
 pandas/core/strings.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 3e7abfe56e9d7..29095e90ce14f 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2210,11 +2210,11 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
         # concatenate Series/Index with itself if no "others"
         if others is None:
             data = ensure_object(data)
-            mask = isna(data)
-            if na_rep is None and mask.any():
-                data = data[~mask]
-            elif na_rep is not None and mask.any():
-                data = np.where(mask, na_rep, data)
+            na_mask = isna(data)
+            if na_rep is None and na_mask.any():
+                data = data[~na_mask]
+            elif na_rep is not None and na_mask.any():
+                data = np.where(na_mask, na_rep, data)
             return sep.join(data)
 
         try:
@@ -2254,8 +2254,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             others = [others[x] for x in others]  # again list of Series
 
         all_cols = [ensure_object(x) for x in [data] + others]
-        masks = np.array([isna(x) for x in all_cols])
-        union_mask = np.logical_or.reduce(masks, axis=0)
+        na_masks = np.array([isna(x) for x in all_cols])
+        union_mask = np.logical_or.reduce(na_masks, axis=0)
 
         if na_rep is None and union_mask.any():
             # no na_rep means NaNs for all rows where any column has a NaN
@@ -2268,8 +2268,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
                                           sep)
         elif na_rep is not None and union_mask.any():
             # fill NaNs with na_rep in case there are actually any NaNs
-            all_cols = [np.where(mask, na_rep, col)
-                        for mask, col in zip(masks, all_cols)]
+            all_cols = [np.where(nm, na_rep, col)
+                        for nm, col in zip(na_masks, all_cols)]
             result = cat_core(all_cols, sep)
         else:
             # no NaNs - can just concatenate

From ed27c663681f2e0112109d74f8d450eb56218601 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 23 Sep 2018 20:13:53 +0200
Subject: [PATCH 5/9] Add comment

---
 pandas/core/strings.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 29095e90ce14f..69646e18dd3c3 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2245,6 +2245,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
         # if join is None, _get_series_list already aligned indexes
         join = 'left' if join is None else join
 
+        # align if required
         if any(not data.index.equals(x.index) for x in others):
             # Need to add keys for uniqueness in case of duplicate columns
             others = concat(others, axis=1,

From 0d3c6d21ed9f262acd712072a4bcde12329b51df Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 8 Oct 2018 20:13:15 +0200
Subject: [PATCH 6/9] Review (WillAyd)

---
 pandas/core/strings.py       | 3 ---
 pandas/tests/test_strings.py | 8 +++++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 69646e18dd3c3..2d5c17f3ac088 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -54,9 +54,6 @@ def cat_core(list_of_columns, sep):
     nd.array
         The concatenation of list_of_columns with sep
     """
-    if sep == '':
-        # no need to add empty strings
-        return np.sum(list_of_columns, axis=0)
     list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
     list_with_sep[::2] = list_of_columns
     return np.sum(list_with_sep, axis=0)
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index b9cc153d67c93..75b1bcb8b2938 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -367,6 +367,12 @@ def test_str_cat_align_mixed_inputs(self, join):
         with tm.assert_raises_regex(ValueError, rgx):
             s.str.cat([t, z], join=join)
 
+    def test_str_cat_raises(self):
+        # non-strings hiding behind object dtype
+        s = Series([1, 2, 3, 4], dtype='object')
+        with tm.assert_raises_regex(TypeError, "unsupported operand type.*"):
+            s.str.cat(s)
+
     def test_str_cat_special_cases(self):
         s = Series(['a', 'b', 'c', 'd'])
         t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1])
@@ -3089,7 +3095,7 @@ def test_method_on_bytes(self):
         lhs = Series(np.array(list('abc'), 'S1').astype(object))
         rhs = Series(np.array(list('def'), 'S1').astype(object))
         if compat.PY3:
-            pytest.raises(TypeError, lhs.str.cat, rhs, sep=',')
+            pytest.raises(TypeError, lhs.str.cat, rhs)
         else:
             result = lhs.str.cat(rhs)
             expected = Series(np.array(

From 36c6240857296ddb85858351de4e25791a9ff4d7 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Tue, 9 Oct 2018 08:27:33 +0200
Subject: [PATCH 7/9] Review (WillAyd)

---
 pandas/core/strings.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 2d5c17f3ac088..e63dc2b6e7e42 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -37,15 +37,15 @@
 _shared_docs = dict()
 
 
-def cat_core(list_of_columns, sep):
+def cat_core(all_cols, sep):
     """
     Auxiliary function for :meth:`str.cat`
 
     Parameters
     ----------
-    list_of_columns : list of numpy arrays
-        List of arrays to be concatenated with sep;
-        these arrays may not contain NaNs!
+    all_cols : two-dimensional numpy array
+        array of columns to be concatenated with sep;
+        this array may not contain NaNs!
     sep : string
         The separator string for concatenating the columns
 
@@ -54,9 +54,12 @@ def cat_core(list_of_columns, sep):
     nd.array
         The concatenation of list_of_columns with sep
     """
+    list_of_columns = np.split(all_cols, all_cols.shape[1], axis=1)
     list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
     list_with_sep[::2] = list_of_columns
-    return np.sum(list_with_sep, axis=0)
+    # np.split splits into arrays of shape (N, 1); NOT (N,)
+    # need to reduce dimensionality of result
+    return np.sum(list_with_sep, axis=0)[:, 0]
 
 
 def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2239,21 +2242,21 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
                           "'outer'|'inner'|'right'`. The future default will "
                           "be `join='left'`.", FutureWarning, stacklevel=2)
 
-        # if join is None, _get_series_list already aligned indexes
-        join = 'left' if join is None else join
+        # concatenate others into DataFrame; need to add keys for uniqueness in
+        # case of duplicate columns (for join is None, all indexes are already
+        # the same after _get_series_list, which forces alignment in this case)
+        others = concat(others, axis=1,
+                        join=(join if join == 'inner' else 'outer'),
+                        keys=range(len(others)), copy=False)
 
         # align if required
-        if any(not data.index.equals(x.index) for x in others):
-            # Need to add keys for uniqueness in case of duplicate columns
-            others = concat(others, axis=1,
-                            join=(join if join == 'inner' else 'outer'),
-                            keys=range(len(others)), copy=False)
+        if not data.index.equals(others.index):
             data, others = data.align(others, join=join)
-            others = [others[x] for x in others]  # again list of Series
 
-        all_cols = [ensure_object(x) for x in [data] + others]
-        na_masks = np.array([isna(x) for x in all_cols])
-        union_mask = np.logical_or.reduce(na_masks, axis=0)
+        # collect all columns
+        all_cols = ensure_object(concat([data, others], axis=1, copy=False))
+        na_masks = isna(all_cols)
+        union_mask = np.logical_or.reduce(na_masks, axis=1)
 
         if na_rep is None and union_mask.any():
             # no na_rep means NaNs for all rows where any column has a NaN
@@ -2262,13 +2265,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             np.putmask(result, union_mask, np.nan)
 
             not_masked = ~union_mask
-            result[not_masked] = cat_core([x[not_masked] for x in all_cols],
-                                          sep)
+            result[not_masked] = cat_core(all_cols[not_masked], sep)
         elif na_rep is not None and union_mask.any():
             # fill NaNs with na_rep in case there are actually any NaNs
-            all_cols = [np.where(nm, na_rep, col)
-                        for nm, col in zip(na_masks, all_cols)]
-            result = cat_core(all_cols, sep)
+            result = cat_core(np.where(na_masks, na_rep, all_cols), sep)
         else:
             # no NaNs - can just concatenate
             result = cat_core(all_cols, sep)

From a97fe67ed33c842fd0d2f92a796d0768271b1bad Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 10 Oct 2018 22:38:16 +0200
Subject: [PATCH 8/9] Add np-compat

---
 pandas/core/strings.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index e63dc2b6e7e42..ebdd2905e29e1 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -18,6 +18,7 @@
 import pandas.core.common as com
 from pandas.core.algorithms import take_1d
 import pandas.compat as compat
+from pandas.compat.numpy import _np_version_under1p11
 from pandas.core.base import NoNewAttributesMixin
 from pandas.util._decorators import Appender
 import re
@@ -57,9 +58,12 @@ def cat_core(all_cols, sep):
     list_of_columns = np.split(all_cols, all_cols.shape[1], axis=1)
     list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
     list_with_sep[::2] = list_of_columns
-    # np.split splits into arrays of shape (N, 1); NOT (N,)
-    # need to reduce dimensionality of result
-    return np.sum(list_with_sep, axis=0)[:, 0]
+    res = np.sum(list_with_sep, axis=0)
+    if not (_np_version_under1p11 and len(res) == 0):
+        # np.split splits into arrays of shape (N, 1); NOT (N,)
+        # need to reduce dimensionality of result
+        res = res[:, 0]
+    return res
 
 
 def _na_map(f, arr, na_result=np.nan, dtype=object):

From e58ec9dfa82a459d9b316b678b77d50fc4901e9e Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 12 Oct 2018 08:45:06 +0200
Subject: [PATCH 9/9] Revert using more idiomatic code due to perf

---
 pandas/core/strings.py | 46 +++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ebdd2905e29e1..4086021bc61a6 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -18,7 +18,6 @@
 import pandas.core.common as com
 from pandas.core.algorithms import take_1d
 import pandas.compat as compat
-from pandas.compat.numpy import _np_version_under1p11
 from pandas.core.base import NoNewAttributesMixin
 from pandas.util._decorators import Appender
 import re
@@ -38,15 +37,15 @@
 _shared_docs = dict()
 
 
-def cat_core(all_cols, sep):
+def cat_core(list_of_columns, sep):
     """
     Auxiliary function for :meth:`str.cat`
 
     Parameters
     ----------
-    all_cols : two-dimensional numpy array
-        array of columns to be concatenated with sep;
-        this array may not contain NaNs!
+    list_of_columns : list of numpy arrays
+        List of arrays to be concatenated with sep;
+        these arrays must not contain NaNs!
     sep : string
         The separator string for concatenating the columns
 
@@ -55,15 +54,9 @@ def cat_core(all_cols, sep):
     nd.array
         The concatenation of list_of_columns with sep
     """
-    list_of_columns = np.split(all_cols, all_cols.shape[1], axis=1)
     list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
     list_with_sep[::2] = list_of_columns
-    res = np.sum(list_with_sep, axis=0)
-    if not (_np_version_under1p11 and len(res) == 0):
-        # np.split splits into arrays of shape (N, 1); NOT (N,)
-        # need to reduce dimensionality of result
-        res = res[:, 0]
-    return res
+    return np.sum(list_with_sep, axis=0)
 
 
 def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2246,21 +2239,21 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
                           "'outer'|'inner'|'right'`. The future default will "
                           "be `join='left'`.", FutureWarning, stacklevel=2)
 
-        # concatenate others into DataFrame; need to add keys for uniqueness in
-        # case of duplicate columns (for join is None, all indexes are already
-        # the same after _get_series_list, which forces alignment in this case)
-        others = concat(others, axis=1,
-                        join=(join if join == 'inner' else 'outer'),
-                        keys=range(len(others)), copy=False)
+        # if join is None, _get_series_list already force-aligned indexes
+        join = 'left' if join is None else join
 
         # align if required
-        if not data.index.equals(others.index):
+        if any(not data.index.equals(x.index) for x in others):
+            # Need to add keys for uniqueness in case of duplicate columns
+            others = concat(others, axis=1,
+                            join=(join if join == 'inner' else 'outer'),
+                            keys=range(len(others)), copy=False)
             data, others = data.align(others, join=join)
+            others = [others[x] for x in others]  # again list of Series
 
-        # collect all columns
-        all_cols = ensure_object(concat([data, others], axis=1, copy=False))
-        na_masks = isna(all_cols)
-        union_mask = np.logical_or.reduce(na_masks, axis=1)
+        all_cols = [ensure_object(x) for x in [data] + others]
+        na_masks = np.array([isna(x) for x in all_cols])
+        union_mask = np.logical_or.reduce(na_masks, axis=0)
 
         if na_rep is None and union_mask.any():
             # no na_rep means NaNs for all rows where any column has a NaN
@@ -2269,10 +2262,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             np.putmask(result, union_mask, np.nan)
 
             not_masked = ~union_mask
-            result[not_masked] = cat_core(all_cols[not_masked], sep)
+            result[not_masked] = cat_core([x[not_masked] for x in all_cols],
+                                          sep)
         elif na_rep is not None and union_mask.any():
             # fill NaNs with na_rep in case there are actually any NaNs
-            result = cat_core(np.where(na_masks, na_rep, all_cols), sep)
+            all_cols = [np.where(nm, na_rep, col)
+                        for nm, col in zip(na_masks, all_cols)]
+            result = cat_core(all_cols, sep)
         else:
             # no NaNs - can just concatenate
             result = cat_core(all_cols, sep)