pandas-dev · jreback · Nov 21, 2017 · Nov 5, 2017
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -38,11 +38,18 @@ Backwards incompatible API changes
 -
 -
 
+
+
+
+
+
 .. _whatsnew_0220.api:
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
 
+- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`)
+- ``Series`` construction from ``object``-dtyped tz-aware datetime data with ``dtype=object`` specified will now return an ``object``-dtyped ``Series``; previously the datetime dtype would be inferred (:issue:`18231`)
 - ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
 - All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
 - :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
@@ -80,6 +87,7 @@ Performance Improvements
 - :class`DateOffset` arithmetic performance is improved (:issue:`18218`)
 - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`)
 - The overriden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
+- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
 
 .. _whatsnew_0220.docs:
 

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -13,7 +13,9 @@
                      is_datetimelike,
                      is_extension_type, is_object_dtype,
                      is_datetime64tz_dtype, is_datetime64_dtype,
-                     is_timedelta64_dtype, is_dtype_equal,
+                     is_datetime64_ns_dtype,
+                     is_timedelta64_dtype, is_timedelta64_ns_dtype,
+                     is_dtype_equal,
                      is_float_dtype, is_complex_dtype,
                      is_integer_dtype,
                      is_datetime_or_timedelta_dtype,
@@ -829,8 +831,10 @@ def maybe_castable(arr):
     # check datetime64[ns]/timedelta64[ns] are valid
     # otherwise try to coerce
     kind = arr.dtype.kind
-    if kind == 'M' or kind == 'm':
-        return is_datetime64_dtype(arr.dtype)
+    if kind == 'M':
+        return is_datetime64_ns_dtype(arr.dtype)
+    elif kind == 'm':
+        return is_timedelta64_ns_dtype(arr.dtype)
 
     return arr.dtype.name not in _POSSIBLY_CAST_DTYPES
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1027,13 +1027,16 @@ def to_frame(self, index=True):
             result.index = self
         return result
 
-    def _to_embed(self, keep_tz=False):
+    def _to_embed(self, keep_tz=False, dtype=None):
         """
         *this is an internal non-public method*
 
         return an array repr of this object, potentially casting to object
 
         """
+        if dtype is not None:
+            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
+
         return self.values.copy()
 
     _index_shared_docs['astype'] = """

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -917,7 +917,7 @@ def astype(self, dtype, copy=True):
             return Index(self.format(), name=self.name, dtype=object)
         elif is_period_dtype(dtype):
             return self.to_period(freq=dtype.freq)
-        raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
+        raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
 
     def _get_time_micros(self):
         values = self.asi8
@@ -957,12 +957,15 @@ def to_series(self, keep_tz=False):
                       index=self._shallow_copy(),
                       name=self.name)
 
-    def _to_embed(self, keep_tz=False):
+    def _to_embed(self, keep_tz=False, dtype=None):
         """
         return an array repr of this object, potentially casting to object
 
         This is for internal compat
         """
+        if dtype is not None:
+            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
+
         if keep_tz and self.tz is not None:
 
             # preserve the tz & copy

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -464,10 +464,14 @@ def __array_wrap__(self, result, context=None):
     def _box_func(self):
         return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)
 
-    def _to_embed(self, keep_tz=False):
+    def _to_embed(self, keep_tz=False, dtype=None):
         """
         return an array repr of this object, potentially casting to object
         """
+
+        if dtype is not None:
+            return self.astype(dtype)._to_embed(keep_tz=keep_tz)
+
         return self.asobject.values
 
     @property
@@ -510,7 +514,7 @@ def astype(self, dtype, copy=True, how='start'):
             return self.to_timestamp(how=how).tz_localize(dtype.tz)
         elif is_period_dtype(dtype):
             return self.asfreq(freq=dtype.freq)
-        raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
+        raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
 
     @Substitution(klass='PeriodIndex')
     @Appender(_shared_docs['searchsorted'])

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -495,7 +495,7 @@ def astype(self, dtype, copy=True):
         elif is_integer_dtype(dtype):
             return Index(self.values.astype('i8', copy=copy), dtype='i8',
                          name=self.name)
-        raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
+        raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
 
     def union(self, other):
         """

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -29,7 +29,8 @@
     _is_unorderable_exception,
     _ensure_platform_int,
     pandas_dtype)
-from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame
+from pandas.core.dtypes.generic import (
+    ABCSparseArray, ABCDataFrame, ABCIndexClass)
 from pandas.core.dtypes.cast import (
     maybe_upcast, infer_dtype_from_scalar,
     maybe_convert_platform,
@@ -184,8 +185,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
                 if name is None:
                     name = data.name
 
-                data = data._to_embed(keep_tz=True)
-                copy = True
+                data = data._to_embed(keep_tz=True, dtype=dtype)
+                copy = False
             elif isinstance(data, np.ndarray):
                 pass
             elif isinstance(data, Series):
@@ -3139,7 +3140,9 @@ def _sanitize_index(data, index, copy=False):
     if len(data) != len(index):
         raise ValueError('Length of values does not match length of ' 'index')
 
-    if isinstance(data, PeriodIndex):
+    if isinstance(data, ABCIndexClass) and not copy:
+        pass
+    elif isinstance(data, PeriodIndex):
         data = data.asobject
     elif isinstance(data, DatetimeIndex):
         data = data._to_embed(keep_tz=True)
@@ -3209,12 +3212,11 @@ def _try_cast(arr, take_fast_path):
             # e.g. indexes can have different conversions (so don't fast path
             # them)
             # GH 6140
-            subarr = _sanitize_index(data, index, copy=True)
+            subarr = _sanitize_index(data, index, copy=copy)
         else:
-            subarr = _try_cast(data, True)
 
-        if copy:
-            subarr = data.copy()
+            # we will try to copy by definition here
+            subarr = _try_cast(data, True)
 
     elif isinstance(data, Categorical):
         subarr = data

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -233,32 +233,13 @@ def test_construction_with_conversions(self):
 
         # convert from a numpy array of non-ns timedelta64
         arr = np.array([1, 2, 3], dtype='timedelta64[s]')
-        s = Series(arr)
-        expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s'))
-        assert_series_equal(s, expected)
-
         df = DataFrame(index=range(3))
         df['A'] = arr
         expected = DataFrame({'A': pd.timedelta_range('00:00:01', periods=3,
                                                       freq='s')},
                              index=range(3))
         assert_frame_equal(df, expected)
 
-        # convert from a numpy array of non-ns datetime64
-        # note that creating a numpy datetime64 is in LOCAL time!!!!
-        # seems to work for M8[D], but not for M8[s]
-
-        s = Series(np.array(['2013-01-01', '2013-01-02',
-                             '2013-01-03'], dtype='datetime64[D]'))
-        assert_series_equal(s, Series(date_range('20130101', periods=3,
-                                                 freq='D')))
-
-        # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01
-        # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]'))
-
-        # assert_series_equal(s,date_range('20130101
-        # 00:00:01',period=3,freq='s'))
-
         expected = DataFrame({
             'dt1': Timestamp('20130101'),
             'dt2': date_range('20130101', periods=3),
@@ -467,7 +448,7 @@ def test_convert_objects(self):
         self.mixed_frame['I'] = '1'
 
         # add in some items that will be nan
-        l = len(self.mixed_frame)
+        length = len(self.mixed_frame)
         self.mixed_frame['J'] = '1.'
         self.mixed_frame['K'] = '1'
         self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled'
@@ -476,8 +457,8 @@ def test_convert_objects(self):
         assert converted['I'].dtype == 'int64'
         assert converted['J'].dtype == 'float64'
         assert converted['K'].dtype == 'float64'
-        assert len(converted['J'].dropna()) == l - 5
-        assert len(converted['K'].dropna()) == l - 5
+        assert len(converted['J'].dropna()) == length - 5
+        assert len(converted['K'].dropna()) == length - 5
 
         # via astype
         converted = self.mixed_frame.copy()

diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py
@@ -130,11 +130,11 @@ def test_astype_raises(self):
         # GH 13149, GH 13209
         idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
 
-        pytest.raises(ValueError, idx.astype, float)
-        pytest.raises(ValueError, idx.astype, 'timedelta64')
-        pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
-        pytest.raises(ValueError, idx.astype, 'datetime64')
-        pytest.raises(ValueError, idx.astype, 'datetime64[D]')
+        pytest.raises(TypeError, idx.astype, float)
+        pytest.raises(TypeError, idx.astype, 'timedelta64')
+        pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
+        pytest.raises(TypeError, idx.astype, 'datetime64')
+        pytest.raises(TypeError, idx.astype, 'datetime64[D]')
 
     def test_index_convert_to_datetime_array(self):
         def _check_rng(rng):

diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
@@ -47,10 +47,10 @@ def test_astype_raises(self):
         # GH 13149, GH 13209
         idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
 
-        pytest.raises(ValueError, idx.astype, str)
-        pytest.raises(ValueError, idx.astype, float)
-        pytest.raises(ValueError, idx.astype, 'timedelta64')
-        pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
+        pytest.raises(TypeError, idx.astype, str)
+        pytest.raises(TypeError, idx.astype, float)
+        pytest.raises(TypeError, idx.astype, 'timedelta64')
+        pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
 
     def test_pickle_compat_construction(self):
         pass

diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py
@@ -66,10 +66,10 @@ def test_astype_raises(self):
         # GH 13149, GH 13209
         idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN])
 
-        pytest.raises(ValueError, idx.astype, float)
-        pytest.raises(ValueError, idx.astype, str)
-        pytest.raises(ValueError, idx.astype, 'datetime64')
-        pytest.raises(ValueError, idx.astype, 'datetime64[ns]')
+        pytest.raises(TypeError, idx.astype, float)
+        pytest.raises(TypeError, idx.astype, str)
+        pytest.raises(TypeError, idx.astype, 'datetime64')
+        pytest.raises(TypeError, idx.astype, 'datetime64[ns]')
 
     def test_pickle_compat_construction(self):
         pass