diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 324f30ed00bed..a73b5a9736e07 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -29,6 +29,7 @@
     _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype,
     is_internal_type, is_datetimetz, _possibly_infer_to_datetimelike,
     _dict_compat)
+from pandas.core.dtypes import ExtensionDtype
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -771,6 +772,43 @@ def dot(self, other):
         else:  # pragma: no cover
             raise TypeError('unsupported type: %s' % type(other))
 
+    # NOTE(review): pandas documents ``DataFrame.empty`` as a property;
+    # presumably this classmethod shadows it -- confirm before merging.
+    @classmethod
+    def empty(cls, shape, dtype=float):
+        """
+        Return an uninitialized DataFrame of the given shape and dtype.
+
+        Parameters
+        ----------
+        shape : int or tuple of int
+            Shape of the empty DataFrame
+        dtype : data-type, optional
+            Desired output data-type
+
+        Returns
+        -------
+        out : DataFrame
+            DataFrame of uninitialized (arbitrary) data
+            with the given shape and dtype.
+
+        See Also
+        --------
+        numpy.empty : initializes an empty array of given shape and type
+        Series.empty : initializes an empty Series of given length and type
+
+        Notes
+        -----
+        For extension dtypes the data is allocated as an object-dtype array,
+        converted to lists and passed to the constructor with ``dtype``.
+
+        """
+        if ExtensionDtype.is_dtype(dtype):
+            return cls(np.empty(shape, dtype=object).tolist(), dtype=dtype)
+
+        else:
+            return cls(np.empty(shape, dtype=dtype))
+
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 286a2ba79585a..a0b08bac6a6dd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -26,6 +26,7 @@
                                 _coerce_to_dtype, SettingWithCopyError,
                                 _maybe_box_datetimelike, ABCDataFrame,
                                 _dict_compat)
+from pandas.core.dtypes import ExtensionDtype
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                Float64Index, _ensure_index)
 from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -243,6 +244,43 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
         return cls(arr, index=index, name=name, dtype=dtype, copy=copy,
                    fastpath=fastpath)
 
+    # NOTE(review): pandas documents ``Series.empty`` as a property;
+    # presumably this classmethod shadows it -- confirm before merging.
+    @classmethod
+    def empty(cls, length, dtype=float):
+        """
+        Return an uninitialized Series of the given length and dtype.
+
+        Parameters
+        ----------
+        length : int
+            Length of the empty Series
+        dtype : data-type, optional
+            Desired output data-type
+
+        Returns
+        -------
+        out : Series
+            Series of uninitialized (arbitrary) data
+            with the given length and dtype.
+
+        See Also
+        --------
+        numpy.empty : initializes an empty array of given shape and type
+        DataFrame.empty : creates an empty DataFrame of given shape and type
+
+        Notes
+        -----
+        For extension dtypes the data is allocated as an object-dtype array
+        and passed to ``from_array`` together with ``dtype``.
+
+        """
+        if ExtensionDtype.is_dtype(dtype):
+            return cls.from_array(np.empty(length, dtype=object), dtype=dtype)
+
+        else:
+            return cls(np.empty(length, dtype=dtype))
+
     @property
     def _constructor(self):
         return Series
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 87c263e129361..1e8560aa77bf7 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1995,3 +1995,24 @@ def test_from_records_len0_with_columns(self):
         self.assertTrue(np.array_equal(result.columns, ['bar']))
         self.assertEqual(len(result), 0)
         self.assertEqual(result.index.name, 'foo')
+
+    def test_empty(self):
+        from pandas.core.dtypes import DatetimeTZDtype
+
+        df = DataFrame({'dt': pd.date_range(
+            "2015-01-01", periods=3, tz='Europe/Brussels')})
+        dt = df.values.dtype
+
+        params = [
+            (dt, (4, 8), dt),
+            (None, (7, 1), float),
+            (np.int64, (3, 5), np.int64),
+            (DatetimeTZDtype, (2, 3), object),
+        ]
+
+        for in_dt, shape, out_dt in params:
+            df = DataFrame.empty(shape, dtype=in_dt)
+            self.assertEqual(df.shape, shape)
+
+            for col in df.columns:
+                self.assertEqual(df[col].dtype, out_dt)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index c5783779c67c8..f7af65f97415c 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -706,3 +706,22 @@ def f():
         self.assertEqual(s.dtype, 'timedelta64[ns]')
         s = Series([pd.NaT, np.nan, '1 Day'])
         self.assertEqual(s.dtype, 'timedelta64[ns]')
+
+    def test_empty(self):
+        from pandas.core.dtypes import DatetimeTZDtype
+
+        df = pd.DataFrame({'dt': pd.date_range(
+            "2015-01-01", periods=3, tz='Europe/Brussels')})
+        dt = df.values.dtype
+
+        params = [
+            (dt, 6, dt),
+            (None, 3, float),
+            (np.int64, 10, np.int64),
+            (DatetimeTZDtype, 5, object),
+        ]
+
+        for in_dt, length, out_dt in params:
+            s = Series.empty(length, dtype=in_dt)
+            self.assertEqual(s.size, length)
+            self.assertEqual(s.dtype, out_dt)