15
15
is_extension_array_dtype ,
16
16
is_float ,
17
17
is_float_dtype ,
18
- is_integer ,
19
18
is_integer_dtype ,
20
19
is_list_like ,
21
20
is_numeric_dtype ,
22
- is_object_dtype ,
23
21
is_scalar ,
24
- is_string_dtype ,
25
22
pandas_dtype ,
26
23
)
27
24
from pandas .core .dtypes .dtypes import register_extension_dtype
28
25
from pandas .core .dtypes .generic import ABCDataFrame , ABCIndexClass , ABCSeries
29
26
from pandas .core .dtypes .missing import isna , notna
30
27
31
28
from pandas .core import nanops , ops
32
- from pandas .core .algorithms import take
33
- from pandas .core .arrays import ExtensionArray , ExtensionOpsMixin
34
- import pandas .core .common as com
35
- from pandas .core .indexers import check_bool_array_indexer
29
+
30
+ from .masked import BaseMaskedArray
36
31
37
32
if TYPE_CHECKING :
38
33
from pandas ._typing import Scalar
@@ -199,7 +194,7 @@ def coerce_to_array(values, mask=None, copy: bool = False):
199
194
return values , mask
200
195
201
196
202
- class BooleanArray (ExtensionArray , ExtensionOpsMixin ):
197
+ class BooleanArray (BaseMaskedArray ):
203
198
"""
204
199
Array of boolean (True/False) data with missing values.
205
200
@@ -253,6 +248,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
253
248
Length: 3, dtype: boolean
254
249
"""
255
250
251
+ # The value used to fill '_data' to avoid upcasting
252
+ _internal_fill_value = False
253
+
256
254
def __init__ (self , values : np .ndarray , mask : np .ndarray , copy : bool = False ):
257
255
if not (isinstance (values , np .ndarray ) and values .dtype == np .bool_ ):
258
256
raise TypeError (
@@ -297,127 +295,6 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
297
295
def _from_factorized (cls , values , original : "BooleanArray" ):
298
296
return cls ._from_sequence (values , dtype = original .dtype )
299
297
300
- def _formatter (self , boxed = False ):
301
- return str
302
-
303
- @property
304
- def _hasna (self ) -> bool :
305
- # Note: this is expensive right now! The hope is that we can
306
- # make this faster by having an optional mask, but not have to change
307
- # source code using it..
308
- return self ._mask .any ()
309
-
310
- def __getitem__ (self , item ):
311
- if is_integer (item ):
312
- if self ._mask [item ]:
313
- return self .dtype .na_value
314
- return self ._data [item ]
315
-
316
- elif com .is_bool_indexer (item ):
317
- item = check_bool_array_indexer (self , item )
318
-
319
- return type (self )(self ._data [item ], self ._mask [item ])
320
-
321
- def to_numpy (
322
- self , dtype = None , copy = False , na_value : "Scalar" = lib .no_default ,
323
- ):
324
- """
325
- Convert to a NumPy Array.
326
-
327
- By default converts to an object-dtype NumPy array. Specify the `dtype` and
328
- `na_value` keywords to customize the conversion.
329
-
330
- Parameters
331
- ----------
332
- dtype : dtype, default object
333
- The numpy dtype to convert to.
334
- copy : bool, default False
335
- Whether to ensure that the returned value is a not a view on
336
- the array. Note that ``copy=False`` does not *ensure* that
337
- ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
338
- a copy is made, even if not strictly necessary. This is typically
339
- only possible when no missing values are present and `dtype`
340
- is a boolean dtype.
341
- na_value : scalar, optional
342
- Scalar missing value indicator to use in numpy array. Defaults
343
- to the native missing value indicator of this array (pd.NA).
344
-
345
- Returns
346
- -------
347
- numpy.ndarray
348
-
349
- Examples
350
- --------
351
- An object-dtype is the default result
352
-
353
- >>> a = pd.array([True, False], dtype="boolean")
354
- >>> a.to_numpy()
355
- array([True, False], dtype=object)
356
-
357
- When no missing values are present, a boolean dtype can be used.
358
-
359
- >>> a.to_numpy(dtype="bool")
360
- array([ True, False])
361
-
362
- However, requesting a bool dtype will raise a ValueError if
363
- missing values are present and the default missing value :attr:`NA`
364
- is used.
365
-
366
- >>> a = pd.array([True, False, pd.NA], dtype="boolean")
367
- >>> a
368
- <BooleanArray>
369
- [True, False, NA]
370
- Length: 3, dtype: boolean
371
-
372
- >>> a.to_numpy(dtype="bool")
373
- Traceback (most recent call last):
374
- ...
375
- ValueError: cannot convert to bool numpy array in presence of missing values
376
-
377
- Specify a valid `na_value` instead
378
-
379
- >>> a.to_numpy(dtype="bool", na_value=False)
380
- array([ True, False, False])
381
- """
382
- if na_value is lib .no_default :
383
- na_value = libmissing .NA
384
- if dtype is None :
385
- dtype = object
386
- if self ._hasna :
387
- if (
388
- not (is_object_dtype (dtype ) or is_string_dtype (dtype ))
389
- and na_value is libmissing .NA
390
- ):
391
- raise ValueError (
392
- f"cannot convert to '{ dtype } '-dtype NumPy array "
393
- "with missing values. Specify an appropriate 'na_value' "
394
- "for this dtype."
395
- )
396
- # don't pass copy to astype -> always need a copy since we are mutating
397
- data = self ._data .astype (dtype )
398
- data [self ._mask ] = na_value
399
- else :
400
- data = self ._data .astype (dtype , copy = copy )
401
- return data
402
-
403
- __array_priority__ = 1000 # higher than ndarray so ops dispatch to us
404
-
405
- def __array__ (self , dtype = None ):
406
- """
407
- the array interface, return my values
408
- We return an object array here to preserve our scalar values
409
- """
410
- # by default (no dtype specified), return an object array
411
- return self .to_numpy (dtype = dtype )
412
-
413
- def __arrow_array__ (self , type = None ):
414
- """
415
- Convert myself into a pyarrow Array.
416
- """
417
- import pyarrow as pa
418
-
419
- return pa .array (self ._data , mask = self ._mask , type = type )
420
-
421
298
_HANDLED_TYPES = (np .ndarray , numbers .Number , bool , np .bool_ )
422
299
423
300
def __array_ufunc__ (self , ufunc , method , * inputs , ** kwargs ):
@@ -465,40 +342,6 @@ def reconstruct(x):
465
342
else :
466
343
return reconstruct (result )
467
344
468
- def __iter__ (self ):
469
- for i in range (len (self )):
470
- if self ._mask [i ]:
471
- yield self .dtype .na_value
472
- else :
473
- yield self ._data [i ]
474
-
475
- def take (self , indexer , allow_fill = False , fill_value = None ):
476
- # we always fill with False internally
477
- # to avoid upcasting
478
- data_fill_value = False if isna (fill_value ) else fill_value
479
- result = take (
480
- self ._data , indexer , fill_value = data_fill_value , allow_fill = allow_fill
481
- )
482
-
483
- mask = take (self ._mask , indexer , fill_value = True , allow_fill = allow_fill )
484
-
485
- # if we are filling
486
- # we only fill where the indexer is null
487
- # not existing missing values
488
- # TODO(jreback) what if we have a non-na float as a fill value?
489
- if allow_fill and notna (fill_value ):
490
- fill_mask = np .asarray (indexer ) == - 1
491
- result [fill_mask ] = fill_value
492
- mask = mask ^ fill_mask
493
-
494
- return type (self )(result , mask , copy = False )
495
-
496
- def copy (self ):
497
- data , mask = self ._data , self ._mask
498
- data = data .copy ()
499
- mask = mask .copy ()
500
- return type (self )(data , mask , copy = False )
501
-
502
345
def __setitem__ (self , key , value ):
503
346
_is_scalar = is_scalar (value )
504
347
if _is_scalar :
@@ -512,26 +355,6 @@ def __setitem__(self, key, value):
512
355
self ._data [key ] = value
513
356
self ._mask [key ] = mask
514
357
515
- def __len__ (self ):
516
- return len (self ._data )
517
-
518
- @property
519
- def nbytes (self ):
520
- return self ._data .nbytes + self ._mask .nbytes
521
-
522
- def isna (self ):
523
- return self ._mask
524
-
525
- @property
526
- def _na_value (self ):
527
- return self ._dtype .na_value
528
-
529
- @classmethod
530
- def _concat_same_type (cls , to_concat ):
531
- data = np .concatenate ([x ._data for x in to_concat ])
532
- mask = np .concatenate ([x ._mask for x in to_concat ])
533
- return cls (data , mask )
534
-
535
358
def astype (self , dtype , copy = True ):
536
359
"""
537
360
Cast to a NumPy array or ExtensionArray with 'dtype'.
0 commit comments