@@ -243,11 +243,35 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
243243 result .sortorder = sortorder
244244
245245 if verify_integrity :
246- result ._verify_integrity ()
246+ new_codes = result ._verify_integrity ()
247+ result ._codes = new_codes
248+
247249 if _set_identity :
248250 result ._reset_identity ()
251+
249252 return result
250253
254+ def _validate_codes (self , level : list , code : list ):
255+ """
256+ Reassign code values as -1 if their corresponding levels are NaN.
257+
258+ Parameters
259+ ----------
260+ code : list
261+ Code to reassign.
262+ level : list
263+ Level to check for missing values (NaN, NaT, None).
264+
265+ Returns
266+ -------
267+ code : new code where code value = -1 if it corresponds
268+ to a level with missing values (NaN, NaT, None).
269+ """
270+ null_mask = isna (level )
271+ if np .any (null_mask ):
272+ code = np .where (null_mask [code ], - 1 , code )
273+ return code
274+
251275 def _verify_integrity (self , codes = None , levels = None ):
252276 """
253277
@@ -263,6 +287,11 @@ def _verify_integrity(self, codes=None, levels=None):
263287 ValueError
264288 If length of levels and codes don't match, if the codes for any
265289 level would exceed level bounds, or there are any duplicate levels.
290+
291+ Returns
292+ -------
293+ codes : new codes where code value = -1 if it corresponds to a
294+ NaN level.
266295 """
267296 # NOTE: Currently does not check, among other things, that cached
268297 # nlevels matches nor that sortorder matches actually sortorder.
@@ -272,22 +301,33 @@ def _verify_integrity(self, codes=None, levels=None):
272301 if len (levels ) != len (codes ):
273302 raise ValueError ("Length of levels and codes must match. NOTE:"
274303 " this index is in an inconsistent state." )
275- codes_length = len (self . codes [0 ])
304+ codes_length = len (codes [0 ])
276305 for i , (level , level_codes ) in enumerate (zip (levels , codes )):
277306 if len (level_codes ) != codes_length :
278307 raise ValueError ("Unequal code lengths: %s" %
279308 ([len (code_ ) for code_ in codes ]))
280309 if len (level_codes ) and level_codes .max () >= len (level ):
281- raise ValueError ("On level %d, code max (%d) >= length of"
282- " level (%d). NOTE: this index is in an"
283- " inconsistent state" % (i , level_codes .max (),
284- len (level )))
310+ msg = ("On level {level}, code max ({max_code}) >= length of "
311+ "level ({level_len}). NOTE: this index is in an "
312+ "inconsistent state" .format (
313+ level = i , max_code = level_codes .max (),
314+ level_len = len (level )))
315+ raise ValueError (msg )
316+ if len (level_codes ) and level_codes .min () < - 1 :
317+ raise ValueError ("On level {level}, code value ({code})"
318+ " < -1" .format (
319+ level = i , code = level_codes .min ()))
285320 if not level .is_unique :
286321 raise ValueError ("Level values must be unique: {values} on "
287322 "level {level}" .format (
288323 values = [value for value in level ],
289324 level = i ))
290325
326+ codes = [self ._validate_codes (level , code )
327+ for level , code in zip (levels , codes )]
328+ new_codes = FrozenList (codes )
329+ return new_codes
330+
291331 @classmethod
292332 def from_arrays (cls , arrays , sortorder = None , names = None ):
293333 """
@@ -586,7 +626,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
586626 new_levels = FrozenList (new_levels )
587627
588628 if verify_integrity :
589- self ._verify_integrity (levels = new_levels )
629+ new_codes = self ._verify_integrity (levels = new_levels )
630+ self ._codes = new_codes
590631
591632 names = self .names
592633 self ._levels = new_levels
@@ -676,7 +717,6 @@ def labels(self):
676717
677718 def _set_codes (self , codes , level = None , copy = False , validate = True ,
678719 verify_integrity = False ):
679-
680720 if validate and level is None and len (codes ) != self .nlevels :
681721 raise ValueError ("Length of codes must match number of levels" )
682722 if validate and level is not None and len (codes ) != len (level ):
@@ -696,9 +736,10 @@ def _set_codes(self, codes, level=None, copy=False, validate=True,
696736 new_codes = FrozenList (new_codes )
697737
698738 if verify_integrity :
699- self ._verify_integrity (codes = new_codes )
739+ new_codes = self ._verify_integrity (codes = new_codes )
700740
701741 self ._codes = new_codes
742+
702743 self ._tuples = None
703744 self ._reset_cache ()
704745
@@ -1763,9 +1804,10 @@ def __setstate__(self, state):
17631804
17641805 self ._set_levels ([Index (x ) for x in levels ], validate = False )
17651806 self ._set_codes (codes )
1807+ new_codes = self ._verify_integrity ()
1808+ self ._set_codes (new_codes )
17661809 self ._set_names (names )
17671810 self .sortorder = sortorder
1768- self ._verify_integrity ()
17691811 self ._reset_identity ()
17701812
17711813 def __getitem__ (self , key ):
0 commit comments