@@ -878,7 +878,7 @@ def _get_merge_keys(self):
878878 return left_keys , right_keys , join_names
879879
880880 def _maybe_coerce_merge_keys (self ):
881- # we have valid mergee's but we may have to further
881+ # we have valid mergees but we may have to further
882882 # coerce these if they are originally incompatible types
883883 #
884884 # for example if these are categorical, but are not dtype_equal
@@ -890,12 +890,16 @@ def _maybe_coerce_merge_keys(self):
890890 if (len (lk ) and not len (rk )) or (not len (lk ) and len (rk )):
891891 continue
892892
893+ lk_is_cat = is_categorical_dtype (lk )
894+ rk_is_cat = is_categorical_dtype (rk )
895+
893896 # if either left or right is a categorical
894897 # then they must match exactly in categories & ordered
895- if is_categorical_dtype ( lk ) and is_categorical_dtype ( rk ) :
898+ if lk_is_cat and rk_is_cat :
896899 if lk .is_dtype_equal (rk ):
897900 continue
898- elif is_categorical_dtype (lk ) or is_categorical_dtype (rk ):
901+
902+ elif lk_is_cat or rk_is_cat :
899903 pass
900904
901905 elif is_dtype_equal (lk .dtype , rk .dtype ):
@@ -905,7 +909,7 @@ def _maybe_coerce_merge_keys(self):
905909 # kinds to proceed, eg. int64 and int8
906910 # further if we are object, but we infer to
907911 # the same, then proceed
908- if ( is_numeric_dtype (lk ) and is_numeric_dtype (rk ) ):
912+ if is_numeric_dtype (lk ) and is_numeric_dtype (rk ):
909913 if lk .dtype .kind == rk .dtype .kind :
910914 continue
911915
@@ -914,13 +918,20 @@ def _maybe_coerce_merge_keys(self):
914918 continue
915919
916920 # Houston, we have a problem!
917- # let's coerce to object
921+ # let's coerce to object if the dtypes aren't
922+ # categorical, otherwise coerce to the category
923+ # dtype. If we coerced categories to object,
924+ # then we would lose type information on some
925+ # columns, and end up trying to merge
926+ # incompatible dtypes. See GH 16900.
918927 if name in self .left .columns :
928+ typ = lk .categories .dtype if lk_is_cat else object
919929 self .left = self .left .assign (
920- ** {name : self .left [name ].astype (object )})
930+ ** {name : self .left [name ].astype (typ )})
921931 if name in self .right .columns :
932+ typ = rk .categories .dtype if rk_is_cat else object
922933 self .right = self .right .assign (
923- ** {name : self .right [name ].astype (object )})
934+ ** {name : self .right [name ].astype (typ )})
924935
925936 def _validate_specification (self ):
926937 # Hm, any way to make this logic less complicated??
0 commit comments