Skip to content

Commit 0baf047

Browse files
committed
WIP dtype cast
1 parent 10102e6 commit 0baf047

File tree

3 files changed

+57
-0
lines changed

3 files changed

+57
-0
lines changed

pandas/core/reshape/merge.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
is_numeric_dtype,
6262
is_object_dtype,
6363
is_string_dtype,
64+
is_unsigned_integer_dtype,
6465
needs_i8_conversion,
6566
)
6667
from pandas.core.dtypes.dtypes import (
@@ -1717,6 +1718,20 @@ def _maybe_coerce_merge_keys(self) -> None:
17171718
if is_numeric_dtype(lk.dtype) and is_numeric_dtype(rk.dtype):
17181719
if lk.dtype.kind == rk.dtype.kind:
17191720
continue
1721+
1722+
if(is_unsigned_integer_dtype(lk.dtype)):
1723+
try:
1724+
lk = lk.astype(rk.dtype)
1725+
continue
1726+
except TypeError as err:
1727+
raise err
1728+
elif(is_unsigned_integer_dtype(rk.dtype)):
1729+
try:
1730+
rk = rk.astype(lk.dtype)
1731+
# continue
1732+
except TypeError as err:
1733+
raise err
1734+
17201735

17211736
if isinstance(lk.dtype, ExtensionDtype) and not isinstance(
17221737
rk.dtype, ExtensionDtype

pandas/tests/arrays/integer/test_dtypes.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,29 @@ def test_astype_boolean():
297297
result = a.astype("boolean")
298298
expected = pd.array([True, False, True, True, None], dtype="boolean")
299299
tm.assert_extension_array_equal(result, expected)
300+
301+
# Dtypes for the left-hand key column: signed-integer and float spellings,
# both lowercase and capitalised dtype names.
signed_list = ["int64", "float64", "Int64", "Float64"]
302+
# Dtypes for the right-hand key column: unsigned 64-bit spellings.
unsigned_list = ["uint64", "UInt64"]
303+
@pytest.mark.parametrize(
304+
"signed_dtype, unsigned_dtype",
305+
# Full cross product: each entry of signed_list paired with each entry of
# unsigned_list.
[(s_dt, u_dt) for s_dt in signed_list for u_dt in unsigned_list]
306+
)
307+
def test_unsigned_1to1_validation(signed_dtype, unsigned_dtype):
308+
# GH#61688 (presumably the issue number this test covers -- confirm)
# Builds two frames whose key column "1" holds the same values under
# different dtypes, merges them with validate="1:1", and compares the
# multi-key merge against the single-key merge.
309+
# NOTE(review): these values are larger than 2**53, so the "float64" /
# "Float64" parametrizations cannot hold them exactly -- confirm the float
# cases are intended.
input1 = [1721088000012322083, 1721088047408560273, 1721088047408560451]
310+
# Constant assigned to column "2" of both frames.
input2 = 1
311+
df1 = pd.DataFrame()
312+
df2 = pd.DataFrame()
313+
df1["1"] = pd.Series(input1, dtype=signed_dtype) # Note different types here
314+
df2["1"] = pd.Series(input1, dtype=unsigned_dtype) # Note different types here
315+
df1["2"] = input2
316+
df2["2"] = input2
317+
318+
# `result` merges on both columns; `expected` merges on column "1" only.
# NOTE(review): with on=["1"], column "2" is a non-key column present in both
# frames, so the two merges presumably produce different column labels --
# verify that `expected` is really the frame this test should compare against.
result = pd.merge(df1, df2, on=["1", "2"], how="left", validate="1:1")
319+
expected = pd.merge(df1, df2, on=["1"], how="left", validate="1:1")
320+
# expected = pd.merge(df1, df2, on=["1"], how="left", validate="1:1")
321+
322+
# NOTE(review): debugging output left over from the WIP commit.
print(result)
323+
print(expected)
324+
# TODO: add reverse test case
325+
tm.assert_frame_equal(result, expected)

test.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Scratch reproduction script: builds two frames whose key column "t" holds
# the same three values as int64 (df1) and uint64 (df2), then prints a left
# merge with validate="1:1" on ["i", "t"] and on ["t"] alone.
import pandas as pd
2+
import numpy as np
3+
4+
df1 = pd.DataFrame()
5+
df2 = pd.DataFrame()
6+
df1["t"] = np.array([1721088000012322083, 1721088047408560273, 1721088047408560451], dtype=np.int64) # Note different types here
7+
df2["t"] = np.array([1721088000012322083, 1721088047408560273, 1721088047408560451], dtype=np.uint64) # Note different types here
8+
# Disabled alternative inputs: the same setup with int16 / uint16 keys.
# df1["t"] = np.array([32765,32766, 32767], dtype=np.int16) # Note different types here
9+
# df2["t"] = np.array([32765,32766, 32767], dtype=np.uint16) # Note different types here
10+
# Second key column "i": the constant 1 in both frames.
df1["i"] = 1
11+
df2["i"] = 1
12+
# Non-key payload columns: "p" only on the left, "q" only on the right.
df1["p"] = [3, 6, 2]
13+
df2["q"] = [1, 2, 2]
14+
15+
# Same left merge twice: first keyed on both "i" and "t", then on "t" only.
print(pd.merge(df1, df2, on=["i", "t"], how="left", validate="1:1"))
16+
print(pd.merge(df1, df2, on=["t"], how="left", validate="1:1"))

0 commit comments

Comments
 (0)