Skip to content

Commit

Permalink
Fix pandas large ints (ClickHouse#454)
Browse files Browse the repository at this point in the history
* Fix pandas inserts into big int columns

* Tinkering with pandas insert data types
  • Loading branch information
genzgd authored and Yibo-Chen13 committed Jan 21, 2025
1 parent 4af225e commit 891cfaa
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ release (0.9.0), unrecognized arguments/keywords for these methods of creating a
instead of being passed as ClickHouse server settings. This is in conjunction with some refactoring in Client construction.
The supported method of passing ClickHouse server settings is to prefix such arguments/query parameters with `ch_`.

## 0.8.14, 2025-01-13
### Bug Fix
- Fix an edge case where a Pandas DataFrame that contains _only_ Int64 (or smaller) values would cause an exception when
inserting into a ClickHouse "big int" column (Int128, UInt128, Int256, or UInt256). Closes https://github.com/ClickHouse/clickhouse-connect/issues/452

## 0.8.13, 2025-01-07
### Bug Fix
- Fix missing default for new access_token parameter. Thanks to [Lukas Thaler](https://github.com/lukasthalerINNIO) for the PR.
Expand Down
5 changes: 3 additions & 2 deletions tests/integration_tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,14 @@ def test_pandas_low_card(test_client: Client, table_context: Callable):


def test_pandas_large_types(test_client: Client, table_context: Callable):
columns = ['key string', 'value int256']
columns = ['key String', 'value Int256', 'u_value UInt256'
]
key2_value = 30000000000000000000000000000000000
# if not test_client.min_version('21'):
# columns = ['key string', 'value int64']
# key2_value = 3000000000000000000
with table_context('test_pandas_big_int', columns):
df = pd.DataFrame([['key1', 2000], ['key2', key2_value]], columns=['key', 'value'])
df = pd.DataFrame([['key1', 2000, 50], ['key2', key2_value, 70], ['key3', -2350, 70]], columns=['key', 'value', 'u_value'])
source_df = df.copy()
test_client.insert_df('test_pandas_big_int', df)
result_df = test_client.query_df('SELECT * except _tp_time FROM test_pandas_big_int WHERE _tp_time > earliest_ts() LIMIT 3')
Expand Down
2 changes: 1 addition & 1 deletion timeplus_connect/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '0.8.13'
version = '0.8.14'
1 change: 1 addition & 0 deletions timeplus_connect/datatypes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
class BigInt(TimeplusType, registered=False):
_signed = True
valid_formats = 'string', 'native'
python_type = int

def __init_subclass__(cls,registered: bool = True):
cls.pd_type = cls.__name__
Expand Down
6 changes: 3 additions & 3 deletions timeplus_connect/driver/insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ def _convert_pandas(self, df):
data = []
for df_col_name, col_name, ch_type in zip(df.columns, self.column_names, self.column_types):
df_col = df[df_col_name]
d_type = str(df_col.dtype)
d_type_kind = df_col.dtype.kind
if ch_type.python_type == int:
if d_type_kind == 'f':
df_col = df_col.round().astype(ch_type.pd_type , copy=False)
df_col = df_col.round().astype(ch_type.base_type, copy=False)
elif d_type_kind in ('i', 'u') and not df_col.hasnans:
data.append(df_col.to_list())
continue
Expand All @@ -161,7 +161,7 @@ def _convert_pandas(self, df):
self.column_formats[col_name] = 'int'
continue
if ch_type.nullable:
if d_type == 'object':
if d_type_kind == 'O':
# This is ugly, but the multiple replaces seem required as a result of this bug:
# https://github.com/pandas-dev/pandas/issues/29024
df_col = df_col.replace({pd.NaT: None}).replace({np.nan: None})
Expand Down

0 comments on commit 891cfaa

Please sign in to comment.