From 2dad7094c39eede24202701e3dd1cd45d97b777f Mon Sep 17 00:00:00 2001
From: Duncan Blythe
Date: Thu, 30 Jan 2025 15:49:05 +0100
Subject: [PATCH] [PLUGINS] Bump Version [snowflake]

---
 .../superduper_snowflake/__init__.py     |  2 +-
 .../superduper_snowflake/data_backend.py | 31 ++++++++++++++++---
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/plugins/snowflake/superduper_snowflake/__init__.py b/plugins/snowflake/superduper_snowflake/__init__.py
index 117893925..7d1cc5632 100644
--- a/plugins/snowflake/superduper_snowflake/__init__.py
+++ b/plugins/snowflake/superduper_snowflake/__init__.py
@@ -1,7 +1,7 @@
 from .vector_search import SnowflakeVectorSearcher as VectorSearcher
 from .data_backend import SnowflakeDataBackend as DataBackend
 
-__version__ = "0.5.13"
+__version__ = "0.5.14"
 
 __all__ = [
     "VectorSearcher",
diff --git a/plugins/snowflake/superduper_snowflake/data_backend.py b/plugins/snowflake/superduper_snowflake/data_backend.py
index 2f24f40e3..e8316d42d 100644
--- a/plugins/snowflake/superduper_snowflake/data_backend.py
+++ b/plugins/snowflake/superduper_snowflake/data_backend.py
@@ -82,16 +82,37 @@ def reconnect(self):
         super().reconnect()
         self.snowpark = self._get_snowpark_session(self.uri)
 
+    # def insert(self, table_name, raw_documents):
+    #     ibis_schema = self.conn.table(table_name).schema()
+    #     df = pandas.DataFrame(raw_documents)
+    #     rows = list(df.itertuples(index=False, name=None))
+    #     columns = list(ibis_schema.keys())
+    #     df = df.to_dict(orient='records')
+    #     get_row = lambda row: [row[col] for col in columns]
+    #     rows = list(map(get_row, df))
+    #     snowpark_cols = ibis_schema_to_snowpark_cols(ibis_schema)
+    #     snowpark_schema = snowpark_cols_to_schema(snowpark_cols, columns)
+    #     native_df = self.snowpark.create_dataframe(rows, schema=snowpark_schema)
+    #     return native_df.write.saveAsTable(f'"{table_name}"', mode='append')
+
     def insert(self, table_name, raw_documents):
         ibis_schema = self.conn.table(table_name).schema()
         df = pandas.DataFrame(raw_documents)
-        rows = list(df.itertuples(index=False, name=None))
+
+        # Ensure columns match the expected schema
         columns = list(ibis_schema.keys())
-        df = df.to_dict(orient='records')
-        get_row = lambda row: [row[col] for col in columns]
-        rows = list(map(get_row, df))
+        df = df[columns]  # Keep only relevant columns
+
+        # Convert to list of lists
+        rows = df.values.tolist()
+
+        # Convert schema for Snowpark
         snowpark_cols = ibis_schema_to_snowpark_cols(ibis_schema)
         snowpark_schema = snowpark_cols_to_schema(snowpark_cols, columns)
+
+        # Create Snowpark DataFrame
         native_df = self.snowpark.create_dataframe(rows, schema=snowpark_schema)
-        return native_df.write.saveAsTable(f'"{table_name}"', mode='append')
+
+        # Save table with correct format
+        return native_df.write.saveAsTable(f'"{table_name}"', mode='append')
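
Note: the rewritten insert() above selects the DataFrame columns in schema order before building the Snowpark DataFrame, so each row list lines up positionally with the table schema. Below is a minimal, self-contained sketch of just that ordering step, runnable with pandas alone; the column names and documents are hypothetical stand-ins, not taken from the plugin.

    import pandas as pd

    # Hypothetical schema column order, standing in for list(ibis_schema.keys()).
    columns = ["id", "x", "label"]

    # Documents may carry keys in any order and may include extra keys.
    raw_documents = [
        {"x": 0.1, "id": 1, "label": "a", "extra": "ignored"},
        {"label": "b", "id": 2, "x": 0.2},
    ]

    df = pd.DataFrame(raw_documents)

    # Selecting by the schema columns drops unknown keys and fixes the order,
    # mirroring `df = df[columns]` in the patched insert().
    df = df[columns]

    # Each row becomes a plain list whose positions match the schema, ready to
    # pass to snowpark Session.create_dataframe(rows, schema=...).
    rows = df.values.tolist()
    print(rows)  # [[1, 0.1, 'a'], [2, 0.2, 'b']]

Compared with the previous itertuples/to_dict round trip, this keeps a single pandas pipeline and makes the column alignment explicit, which is the substance of the behavioural change in this hunk.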