Commit f3f9b66

SNOW-1902019: Python CVEs january batch 2 (#2155)
Co-authored-by: Yijun Xie <yijun.xie@snowflake.com>
Parent: 3769b43

File tree: 3 files changed, +148 -36 lines

src/snowflake/connector/cursor.py (+6 -2)
```diff
@@ -875,6 +875,7 @@ def execute(
         _skip_upload_on_content_match: bool = False,
         file_stream: IO[bytes] | None = None,
         num_statements: int | None = None,
+        _force_qmark_paramstyle: bool = False,
         _dataframe_ast: str | None = None,
     ) -> Self | dict[str, Any] | None:
         """Executes a command/query.
@@ -910,6 +911,7 @@ def execute(
             file_stream: File-like object to be uploaded with PUT
             num_statements: Query level parameter submitted in _statement_params constraining exact number of
                 statements being submitted (or 0 if submitting an uncounted number) when using a multi-statement query.
+            _force_qmark_paramstyle: Force the use of qmark paramstyle regardless of the connection's paramstyle.
             _dataframe_ast: Base64-encoded dataframe request abstract syntax tree.
 
         Returns:
@@ -958,7 +960,7 @@ def execute(
             "dataframe_ast": _dataframe_ast,
         }
 
-        if self._connection.is_pyformat:
+        if self._connection.is_pyformat and not _force_qmark_paramstyle:
             query = self._preprocess_pyformat_query(command, params)
         else:
             # qmark and numeric paramstyle
@@ -1458,7 +1460,9 @@ def executemany(
         else:
             if re.search(";/s*$", command) is None:
                 command = command + "; "
-            if self._connection.is_pyformat:
+            if self._connection.is_pyformat and not kwargs.get(
+                "_force_qmark_paramstyle", False
+            ):
                 processed_queries = [
                     self._preprocess_pyformat_query(command, params)
                     for params in seqparams
```
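
Together, these two changes let internal callers force qmark binding even when the connection's paramstyle is pyformat (the connector default). A minimal sketch of the resulting call pattern, assuming placeholder credentials; `_force_qmark_paramstyle` is the private flag added above:

```python
# Sketch of the new code path; the connection arguments are placeholders.
import snowflake.connector

conn = snowflake.connector.connect(account="...", user="...", password="...")

# On a pyformat connection, execute() normally rewrites the query client
# side via _preprocess_pyformat_query(). With the new flag it leaves the
# text alone and ships the '?' placeholder to the server as a bind parameter.
conn.cursor().execute(
    "DROP TABLE IF EXISTS identifier(?)",
    params=("SOME_TABLE",),
    _force_qmark_paramstyle=True,
)
```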

src/snowflake/connector/pandas_tools.py (+93 -29)
```diff
@@ -85,9 +85,16 @@ def _do_create_temp_stage(
     overwrite: bool,
     use_scoped_temp_object: bool,
 ) -> None:
-    create_stage_sql = f"CREATE {get_temp_type_for_object(use_scoped_temp_object)} STAGE /* Python:snowflake.connector.pandas_tools.write_pandas() */ {stage_location} FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''})"
-    logger.debug(f"creating stage with '{create_stage_sql}'")
-    cursor.execute(create_stage_sql, _is_internal=True).fetchall()
+    create_stage_sql = f"CREATE {get_temp_type_for_object(use_scoped_temp_object)} STAGE /* Python:snowflake.connector.pandas_tools.write_pandas() */ identifier(?) FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''})"
+    params = (stage_location,)
+    logger.debug(f"creating stage with '{create_stage_sql}'. params: %s", params)
+    cursor.execute(
+        create_stage_sql,
+        _is_internal=True,
+        _force_qmark_paramstyle=True,
+        params=params,
+        num_statements=1,
+    )
 
 
 def _create_temp_stage(
@@ -147,12 +154,19 @@ def _do_create_temp_file_format(
     use_scoped_temp_object: bool,
 ) -> None:
     file_format_sql = (
-        f"CREATE {get_temp_type_for_object(use_scoped_temp_object)} FILE FORMAT {file_format_location} "
+        f"CREATE {get_temp_type_for_object(use_scoped_temp_object)} FILE FORMAT identifier(?) "
         f"/* Python:snowflake.connector.pandas_tools.write_pandas() */ "
         f"TYPE=PARQUET COMPRESSION={compression}{sql_use_logical_type}"
     )
-    logger.debug(f"creating file format with '{file_format_sql}'")
-    cursor.execute(file_format_sql, _is_internal=True)
+    params = (file_format_location,)
+    logger.debug(f"creating file format with '{file_format_sql}'. params: %s", params)
+    cursor.execute(
+        file_format_sql,
+        _is_internal=True,
+        _force_qmark_paramstyle=True,
+        params=params,
+        num_statements=1,
+    )
 
 
 def _create_temp_file_format(
```
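
Both helpers now share one shape: the object name leaves the f-string, travels in a one-element params tuple, and is resolved server side through identifier(?). A hedged sketch of the pattern on its own (the helper name and inputs are illustrative, not part of the commit):

```python
from snowflake.connector.cursor import SnowflakeCursor


def _create_stage_sketch(cursor: SnowflakeCursor, stage_location: str) -> None:
    # Illustrative only: the stage name is bound, never formatted into the SQL.
    cursor.execute(
        "CREATE TEMP STAGE identifier(?) FILE_FORMAT=(TYPE=PARQUET)",
        _is_internal=True,
        _force_qmark_paramstyle=True,
        params=(stage_location,),
        num_statements=1,  # a multi-statement payload would be rejected
    )
```

The write_pandas hunks below apply the same treatment to the PUT, infer_schema, CREATE TABLE, TRUNCATE, COPY INTO, and ALTER TABLE ... RENAME statements.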
```diff
@@ -379,14 +393,20 @@ def write_pandas(
         # Upload parquet file
         upload_sql = (
             "PUT /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
-            "'file://{path}' @{stage_location} PARALLEL={parallel}"
+            "'file://{path}' ? PARALLEL={parallel}"
         ).format(
             path=chunk_path.replace("\\", "\\\\").replace("'", "\\'"),
-            stage_location=stage_location,
             parallel=parallel,
         )
-        logger.debug(f"uploading files with '{upload_sql}'")
-        cursor.execute(upload_sql, _is_internal=True)
+        params = ("@" + stage_location,)
+        logger.debug(f"uploading files with '{upload_sql}', params: %s", params)
+        cursor.execute(
+            upload_sql,
+            _is_internal=True,
+            _force_qmark_paramstyle=True,
+            params=params,
+            num_statements=1,
+        )
         # Remove chunk file
         os.remove(chunk_path)
 
@@ -403,9 +423,16 @@ def write_pandas(
     columns = quote + f"{quote},{quote}".join(snowflake_column_names) + quote
 
     def drop_object(name: str, object_type: str) -> None:
-        drop_sql = f"DROP {object_type.upper()} IF EXISTS {name} /* Python:snowflake.connector.pandas_tools.write_pandas() */"
-        logger.debug(f"dropping {object_type} with '{drop_sql}'")
-        cursor.execute(drop_sql, _is_internal=True)
+        drop_sql = f"DROP {object_type.upper()} IF EXISTS identifier(?) /* Python:snowflake.connector.pandas_tools.write_pandas() */"
+        params = (name,)
+        logger.debug(f"dropping {object_type} with '{drop_sql}'. params: %s", params)
+        cursor.execute(
+            drop_sql,
+            _is_internal=True,
+            _force_qmark_paramstyle=True,
+            params=params,
+            num_statements=1,
+        )
 
     if auto_create_table or overwrite:
         file_format_location = _create_temp_file_format(
@@ -417,10 +444,17 @@ def drop_object(name: str, object_type: str) -> None:
             sql_use_logical_type,
             _use_scoped_temp_object,
         )
-        infer_schema_sql = f"SELECT COLUMN_NAME, TYPE FROM table(infer_schema(location=>'@{stage_location}', file_format=>'{file_format_location}'))"
-        logger.debug(f"inferring schema with '{infer_schema_sql}'")
+        infer_schema_sql = "SELECT COLUMN_NAME, TYPE FROM table(infer_schema(location=>?, file_format=>?))"
+        params = (f"@{stage_location}", file_format_location)
+        logger.debug(f"inferring schema with '{infer_schema_sql}'. params: %s", params)
         column_type_mapping = dict(
-            cursor.execute(infer_schema_sql, _is_internal=True).fetchall()
+            cursor.execute(
+                infer_schema_sql,
+                _is_internal=True,
+                _force_qmark_paramstyle=True,
+                params=params,
+                num_statements=1,
+            ).fetchall()
         )
         # Infer schema can return the columns out of order depending on the chunking we do when uploading
         # so we have to iterate through the dataframe columns to make sure we create the table with its
@@ -440,12 +474,21 @@ def drop_object(name: str, object_type: str) -> None:
         )
 
         create_table_sql = (
-            f"CREATE {table_type.upper()} TABLE IF NOT EXISTS {target_table_location} "
+            f"CREATE {table_type.upper()} TABLE IF NOT EXISTS identifier(?) "
             f"({create_table_columns})"
             f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
         )
-        logger.debug(f"auto creating table with '{create_table_sql}'")
-        cursor.execute(create_table_sql, _is_internal=True)
+        params = (target_table_location,)
+        logger.debug(
+            f"auto creating table with '{create_table_sql}'. params: %s", params
+        )
+        cursor.execute(
+            create_table_sql,
+            _is_internal=True,
+            _force_qmark_paramstyle=True,
+            params=params,
+            num_statements=1,
+        )
         # need explicit casting when the underlying table schema is inferred
         parquet_columns = "$1:" + ",$1:".join(
             f"{quote}{snowflake_col}{quote}::{column_type_mapping[col]}"
@@ -464,12 +507,19 @@ def drop_object(name: str, object_type: str) -> None:
 
     try:
         if overwrite and (not auto_create_table):
-            truncate_sql = f"TRUNCATE TABLE {target_table_location} /* Python:snowflake.connector.pandas_tools.write_pandas() */"
-            logger.debug(f"truncating table with '{truncate_sql}'")
-            cursor.execute(truncate_sql, _is_internal=True)
+            truncate_sql = "TRUNCATE TABLE identifier(?) /* Python:snowflake.connector.pandas_tools.write_pandas() */"
+            params = (target_table_location,)
+            logger.debug(f"truncating table with '{truncate_sql}'. params: %s", params)
+            cursor.execute(
+                truncate_sql,
+                _is_internal=True,
+                _force_qmark_paramstyle=True,
+                params=params,
+                num_statements=1,
+            )
 
         copy_into_sql = (
-            f"COPY INTO {target_table_location} /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
+            f"COPY INTO identifier(?) /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
             f"({columns}) "
             f"FROM (SELECT {parquet_columns} FROM @{stage_location}) "
             f"FILE_FORMAT=("
@@ -478,10 +528,17 @@ def drop_object(name: str, object_type: str) -> None:
             f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
             f"{sql_use_logical_type}"
             f") "
-            f"PURGE=TRUE ON_ERROR={on_error}"
+            f"PURGE=TRUE ON_ERROR=?"
         )
-        logger.debug(f"copying into with '{copy_into_sql}'")
-        copy_results = cursor.execute(copy_into_sql, _is_internal=True).fetchall()
+        params = (target_table_location, on_error)
+        logger.debug(f"copying into with '{copy_into_sql}'. params: %s", params)
+        copy_results = cursor.execute(
+            copy_into_sql,
+            _is_internal=True,
+            _force_qmark_paramstyle=True,
+            params=params,
+            num_statements=1,
+        ).fetchall()
 
         if overwrite and auto_create_table:
             original_table_location = build_location_helper(
@@ -491,9 +548,16 @@ def drop_object(name: str, object_type: str) -> None:
                 quote_identifiers=quote_identifiers,
             )
             drop_object(original_table_location, "table")
-            rename_table_sql = f"ALTER TABLE {target_table_location} RENAME TO {original_table_location} /* Python:snowflake.connector.pandas_tools.write_pandas() */"
-            logger.debug(f"rename table with '{rename_table_sql}'")
-            cursor.execute(rename_table_sql, _is_internal=True)
+            rename_table_sql = "ALTER TABLE identifier(?) RENAME TO identifier(?) /* Python:snowflake.connector.pandas_tools.write_pandas() */"
+            params = (target_table_location, original_table_location)
+            logger.debug(f"rename table with '{rename_table_sql}'. params: %s", params)
+            cursor.execute(
+                rename_table_sql,
+                _is_internal=True,
+                _force_qmark_paramstyle=True,
+                params=params,
+                num_statements=1,
+            )
     except ProgrammingError:
         if overwrite and auto_create_table:
             # drop table only if we created a new one with a random name
```
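
For context, this is the class of bug the rewrite closes. A contrived before/after sketch, with a hypothetical attacker-influenced value (the object names here are made up, not from the commit):

```python
# Contrived sketch; "my_table"/"my_stage" and the crafted value are hypothetical.
on_error = "CONTINUE; DROP TABLE important_table"  # attacker-influenced option

# Before: options and identifiers were formatted into the statement text,
# so the value above would change the SQL that reaches the server.
unsafe = f"COPY INTO my_table FROM @my_stage PURGE=TRUE ON_ERROR={on_error}"

# After: the table name and ON_ERROR value are bind parameters; they are
# sent as data, so they can name an object or pick an option but never
# extend the statement. num_statements=1 further caps the request at one
# statement server side.
safe = "COPY INTO identifier(?) FROM @my_stage PURGE=TRUE ON_ERROR=?"
params = ("my_table", on_error)
```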

test/integ/pandas/test_pandas_tools.py (+49 -5)
```diff
@@ -64,7 +64,7 @@ def assert_result_equals(
 
 
 def test_fix_snow_746341(
-    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]]
+    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
 ):
     cat = '"cat"'
     df = pandas.DataFrame([[1], [2]], columns=[f"col_'{cat}'"])
@@ -534,8 +534,7 @@ def test_table_location_building(
 
     def mocked_execute(*args, **kwargs):
         if len(args) >= 1 and args[0].startswith("COPY INTO"):
-            location = args[0].split(" ")[2]
-            assert location == expected_location
+            assert kwargs["params"][0] == expected_location
         cur = SnowflakeCursor(cnx)
         cur._result = iter([])
         return cur
@@ -906,7 +905,7 @@ def test_auto_create_table_similar_column_names(
 
 
 def test_all_pandas_types(
-    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]]
+    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
 ):
     table_name = random_string(5, "all_types_")
     datetime_with_tz = datetime(1997, 6, 3, 14, 21, 32, 00, tzinfo=timezone.utc)
@@ -997,7 +996,7 @@ def test_no_create_internal_object_privilege_in_target_schema(
     def mock_execute(*args, **kwargs):
         if (
             f"CREATE TEMP {object_type}" in args[0]
-            and "target_schema_no_create_" in args[0]
+            and "target_schema_no_create_" in kwargs["params"][0]
         ):
             raise ProgrammingError("Cannot create temp object in target schema")
     cursor = cnx.cursor()
@@ -1027,3 +1026,48 @@ def mock_execute(*args, **kwargs):
     finally:
         cnx.execute_string(f"drop schema if exists {source_schema}")
         cnx.execute_string(f"drop schema if exists {target_schema}")
+
+
+def test_write_pandas_with_on_error(
+    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
+):
+    """Tests whether overwriting table using a Pandas DataFrame works as expected."""
+    random_table_name = random_string(5, "userspoints_")
+    df_data = [("Dash", 50)]
+    df = pandas.DataFrame(df_data, columns=["name", "points"])
+
+    table_name = random_table_name
+    col_id = "id"
+    col_name = "name"
+    col_points = "points"
+
+    create_sql = (
+        f"CREATE OR REPLACE TABLE {table_name}"
+        f"({col_name} STRING, {col_points} INT, {col_id} INT AUTOINCREMENT)"
+    )
+
+    select_count_sql = f"SELECT count(*) FROM {table_name}"
+    drop_sql = f"DROP TABLE IF EXISTS {table_name}"
+    with conn_cnx() as cnx:  # type: SnowflakeConnection
+        cnx.execute_string(create_sql)
+        try:
+            # Write dataframe with 1 row
+            success, nchunks, nrows, _ = write_pandas(
+                cnx,
+                df,
+                random_table_name,
+                quote_identifiers=False,
+                auto_create_table=False,
+                overwrite=True,
+                index=True,
+                on_error="continue",
+            )
+            # Check write_pandas output
+            assert success
+            assert nchunks == 1
+            assert nrows == 1
+            result = cnx.cursor(DictCursor).execute(select_count_sql).fetchone()
+            # Check number of rows
+            assert result["COUNT(*)"] == 1
+        finally:
+            cnx.execute_string(drop_sql)
```
