Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1690711: Support for Cortex functions sentiment, classify_text with apply #2729

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/snowflake/snowpark/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10149,3 +10149,35 @@ def snowflake_cortex_summarize(text: ColumnOrLiteralStr):
sql_func_name = "snowflake.cortex.summarize"
text_col = _to_col_if_lit(text, sql_func_name)
return builtin(sql_func_name)(text_col)


def snowflake_cortex_classify_text(input: ColumnOrLiteralStr, list_of_categories):
    """
    Classifies free-form text into categories that you provide.

    Args:
        input: A string (or a column of strings) containing the text to classify.
        list_of_categories: Array that represents the categories. Must contain at least two and at most 100 unique
            categories. Categories are case sensitive. If these requirements are not met, the function returns an
            error.

    Returns:
        Returns a string that contains a JSON object. The JSON object contains the category that the input prompt was
        classified as. If invalid arguments are given, an error is returned.
    """
    sql_func_name = "snowflake.cortex.classify_text"
    # Presumably wraps a plain Python string in a literal column and leaves
    # Column inputs untouched -- confirm against _to_col_if_lit.
    input_col = _to_col_if_lit(input, sql_func_name)
    # The categories are handed to the SQL function call unchanged.
    return builtin(sql_func_name)(input_col, list_of_categories)


def snowflake_cortex_sentiment(text: ColumnOrLiteralStr):
    """
    Returns a sentiment score for the given English-language text.

    Args:
        text: A string (or a column of strings) containing the text for which a sentiment score should be
            calculated.

    Returns:
        A floating-point number from -1 to 1 (inclusive) indicating the level of negative or positive sentiment in the
        text. Values around 0 indicate neutral sentiment.
    """
    sql_func_name = "snowflake.cortex.sentiment"
    # Presumably wraps a plain Python string in a literal column and leaves
    # Column inputs untouched -- confirm against _to_col_if_lit.
    text_col = _to_col_if_lit(text, sql_func_name)
    return builtin(sql_func_name)(text_col)
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
_log2,
_log10,
sin,
snowflake_cortex_sentiment,
snowflake_cortex_summarize,
snowflake_cortex_classify_text,
udf,
to_variant,
when,
Expand Down Expand Up @@ -110,7 +112,9 @@
floor,
trunc,
sqrt,
snowflake_cortex_sentiment,
snowflake_cortex_summarize,
snowflake_cortex_classify_text,
}


Expand Down
10 changes: 10 additions & 0 deletions tests/integ/modin/test_apply_snowpark_python_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,13 @@ def test_apply_snowflake_cortex_summarize():
summary = s.apply(snowflake_cortex_summarize).iloc[0]
# this length check is to get around the fact that this function may not be deterministic
assert 0 < len(summary) < len(content)


@sql_count_checker(query_count=1)
def test_apply_snowflake_cortex_sentiment():
    """Apply ``snowflake_cortex_sentiment`` to a one-element Series and range-check the score."""
    from snowflake.snowpark.functions import snowflake_cortex_sentiment

    review = "A very very bad review!"
    reviews = pd.Series([review])
    score = reviews.apply(snowflake_cortex_sentiment).iloc[0]
    # Only the range is asserted, not a particular score value.
    assert -1 <= score <= 1
Loading