Showing 5 changed files with 372 additions and 3 deletions.
@@ -0,0 +1,29 @@
-- Extract entities example: https://github.com/anthropics/anthropic-cookbook/tree/main/tool_use
\getenv anthropic_api_key ANTHROPIC_API_KEY

SELECT ai.anthropic_generate( 'claude-3-5-sonnet-20240620'
, jsonb_build_array(
    jsonb_build_object(
        'role', 'user',
        'content', 'John works at Google in New York. He met with Sarah, the CEO of Acme Inc., last week in San Francisco.'
    )
  )
, _max_tokens => 4096
, _api_key => $1
, _tools => jsonb_build_array(
    jsonb_build_object(
        'name', 'anonymize_recognized_entities',
        'description', 'Anonymize recognized entities like people names, locations, companies. The output should be the original text with entities replaced by the entity types recognized in the input text. Example input: John works at Google in New York. Example output: :PERSON works at :COMPANY in :CITY.',
        'input_schema', jsonb_build_object(
            'type', 'object',
            'properties', jsonb_build_object(
                'anonymized', jsonb_build_object(
                    'type', 'string',
                    'description', 'The original text anonymized, with entities replaced by placeholders naming the type of entity recognized.'
                )
            ),
            'required', jsonb_build_array('anonymized')
        )
    )
  )
) AS result
\bind :anthropic_api_key
\g
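
The statement above returns the raw API response in the result column. As a minimal sketch (not part of the commit), and assuming the response follows the Anthropic Messages layout also used by detect_entities below (a text block followed by a tool_use block whose input field carries the tool arguments), the anonymized string can be extracted like this:

-- Sketch only: the sample response shape is an assumption for illustration.
WITH response(result) AS (
    VALUES ('{"content": [
        {"type": "text", "text": "..."},
        {"type": "tool_use", "name": "anonymize_recognized_entities",
         "input": {"anonymized": ":PERSON works at :COMPANY in :CITY."}}
    ]}'::jsonb)
)
SELECT jsonb_extract_path_text(result, 'content', '1', 'input', 'anonymized') AS anonymized
FROM response;
-- anonymized => :PERSON works at :COMPANY in :CITY.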
@@ -0,0 +1,88 @@
-- Extract entities example: https://github.com/anthropics/anthropic-cookbook/tree/main/tool_use
\getenv anthropic_api_key ANTHROPIC_API_KEY

CREATE OR REPLACE FUNCTION public.detect_entities(input_text text)
RETURNS TABLE(entity_name text, entity_type text, entity_context text)
AS $$
DECLARE
    api_response jsonb;
    entities_json jsonb;
BEGIN
    SELECT ai.anthropic_generate(
        'claude-3-5-sonnet-20240620',
        jsonb_build_array(
            jsonb_build_object(
                'role', 'user',
                'content', input_text
            )
        ),
        _max_tokens => 4096,
        _tools => jsonb_build_array(
            jsonb_build_object(
                'name', 'print_entities',
                'description', 'Prints extracted named entities.',
                'input_schema', jsonb_build_object(
                    'type', 'object',
                    'properties', jsonb_build_object(
                        'entities', jsonb_build_object(
                            'type', 'array',
                            'items', jsonb_build_object(
                                'type', 'object',
                                'properties', jsonb_build_object(
                                    'name', jsonb_build_object('type', 'string', 'description', 'The extracted entity name.'),
                                    'type', jsonb_build_object('type', 'string', 'description', 'The entity type (e.g., PERSON, ORGANIZATION, LOCATION).'),
                                    'context', jsonb_build_object('type', 'string', 'description', 'The context in which the entity appears in the text.')
                                ),
                                'required', jsonb_build_array('name', 'type', 'context')
                            )
                        )
                    ),
                    'required', jsonb_build_array('entities')
                )
            )
        )
    ) INTO api_response;

    -- The tool_use block is the second element of the response content array;
    -- its "input" field carries the arguments the model passed to print_entities.
    entities_json := jsonb_extract_path_text(api_response, 'content', '1', 'input', 'entities')::jsonb;

    RETURN QUERY
    SELECT
        e->>'name' AS entity_name,
        e->>'type' AS entity_type,
        e->>'context' AS entity_context
    FROM jsonb_array_elements(entities_json) AS e;

EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE 'An error occurred: %', SQLERRM;
        RAISE NOTICE 'API Response: %', api_response;
        RETURN;
END;
$$ LANGUAGE plpgsql;
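
With the function in place, entity detection is a plain table-function call. A usage sketch follows; it needs an Anthropic API key configured for pgai, and the exact rows returned will vary from run to run:

-- Usage sketch, with illustrative (not guaranteed) output:
SELECT * FROM public.detect_entities(
    'John works at Google in New York.'
);
--  entity_name | entity_type  |          entity_context
-- -------------+--------------+-----------------------------------
--  John        | PERSON       | John works at Google in New York.
--  Google      | ORGANIZATION | John works at Google in New York.
--  New York    | LOCATION     | John works at Google in New York.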

CREATE OR REPLACE FUNCTION public.anonymize_text(input_text text)
RETURNS text
AS $$
DECLARE
    entity record;
    anonymized text := input_text;
BEGIN
    -- Replace entities with their types, starting with the longest entities
    -- so that shorter names embedded in longer ones are not replaced first
    FOR entity IN (
        SELECT entity_name, entity_type
        FROM public.detect_entities(input_text)
        ORDER BY length(entity_name) DESC
    )
    LOOP
        -- Escape regex metacharacters in the entity name, then replace
        -- whole-word matches (\m ... \M) case-insensitively
        anonymized := regexp_replace(
            anonymized,
            '\m' || regexp_replace(entity.entity_name, '([().\\*+?])', '\\\1', 'g') || '\M',
            ':' || entity.entity_type || ':',
            'gi'
        );
    END LOOP;

    RETURN anonymized;
END;
$$ LANGUAGE plpgsql;
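
End to end, anonymization then reads as a single call. Another sketch, whose output depends on the entities the model happens to return:

-- Usage sketch: each detected entity is replaced by a :TYPE: placeholder.
SELECT public.anonymize_text(
    'John works at Google in New York. He met with Sarah, the CEO of Acme Inc., last week in San Francisco.'
);
-- Possible output:
-- :PERSON: works at :ORGANIZATION: in :LOCATION:. He met with :PERSON:,
-- the CEO of :ORGANIZATION:, last week in :LOCATION:.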
@@ -0,0 +1,82 @@
CREATE OR REPLACE FUNCTION public.summarize_article(article_text text)
RETURNS TABLE(
    author text,
    topics text[],
    summary text,
    coherence integer,
    persuasion numeric
)
AS $$
DECLARE
    api_response jsonb;
    summary_json jsonb;
BEGIN
    -- Call the Anthropic API using the ai.anthropic_generate function with the print_summary tool
    SELECT ai.anthropic_generate(
        'claude-3-5-sonnet-20240620',
        jsonb_build_array(
            jsonb_build_object(
                'role', 'user',
                'content', format('Please summarize the following article using the print_summary tool: %s', article_text)
            )
        ),
        _max_tokens => 4096,
        _tools => jsonb_build_array(
            jsonb_build_object(
                'name', 'print_summary',
                'description', 'Prints a summary of the article.',
                'input_schema', jsonb_build_object(
                    'type', 'object',
                    'properties', jsonb_build_object(
                        'author', jsonb_build_object('type', 'string', 'description', 'Name of the article author'),
                        'topics', jsonb_build_object(
                            'type', 'array',
                            'items', jsonb_build_object('type', 'string'),
                            'description', 'Array of topics, e.g. ["tech", "politics"]. Should be as specific as possible, and can overlap.'
                        ),
                        'summary', jsonb_build_object('type', 'string', 'description', 'Summary of the article. One or two paragraphs max.'),
                        'coherence', jsonb_build_object('type', 'integer', 'description', 'Coherence of the article''s key points, 0-100 (inclusive)'),
                        'persuasion', jsonb_build_object('type', 'number', 'description', 'Article''s persuasion score, 0.0-1.0 (inclusive)')
                    ),
                    'required', jsonb_build_array('author', 'topics', 'summary', 'coherence', 'persuasion')
                )
            )
        )
    ) INTO api_response;

    -- Extract the tool arguments from the tool_use content block of the response
    summary_json := jsonb_path_query(api_response, '$.content[*] ? (@.type == "tool_use").input');

    -- Return the extracted summary information
    RETURN QUERY
    SELECT
        summary_json->>'author',
        array(SELECT jsonb_array_elements_text(summary_json->'topics')),
        summary_json->>'summary',
        (summary_json->>'coherence')::integer,
        (summary_json->>'persuasion')::numeric;

EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE 'An error occurred: %', SQLERRM;
        RAISE NOTICE 'API Response: %', api_response;
        RETURN;
END;
$$ LANGUAGE plpgsql;

-- Test the function
-- Content From URL: https://docs.timescale.com/use-timescale/latest/compression
select * from summarize_article($$
# Compression

Time-series data can be compressed to reduce the amount of storage required, and increase the speed of some queries. This is a cornerstone feature of Timescale. When new data is added to your database, it is in the form of uncompressed rows. Timescale uses a built-in job scheduler to convert this data to the form of compressed columns. This occurs across chunks of Timescale hypertables.

Timescale charges are based on how much storage you use. You don't pay for a fixed storage size, and you don't need to worry about scaling disk size as your data grows; We handle it all for you. To reduce your data costs further, use compression, a data retention policy, and tiered storage.

$$);
-- -[ RECORD 1 ]---------------------------------------------------------------
-- author     | Timescale Documentation
-- topics     | {"database management","data compression","time-series data","data storage optimization"}
-- summary    | The article discusses Timescale's compression feature for time-series data. It explains that compression is a key feature of Timescale, designed to reduce storage requirements and improve query performance. The process involves converting newly added uncompressed row data into compressed columns using a built-in job scheduler. This compression occurs across chunks of Timescale hypertables. The article also mentions that Timescale's pricing model is based on actual storage used, with automatic scaling. To further reduce data costs, users are advised to employ compression, implement data retention policies, and utilize tiered storage.
-- coherence  | 95
-- persuasion | 0.8