diff --git a/DESCRIPTION b/DESCRIPTION
index a55a5d0..a19667b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,6 +33,7 @@ Imports:
     dotty,
     dplyr,
     duckdb (>= 1.2.2),
+    ellmer (>= 0.2.0),
     glue,
     httr2,
     methods,
@@ -48,7 +49,6 @@ Imports:
     xml2
 Suggests:
     dbplyr,
-    ellmer (>= 0.2.0),
     lifecycle,
     knitr,
     pandoc,
diff --git a/R/ragnar-chat.R b/R/ragnar-chat.R
new file mode 100644
index 0000000..e147ca1
--- /dev/null
+++ b/R/ragnar-chat.R
@@ -0,0 +1,560 @@
+#' Creates an `ellmer::Chat` with increased capabilities
+#' powered by a `ragnar::RagnarStore`.
+#'
+#' @param chat A function that returns an `ellmer::Chat` object,
+#' such as [ellmer::chat_openai()].
+#' @param store A `ragnar::RagnarStore` object that contains the knowledge base that
+#' powers this chat.
+#' @param register_store_tool If `TRUE`, the `store` is registered as a tool in the chat.
+#' @param on_user_turn A function that is called when the user sends a message.
+#' It's called with `self` (the instance of `RagnarChat`) and `...`, the message
+#' sent by the user. Its output is passed to `ellmer::Chat$chat()`.
+#' E.g., the identity is simply `function(self, ...) list(...)`.
+#' The default callback prunes the previous tool calls from the chat history and
+#' inserts a tool call request, so that the LLM always sees retrieval results.
+#' @param retrieve A function that takes `self` (the instance of `RagnarChat`) and `query`
+#' (the query to retrieve results for) and returns a data.frame of chunks as results.
+#' The default implementation calls `ragnar_retrieve()`, filtering out chunks that are
+#' already present in the chat history.
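+#' @examples
+#' \dontrun{
+#' # A minimal sketch of the default workflow (not run: it assumes `store` is an
+#' # existing `RagnarStore` and that an OpenAI API key is configured; any other
+#' # ellmer chat constructor works as well).
+#' chat <- chat_ragnar(
+#'   \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+#'   store = store
+#' )
+#'
+#' # Each message triggers a retrieval; the results are inserted into the chat
+#' # history before the model answers.
+#' chat$chat("advanced R")
+#'
+#' # Prune the retrieved chunks from the history to save tokens.
+#' chat$turns_prune_chunks()
+#' }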
+#' @export
+chat_ragnar <- function(
+  chat,
+  store,
+  register_store_tool = TRUE,
+  on_user_turn = function(self, ...) {
+    # prunes previously inserted tool calls
+    self$turns_prune_chunks(keep_last_n = 0)
+    # inserts a new tool call request with the user's input
+    self$turns_insert_tool_call_request(..., query = paste(..., collapse = " "))
+  },
+  retrieve = function(self, query) {
+    retrieved_ids <- self$turns_list_chunks() |>
+      sapply(\(x) x$id) |>
+      unlist()
+
+    self$ragnar_store |>
+      ragnar::ragnar_retrieve(
+        query,
+        top_k = 10,
+        filter = !.data$id %in% retrieved_ids
+      )
+  }
+) {
+  chat <- chat()
+  RagnarChat$new(chat, store, register_store_tool, on_user_turn, retrieve)
+}
+
+#' Adds extra capabilities to an `ellmer::Chat` object.
+RagnarChat <- R6::R6Class(
+  "RagnarChat",
+  inherit = ellmer:::Chat,
+  public = list(
+    #' @field ragnar_store A `ragnar::RagnarStore` object that this tool retrieves from.
+    ragnar_store = NULL,
+
+    #' @field ragnar_tool_def An `ellmer::Tool` object that is registered with the chat.
+    ragnar_tool_def = NULL,
+
+    #' @field on_user_turn A function that is called when the user sends a message.
+    #' It's called with `self` (the instance of `RagnarChat`) and `...`, the message
+    #' sent by the user. Its output is passed to `ellmer::Chat$chat()`.
+    #' E.g., the identity is simply `function(self, ...) list(...)`.
+    on_user_turn = NULL,
+
+    #' @field ragnar_retrieve A function that retrieves relevant chunks given a query and a store.
+    #' Can be set to any function taking `self` and `query` as parameters and returning a
+    #' data.frame of results.
+    ragnar_retrieve = NULL,
+
+    initialize = function(
+      chat,
+      store,
+      register_store_tool,
+      on_user_turn,
+      ragnar_retrieve
+    ) {
+      self$ragnar_store <- store
+      super$initialize(
+        chat$get_provider(),
+        chat$get_system_prompt(),
+        echo = chat$.__enclos_env__$private$echo
+      )
+
+      self$ragnar_tool_def <- ellmer::tool(
+        .fun = self$ragnar_tool,
+        .description = "Given a string, retrieve the most relevant excerpts from the knowledge store.",
+        query = ellmer::type_string(
+          "The text to find most relevant matches for."
+        )
+      )
+      if (register_store_tool) {
+        self$register_tool(self$ragnar_tool_def)
+      }
+      self$on_user_turn <- on_user_turn
+      self$ragnar_retrieve <- ragnar_retrieve
+    },
+
+    chat = function(..., echo = NULL) {
+      result <- private$callback_user_turn(...)
+      do.call(super$chat, append(result, list(echo = echo)))
+    },
+
+    chat_async = function(..., tool_mode = c("concurrent", "sequential")) {
+      result <- private$callback_user_turn(...)
+      do.call(super$chat_async, append(result, list(tool_mode = tool_mode)))
+    },
+
+    chat_structured = function(..., type, echo = "none", convert = TRUE) {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$chat_structured,
+        append(result, list(type = type, echo = echo, convert = convert))
+      )
+    },
+
+    chat_structured_async = function(..., type, echo = "none") {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$chat_structured_async,
+        append(result, list(type = type, echo = echo))
+      )
+    },
+
+    stream_async = function(..., tool_mode = c("concurrent", "sequential"), stream = c("text", "content")) {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$stream_async,
+        append(result, list(tool_mode = tool_mode, stream = stream))
+      )
+    },
+
+    stream = function(..., stream = c("text", "content")) {
+      result <- private$callback_user_turn(...)
+      do.call(super$stream, append(result, list(stream = stream)))
+    },
+
+    #' @field ragnar_tool A function that retrieves relevant chunks from the store.
+    #' This is the function that is registered as a tool in the chat.
+    ragnar_tool = function(query) {
+      results <- self$ragnar_retrieve(self, query)
+
+      if (!is.data.frame(results)) {
+        stop("The ragnar_retrieve function must return a data.frame.")
+      }
+
+      results |>
+        jsonlite::toJSON()
+    },
+
+    # Turns modification API -----------
+
+    #' @description
+    #' Clears tool calls from the chat history. Usually called after the LLM response,
+    #' so follow-up questions do not include the tool calls, saving some tokens.
+    #' This only removes calls for the registered store.
+    #'
+    #' @param keep_last_n Keep the last `n` tool call pairs. I.e., if you set this to 2,
+    #' the last two assistant tool call requests and their respective results will be kept.
+    #' - Parallel tool calls are considered a single request.
+    #' - Tool results are expected to be in the user turn right after the assistant's
+    #'   turn containing the tool call request.
+    #' - Entire turns are dropped, even if (in the rare case) they contain other
+    #'   content besides the tool call.
+    turns_prune_tool_calls = function(keep_last_n = 0) {
+      turns <- self$get_turns()
+
+      skipped <- 0
+      drop_turns <- integer(0)
+      for (i in rev(seq_along(turns))) {
+        turn <- turns[[i]]
+        if (turn@role != "assistant") {
+          next
+        }
+
+        for (content in turn@contents) {
+          if (!S7::S7_inherits(content, ellmer::ContentToolRequest)) {
+            next
+          }
+          if (content@name != self$ragnar_tool_def@name) {
+            next
+          }
+
+          # This is a tool call turn. Check whether we have already skipped
+          # enough turns to keep the last n.
+          if (skipped < keep_last_n) {
+            skipped <- skipped + 1
+            break
+          }
+
+          # Mark this turn and the user turn after it for removal.
+          drop_turns <- c(drop_turns, c(i, i + 1))
+          break
+        }
+      }
+
+      # Guard against `turns[-integer(0)]`, which would drop every turn.
+      if (length(drop_turns) > 0) {
+        turns <- turns[-drop_turns]
+      }
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Finds all the chunks that are currently in the chat history.
+    #' Requires chunks returned by the tools to be formatted as JSON.
+    turns_list_chunks = function() {
+      turns <- self$get_turns()
+      chunks <- list()
+
+      for (turn in turns) {
+        if (turn@role != "user") {
+          next
+        }
+        for (content in turn@contents) {
+          chunks <- append(
+            chunks,
+            content_get_chunks(content, tool_name = self$ragnar_tool_def@name)
+          )
+        }
+      }
+
+      chunks
+    },
+
+    #' @description
+    #' Prunes the chunks in the chat history, keeping only the last `n` chunks.
+    #' This is useful to reduce the size of the chat history, especially if you have
+    #' many tool calls that return large chunks of text.
+    #' @param keep_last_n The number of chunks to keep. If `0`, all chunks are removed.
+    #' - Chunks are removed from the tool result. If the tool result becomes empty as
+    #'   a consequence, the entire turn and the assistant tool call request are removed.
+    turns_prune_chunks = function(keep_last_n = 0) {
+      turns <- self$get_turns()
+
+      drop_turn_idx <- integer(0)
+      for (ti in rev(seq_along(turns))) {
+        turn <- turns[[ti]]
+
+        if (turn@role != "user") {
+          next
+        }
+
+        contents <- turn@contents
+
+        drop_content_idx <- integer(0)
+        for (ci in rev(seq_along(contents))) {
+          content <- contents[[ci]]
+          chunks <- content_get_chunks(content, tool_name = self$ragnar_tool_def@name)
+          if (is.null(chunks)) {
+            next
+          }
+
+          # If all chunks from this tool result fit within the keep budget, keep
+          # them all and decrease the budget accordingly.
+          if (length(chunks) <= keep_last_n) {
+            keep_last_n <- keep_last_n - length(chunks)
+            next
+          }
+
+          # We'll need to remove some chunks. Keep only what we can; the keep
+          # budget is exhausted after this.
+          chunks <- tail(chunks, keep_last_n)
+          keep_last_n <- 0
+
+          # If no chunks are left, remove the entire content from the list.
+          if (length(chunks) == 0) {
+            drop_content_idx[[length(drop_content_idx) + 1]] <- ci
+            turns_to_drop <- if (
+              S7::S7_inherits(content, ellmer::ContentToolResult)
+            ) {
+              # When it's a tool result, also drop the assistant turn before it
+              # (usually the tool call request).
+              c(ti, ti - 1)
+            } else if (S7::S7_inherits(content, ContentRagnarDocuments)) {
+              # When it's proactively inserted documents, drop only the current turn.
+              # Note: that's unlikely to ever drop a turn, because the turn also
+              # contains the user question, so we always have at least one content.
+              ti
+            }
+            next
+          }
+
+          # Restore the content if some chunks remained.
+          contents[[ci]] <- content_set_chunks(content, chunks)
+        }
+
+        turn@contents <- contents
+        if (length(drop_content_idx) > 0) {
+          turn@contents <- contents[-drop_content_idx]
+        }
+
+        # If we removed all contents from the turn, we remove the entire turn,
+        # plus the assistant turn that came before or after it, depending on what
+        # triggered the removal: a tool call result or a proactively added set of
+        # documents.
+        if (length(turn@contents) == 0) {
+          drop_turn_idx <- c(drop_turn_idx, turns_to_drop)
+          next
+        }
+
+        turns[[ti]] <- turn
+      }
+
+      # Remove the turns that we marked for removal.
+      if (length(drop_turn_idx) > 0) {
+        turns <- turns[-drop_turn_idx]
+      }
+
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Removes chunks from the history by id.
+    #' Rewrites the LLM context, removing the chunks with the given ids. It will also
+    #' entirely remove the tool call request and results if all chunks are removed.
+    #'
+    #' @param chunk_ids A vector of chunk ids to remove from the chat history.
+    turns_remove_chunks = function(chunk_ids) {
+      turns <- self$get_turns()
+      drop_turn_idx <- integer(0)
+
+      for (ti in seq_along(turns)) {
+        turn <- turns[[ti]]
+        if (turn@role != "user") {
+          next
+        }
+
+        contents <- turn@contents
+        drop_content_idx <- integer(0)
+
+        for (ci in seq_along(contents)) {
+          content <- contents[[ci]]
+
+          chunks <- content_get_chunks(content, self$ragnar_tool_def@name)
+          if (is.null(chunks)) {
+            next
+          }
+
+          # Remove the chunks with the given ids.
+          chunks <- chunks[!sapply(chunks, function(x) x$id %in% chunk_ids)]
+
+          # If no chunks are left, remove the entire content from the list.
+          if (length(chunks) == 0) {
+            drop_content_idx[[length(drop_content_idx) + 1]] <- ci
+            turns_to_drop <- if (
+              S7::S7_inherits(content, ellmer::ContentToolResult)
+            ) {
+              # When it's a tool result, also drop the assistant turn before it
+              # (usually the tool call request).
+              c(ti, ti - 1)
+            } else if (S7::S7_inherits(content, ContentRagnarDocuments)) {
+              # When it's proactively inserted documents, drop only the current turn.
+              # Note: that's unlikely to ever drop a turn, because the turn also
+              # contains the user question, so we always have at least one content.
+              ti
+            }
+            next
+          }
+
+          # Restore the content if some chunks remained.
+          contents[[ci]] <- content_set_chunks(content, chunks)
+        }
+
+        turn@contents <- contents
+        if (length(drop_content_idx) > 0) {
+          turn@contents <- contents[-drop_content_idx]
+        }
+
+        # If we removed all contents from the turn, we remove the entire turn,
+        # plus the assistant turn that came before or after it, depending on what
+        # triggered the removal: a tool call result or a proactively added set of
+        # documents.
+        if (length(turn@contents) == 0) {
+          drop_turn_idx <- c(drop_turn_idx, turns_to_drop)
+          next
+        }
+
+        turns[[ti]] <- turn
+      }
+
+      # Remove the turns that we marked for removal.
+      if (length(drop_turn_idx) > 0) {
+        turns <- turns[-drop_turn_idx]
+      }
+
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Some LLMs are lazy at tool calling, and for applications to be robust,
+    #' it helps to append context for the LLM even if it didn't explicitly ask
+    #' for it. This inserts a tool call request and its result in the chat turns,
+    #' so the LLM can use them to respond to the user question.
+    #' @param ... Passed to `ellmer:::user_turn()` to insert the user turn that generated the tool call.
+    #' @param query The query to pass to the tool.
+    #' @returns An `ellmer::ContentToolResult` object that should be included in the next call to `$chat()`.
+    turns_insert_tool_call_request = function(..., query) {
+      user_turn <- ellmer:::user_turn(...)
+
+      tool_request <- ellmer::ContentToolRequest(
+        id = rlang::hash(Sys.time()),
+        name = self$ragnar_tool_def@name,
+        arguments = list(
+          query = query
+        ),
+        self$ragnar_tool_def
+      )
+
+      assistant_turn <- ellmer::Turn(
+        role = "assistant",
+        contents = list(tool_request)
+      )
+
+      self$add_turn(user_turn, assistant_turn)
+
+      ellmer::ContentToolResult(
+        self$ragnar_tool(query),
+        request = tool_request
+      )
+    },
+
+    #' @description Some models do not support tool calling, so instead of adding
+    #' a tool call request (faking that the model asked for some search results),
+    #' we proactively insert context into the user turn, as if the user had
+    #' supplied it:
+    #' - User: {question} {documents}
+    #' - LLM: answer
+    #' @param ... The contents of the user turn that generated the tool call.
+    turns_insert_documents = function(
+      ...,
+      query
+    ) {
+      documents <- self$ragnar_tool(query)
+
+      list(
+        ...,
+        ContentRagnarDocuments(text = documents)
+      )
+    },
+
+    #' @description
+    #' Summarizes the tool calls in the chat history. The assistant tool
+    #' call request is redacted, and the chunks are summarized.
+    #'
+    #' The `ellmer::ContentToolRequest` becomes an `ellmer::ContentText` with:
+    #' ```
+    #' <Redacted by summarization tool>
+    #' ```
+    #'
+    #' The `ellmer::ContentToolResult` becomes an `ellmer::ContentText` with
+    #' a summary of the chunks given by the `summarize_chunks` callback.
+    #' `summarize_chunks` takes a list of `chunks` as argument.
+    turns_summarize_tool_calls = function(summarize_chunks) {
+      turns <- self$get_turns() |>
+        turns_modify_tool_calls(
+          function(assistant_turn, user_turn) {
+            summarize_tool_call(
+              assistant_turn = assistant_turn,
+              user_turn = user_turn,
+              summarize_chunks,
+              tool_name = self$ragnar_tool_def@name
+            )
+          },
+          tool_name = self$ragnar_tool_def@name
+        )
+      self$set_turns(turns)
+    }
+  ),
+
+  private = list(
+    callback_user_turn = function(...) {
+      result <- self$on_user_turn(self, ...)
+      if (!is.list(result)) {
+        result <- list(result)
+      }
+      result
+    }
+  )
+)
+
+ContentRagnarDocuments <- S7::new_class(
+  "ContentRagnarDocuments",
+  parent = ellmer::ContentText
+)
+
+content_get_chunks <- function(x, tool_name) {
+  value <- if (S7::S7_inherits(x, ContentRagnarDocuments)) {
+    x@text
+  } else if (S7::S7_inherits(x, ellmer::ContentToolResult)) {
+    if (x@request@name != tool_name) {
+      return(NULL)
+    }
+    x@value
+  } else {
+    return(NULL)
+  }
+  jsonlite::fromJSON(value, simplifyVector = FALSE)
+}
+
+content_set_chunks <- function(x, chunks) {
+  chunks <- jsonlite::toJSON(chunks, pretty = TRUE)
+  if (S7::S7_inherits(x, ContentRagnarDocuments)) {
+    x@text <- chunks
+  } else if (S7::S7_inherits(x, ellmer::ContentToolResult)) {
+    x@value <- chunks
+  } else {
+    stop("Unsupported content type for setting chunks.")
+  }
+  x
+}
+
+#' Applies a function to pairs of turns that represent tool calls.
+#' Other turns are kept unchanged.
+#' @noRd
+turns_modify_tool_calls <- function(turns, fn, tool_name = NULL) {
+  i <- 1
+  while (i < length(turns)) {
+    if (turns[[i]]@role == "user") {
+      i <- i + 1
+      next
+    }
+    if (!is_tool_call(turns[[i]], tool_name)) {
+      i <- i + 1
+      next
+    }
+
+    turns[c(i, i + 1)] <- fn(turns[[i]], turns[[i + 1]])
+    i <- i + 2
+  }
+  turns
+}
+
+is_tool_call <- function(x, tool_name = NULL) {
+  for (content in x@contents) {
+    if (S7::S7_inherits(content, ellmer::ContentToolRequest)) {
+      if (length(x@contents) > 1) {
+        cli::cli_warn(
+          "Tool call request found in turn with multiple contents, this may lead to unexpected results."
+        )
+      }
+      if (!is.null(tool_name) && content@tool@name != tool_name) {
+        return(FALSE)
+      }
+
+      return(TRUE)
+    }
+  }
+  FALSE
+}
+
+summarize_tool_call <- function(assistant_turn, user_turn, summarize, tool_name) {
+  assistant_turn@contents <- list(
+    ellmer::ContentText(
+      text = "<Redacted by summarization tool>"
+    )
+  )
+
+  summary <- lapply(user_turn@contents, \(x) content_get_chunks(x, tool_name)) |>
+    unlist(recursive = FALSE) |>
+    summarize()
+
+  user_turn@contents <- list(ellmer::ContentText(text = summary))
+
+  list(assistant_turn, user_turn)
+}
diff --git a/tests/testthat/helper-doc.R b/tests/testthat/helper-doc.R
index 226ce83..7aad879 100644
--- a/tests/testthat/helper-doc.R
+++ b/tests/testthat/helper-doc.R
@@ -18,3 +18,14 @@ maybe_set_threads <- function(store) {
   }
   store
 }
+
+test_store <- function() {
+  store <- ragnar_store_create(
+    # A mock embedding: one random length-100 vector per input text.
+    embed = \(x) matrix(stats::runif(length(x) * 100), nrow = length(x), ncol = 100)
+  )
+
+  doc <- test_doc()
+  chunks <- read_as_markdown(doc) |> markdown_chunk()
+  ragnar_store_insert(store, chunks)
+  ragnar_store_build_index(store)
+  store
+}
diff --git a/tests/testthat/test-ragnar-chat.R b/tests/testthat/test-ragnar-chat.R
new file mode 100644
index 0000000..d16adbc
--- /dev/null
+++ b/tests/testthat/test-ragnar-chat.R
@@ -0,0 +1,161 @@
+test_that("RagnarChat", {
+  store <- test_store()
+  chat <- chat_ragnar(\() ellmer::chat_openai(model = "gpt-4.1-nano"), store = store)
+
+  out <- chat$chat("advanced R")
+
+  # 2 turns from the chat + 2 turns of forced tool calls
+  expect_equal(length(chat$get_turns()), 2 + 2)
+
+  # pruning clears the tool calls
+  chat$turns_prune_chunks()
+  expect_equal(length(chat$get_turns()), 2)
+
+  out <- chat$chat("advanced R")
+  # 2 turns that were already there + 2 turns of forced tool calls
+  # + user + LLM
+  expect_equal(length(chat$get_turns()), 2 + 2 + 2)
+
+  # the default pruning clears the previous tool calls,
+  # so we end up with 6 turns + 1 pair
+  out <- chat$chat("more chatting")
+  expect_equal(length(chat$get_turns()), 6 + 2)
+
+  # pruning tool calls clears two turns
+  chat$turns_prune_tool_calls()
+  expect_equal(length(chat$get_turns()), 6)
+})
+
+test_that("Implementing query rewriting", {
+  # By default we send the full user input as the retrieval query.
+  # This might not be ideal, and the user may want to implement a query
+  # rewriting strategy:
+
+  query_rewriter <- function(...) {
+    # takes the user input in `...` and returns a new query
+    "hello world"
+  }
+
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_prune_tool_calls()
+      self$turns_insert_tool_call_request(
+        ...,
+        query = query_rewriter(...)
+      )
+    }
+  )
+
+  out <- chat$chat("advanced R")
+  expect_equal(length(chat$get_turns()), 2 + 2)
+
+  tool_call_request <- chat$get_turns()[[2]]@contents[[1]]
+  expect_equal(tool_call_request@arguments$query, "hello world")
+})
+
+test_that("remove chunks by id works", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_tool_call_request(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  chat$chat("advanced R")
+  chunks <- chat$turns_list_chunks()
+  id <- chunks[[1]]$id
+
+  chat$turns_remove_chunks(id)
+
+  chunks <- chat$turns_list_chunks()
+  chunk_ids <- sapply(chunks, function(x) x$id)
+  expect_false(id %in% chunk_ids)
+
+  chat$turns_remove_chunks(chunk_ids)
+  # we removed all chunks, thus the tool call request and result turns also got removed
+  expect_equal(length(chat$turns_list_chunks()), 0)
+  expect_equal(length(chat$get_turns()), 2)
+})
+
+test_that("duplicated chunks are not returned", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_tool_call_request(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  # chatting twice sends the same query; the default `retrieve` filters out
+  # chunks that are already in the history
+  chat$chat("advanced R")
+  chat$chat("advanced R")
+
+  # all ids should be unique
+  new_chunk_ids <- sapply(chat$turns_list_chunks(), function(x) x$id)
+
+  expect_equal(length(unique(new_chunk_ids)), length(new_chunk_ids))
+})
+
+test_that("Can insert chunks preemptively in the user chat", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_documents(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  out <- chat$chat("advanced R")
+  expect_equal(length(chat$get_turns()), 2)
+  # it adds the documents into the context
+  expect_equal(length(chat$get_turns()[[1]]@contents), 2)
+
+  chat$turns_prune_chunks()
+  expect_equal(length(chat$get_turns()), 2)
+  expect_equal(length(chat$get_turns()[[1]]@contents), 1)
+})
+
+test_that("Can summarize tool calls", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store
+  )
+
+  x <- chat$chat("functional")
+
+  summarize_chunks <- function(chunks) {
+    json <- jsonlite::toJSON(chunks)
+    ellmer::chat_openai(
+      model = "gpt-4.1-nano",
+      system_prompt = "Summarize the following chunks of text.",
+      echo = FALSE
+    )$chat(json, echo = FALSE)
+  }
+
+  chat$turns_summarize_tool_calls(summarize_chunks)
+
+  turns <- chat$get_turns()
+  expect_equal(length(turns), 4) # 2 user + 2 assistant
+  expect_equal(turns[[2]]@role, "assistant")
+  expect_true(grepl("Redacted by summarization tool", turns[[2]]@contents[[1]]@text))
+  expect_true(S7::S7_inherits(turns[[3]]@contents[[1]], ellmer::ContentText))
+})
diff --git a/vignettes/articles/ragnar-chat.Rmd b/vignettes/articles/ragnar-chat.Rmd
new file mode 100644
index 0000000..c37188e
--- /dev/null
+++ b/vignettes/articles/ragnar-chat.Rmd
@@ -0,0 +1,155 @@
+---
+title: "Using chat_ragnar"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Using chat_ragnar}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(ragnar)
+```
+
+The `chat_ragnar()` interface adds common RAG functionality to an `ellmer::Chat`,
+allowing users to easily manage and operate on the LLM chat history, customize the
+search behavior, and more.
+
+This vignette shows a few patterns for customizing the behavior of `chat_ragnar()`.
+To make the examples simpler, we will create a small knowledge base with a single
+document and a few chunks:
+
+```{r}
+store <- ragnar_store_create(
+  embed = \(x) embed_openai(x, model = "text-embedding-3-small")
+)
+
+doc <- "https://r4ds.hadley.nz/base-R.html"
+chunks <- ragnar_read(doc, frame_by_tags = c("h3"))
+ragnar_store_insert(store, chunks)
+ragnar_store_build_index(store)
+```
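+
+Before customizing anything, the defaults already work end to end: each user message
+becomes a retrieval query, and the results are inserted into the chat history before
+the model answers. A minimal sketch (it assumes an OpenAI API key is configured; any
+other `ellmer` chat constructor works as well):
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store
+)
+cat(chat$chat("How do I subset a list in R?"))
+```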
+
+## Implement Query Rewriting
+
+When the LLM queries the knowledge base, it may not use terms and keywords that are
+present in the knowledge base. To improve the chances of a successful query, you can
+implement query rewriting. It's also possible to use another LLM to create a few
+slightly different queries and combine the results into a single set.
+
+We first implement a function that takes a query and rewrites it using another LLM.
+Usually a smaller model is used for this task, as it is not very computationally
+intensive and the results are not very sensitive to the model size.
+
+```{r}
+query_rewrite <- function(query) {
+  chat <- ellmer::chat_openai(
+    model = "gpt-4.1-nano",
+    system_prompt = paste(
+      "You are a helpful assistant that rewrites queries to improve search results.",
+      "You may return a few different queries, whose results will be combined.",
+      "Queries should be concise and relevant to the original query.",
+      "You may use synonyms, rephrase, or add relevant keywords.",
+      sep = "\n"
+    )
+  )
+  chat$chat_structured(
+    paste0("Rewrite the following query to improve search results: ", query),
+    type = ellmer::type_array(
+      ellmer::type_string(),
+      description = "Rewritten queries"
+    )
+  )
+}
+```
+
+Now we create a chat object that uses the `query_rewrite` function to rewrite queries
+before searching the knowledge base. To do that, we customize the `retrieve` callback.
+This callback takes `self` (the `RagnarChat` instance) and `query` (the original
+query) as arguments, and should return a data.frame of retrieved chunks. The default
+behavior of `retrieve` is to use `ragnar::ragnar_retrieve()` to search the knowledge
+base, but we override it to use our `query_rewrite` function and combine the results
+of all rewritten queries into a single, deduplicated set.
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  retrieve = function(self, query) {
+    queries <- query_rewrite(query)
+    cli::cli_inform(c("i" = "Rewritten queries:", queries))
+    queries |>
+      purrr::map_dfr(\(q) ragnar::ragnar_retrieve(self$ragnar_store, q)) |>
+      dplyr::distinct()
+  }
+)
+```
+
+Now we can use the `chat` object to ask questions and see how it rewrites the queries
+and retrieves the results:
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+```
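+
+As a quick sanity check, you can inspect the tool call request that was recorded in
+the chat history. Note that the recorded request holds the original query; the
+rewritten queries exist only inside the `retrieve` callback. The turn index below is
+a sketch that assumes a fresh chat, where the forced tool call request sits in the
+second turn:
+
+```{r}
+request <- chat$get_turns()[[2]]@contents[[1]]
+request@arguments$query
+```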
+
+## Customize retrieval results
+
+Some models are not trained to support function calls, so inserting tool calls into
+the chat history may confuse the model and lead to increased hallucinations. A common
+solution for this problem is to include a pair of turns at the top of the chat
+history that adds relevant documents, so that the model can use them to answer the
+question.
+
+This is implemented by customizing the `on_user_turn` callback. This callback is
+called with `self` (the `RagnarChat` instance) and `...` (the user inputs when
+calling `$chat`). It should return a list of `ellmer::Content` objects that will be
+forwarded to the actual `$chat` call.
+
+Here's an example of how to implement this callback:
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  # We explicitly avoid registering the store tool, so the LLM is not capable of
+  # making tool calls to the store. Instead, it relies on the documents inserted
+  # at the top of the chat history to answer the question.
+  register_store_tool = FALSE,
+  on_user_turn = function(self, ...) {
+    self$turns_prune_chunks()
+    # Inserts documents relevant to the query into the chat history.
+    self$turns_insert_documents(
+      ...,
+      query = paste(..., collapse = "\n")
+    )
+  }
+)
+```
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+```
+
+## Summarize tool calls
+
+To save tokens, it's common to summarize the results of the tool calls after the
+chunks were used to produce the LLM response. With this, the next message in the
+chat won't need to carry the full text of the chunks, but rather a summary of the
+results.
+
+This can be accomplished by customizing the `on_user_turn` callback to summarize the
+results of the previous tool calls. For example:
+
+```{r}
+summarize_chunks <- function(chunks) {
+  json <- jsonlite::toJSON(chunks)
+  ellmer::chat_openai(
+    model = "gpt-4.1-nano",
+    system_prompt = "Summarize the following chunks of text.",
+    echo = FALSE
+  )$chat(json, echo = FALSE)
+}
+
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  on_user_turn = function(self, ...) {
+    self$turns_summarize_tool_calls(summarize_chunks)
+    self$turns_insert_tool_call_request(..., query = paste(..., collapse = " "))
+  }
+)
+```
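+
+After the first response, the tool call pair from the previous round is replaced by a
+redaction marker and a summary, so follow-up questions reuse far fewer tokens. A
+small sketch of what this looks like (the questions are illustrative):
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+
+# The follow-up question triggers summarization of the previous tool call pair.
+cat(chat$chat("And when should I prefer one over the other?"))
+
+# After summarization, the earlier assistant turn holds the redaction marker and
+# the earlier user turn holds the summary text.
+sapply(chat$get_turns(), \(turn) turn@role)
+```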