diff --git a/DESCRIPTION b/DESCRIPTION
index a55a5d0..a19667b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,6 +33,7 @@ Imports:
     dotty,
     dplyr,
     duckdb (>= 1.2.2),
+    ellmer (>= 0.2.0),
     glue,
     httr2,
     methods,
@@ -48,7 +49,6 @@ Imports:
     xml2
 Suggests:
     dbplyr,
-    ellmer (>= 0.2.0),
     lifecycle,
     knitr,
     pandoc,
diff --git a/R/ragnar-chat.R b/R/ragnar-chat.R
new file mode 100644
index 0000000..e147ca1
--- /dev/null
+++ b/R/ragnar-chat.R
@@ -0,0 +1,560 @@
+#' Creates an `ellmer::Chat` with increased capabilities
+#' powered by a `ragnar::RagnarStore`.
+#'
+#' @param chat A function that returns an `ellmer::Chat` object,
+#' such as [ellmer::chat_openai()].
+#' @param store A `ragnar::RagnarStore` object that contains the knowledge base that
+#' powers this chat.
+#' @param register_store_tool If `TRUE`, the `store` is registered as a tool in the chat.
+#' @param on_user_turn A function that is called when the user sends a message.
+#' It's called with `self` (the instance of `RagnarChat`) and `...`, the message
+#' sent by the user. Its output is passed to `ellmer::Chat$chat()`.
+#' E.g., the identity is simply `function(self, ...) list(...)`.
+#' The default callback prunes the previous tool calls from the chat history and
+#' inserts a tool call request, so that the LLM always sees retrieval results.
+#' @param retrieve A function that takes `self` (the instance of `RagnarChat`) and `query`
+#' (the query to retrieve results for) and returns a data.frame of chunks as results.
+#' The default implementation calls `ragnar_retrieve()`, filtering out chunks that are
+#' already present in the chat history.
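+#' @examples
+#' \dontrun{
+#' # A minimal sketch of the default workflow (not run: it assumes `store` is an
+#' # existing `RagnarStore` and that an OpenAI API key is configured; any other
+#' # ellmer chat constructor works as well).
+#' chat <- chat_ragnar(
+#'   \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+#'   store = store
+#' )
+#'
+#' # Each message triggers a retrieval; the results are inserted into the chat
+#' # history before the model answers.
+#' chat$chat("advanced R")
+#'
+#' # Prune the retrieved chunks from the history to save tokens.
+#' chat$turns_prune_chunks()
+#' }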
+#' @export
+chat_ragnar <- function(
+  chat,
+  store,
+  register_store_tool = TRUE,
+  on_user_turn = function(self, ...) {
+    # prunes previously inserted tool calls
+    self$turns_prune_chunks(keep_last_n = 0)
+    # inserts a new tool call request with the user's input
+    self$turns_insert_tool_call_request(..., query = paste(..., collapse = " "))
+  },
+  retrieve = function(self, query) {
+    retrieved_ids <- self$turns_list_chunks() |>
+      sapply(\(x) x$id) |>
+      unlist()
+
+    self$ragnar_store |>
+      ragnar::ragnar_retrieve(
+        query,
+        top_k = 10,
+        filter = !.data$id %in% retrieved_ids
+      )
+  }
+) {
+  chat <- chat()
+  RagnarChat$new(chat, store, register_store_tool, on_user_turn, retrieve)
+}
+
+#' Adds extra capabilities to an `ellmer::Chat` object.
+RagnarChat <- R6::R6Class(
+  "RagnarChat",
+  inherit = ellmer:::Chat,
+  public = list(
+    #' @field ragnar_store A `ragnar::RagnarStore` object that this tool retrieves from.
+    ragnar_store = NULL,
+
+    #' @field ragnar_tool_def An `ellmer::Tool` object that is registered with the chat.
+    ragnar_tool_def = NULL,
+
+    #' @field on_user_turn A function that is called when the user sends a message.
+    #' It's called with `self` (the instance of `RagnarChat`) and `...`, the message
+    #' sent by the user. Its output is passed to `ellmer::Chat$chat()`.
+    #' E.g., the identity is simply `function(self, ...) list(...)`.
+    on_user_turn = NULL,
+
+    #' @field ragnar_retrieve A function that retrieves relevant chunks given a query and a store.
+    #' Can be set to any function taking `self` and `query` as parameters and returning a
+    #' data.frame of results.
+    ragnar_retrieve = NULL,
+
+    initialize = function(
+      chat,
+      store,
+      register_store_tool,
+      on_user_turn,
+      ragnar_retrieve
+    ) {
+      self$ragnar_store <- store
+      super$initialize(
+        chat$get_provider(),
+        chat$get_system_prompt(),
+        echo = chat$.__enclos_env__$private$echo
+      )
+
+      self$ragnar_tool_def <- ellmer::tool(
+        .fun = self$ragnar_tool,
+        .description = "Given a string, retrieve the most relevant excerpts from the knowledge store.",
+        query = ellmer::type_string(
+          "The text to find most relevant matches for."
+        )
+      )
+      if (register_store_tool) {
+        self$register_tool(self$ragnar_tool_def)
+      }
+      self$on_user_turn <- on_user_turn
+      self$ragnar_retrieve <- ragnar_retrieve
+    },
+
+    chat = function(..., echo = NULL) {
+      result <- private$callback_user_turn(...)
+      do.call(super$chat, append(result, list(echo = echo)))
+    },
+
+    chat_async = function(..., tool_mode = c("concurrent", "sequential")) {
+      result <- private$callback_user_turn(...)
+      do.call(super$chat_async, append(result, list(tool_mode = tool_mode)))
+    },
+
+    chat_structured = function(..., type, echo = "none", convert = TRUE) {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$chat_structured,
+        append(result, list(type = type, echo = echo, convert = convert))
+      )
+    },
+
+    chat_structured_async = function(..., type, echo = "none") {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$chat_structured_async,
+        append(result, list(type = type, echo = echo))
+      )
+    },
+
+    stream_async = function(..., tool_mode = c("concurrent", "sequential"), stream = c("text", "content")) {
+      result <- private$callback_user_turn(...)
+      do.call(
+        super$stream_async,
+        append(result, list(tool_mode = tool_mode, stream = stream))
+      )
+    },
+
+    stream = function(..., stream = c("text", "content")) {
+      result <- private$callback_user_turn(...)
+      do.call(super$stream, append(result, list(stream = stream)))
+    },
+
+    #' @field ragnar_tool A function that retrieves relevant chunks from the store.
+    #' This is the function that is registered as a tool in the chat.
+    ragnar_tool = function(query) {
+      results <- self$ragnar_retrieve(self, query)
+
+      if (!is.data.frame(results)) {
+        stop("The ragnar_retrieve function must return a data.frame.")
+      }
+
+      results |>
+        jsonlite::toJSON()
+    },
+
+    # Turns modification API -----------
+
+    #' @description
+    #' Clears tool calls from the chat history. Usually called after the LLM response,
+    #' so follow-up questions do not include the tool calls, saving some tokens.
+    #' This only removes calls for the registered store.
+    #'
+    #' @param keep_last_n Keep the last `n` tool call pairs. I.e., if you set this to 2,
+    #' the last two assistant tool call requests and their respective results will be kept.
+    #' - Parallel tool calls are considered a single request.
+    #' - Tool results are expected to be in the user turn right after the assistant's
+    #'   turn containing the tool call request.
+    #' - Entire turns are dropped, even if (in the rare case) they contain other
+    #'   content besides the tool call.
+    turns_prune_tool_calls = function(keep_last_n = 0) {
+      turns <- self$get_turns()
+
+      skipped <- 0
+      drop_turns <- integer(0)
+      for (i in rev(seq_along(turns))) {
+        turn <- turns[[i]]
+        if (turn@role != "assistant") {
+          next
+        }
+
+        for (content in turn@contents) {
+          if (!S7::S7_inherits(content, ellmer::ContentToolRequest)) {
+            next
+          }
+          if (content@name != self$ragnar_tool_def@name) {
+            next
+          }
+
+          # This is a tool call turn. Check whether we have already skipped
+          # enough turns to keep the last n.
+          if (skipped < keep_last_n) {
+            skipped <- skipped + 1
+            break
+          }
+
+          # Mark this turn and the user turn after it for removal.
+          drop_turns <- c(drop_turns, c(i, i + 1))
+          break
+        }
+      }
+
+      # Guard against `turns[-integer(0)]`, which would drop every turn.
+      if (length(drop_turns) > 0) {
+        turns <- turns[-drop_turns]
+      }
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Finds all the chunks that are currently in the chat history.
+    #' Requires chunks returned by the tools to be formatted as JSON.
+    turns_list_chunks = function() {
+      turns <- self$get_turns()
+      chunks <- list()
+
+      for (turn in turns) {
+        if (turn@role != "user") {
+          next
+        }
+        for (content in turn@contents) {
+          chunks <- append(
+            chunks,
+            content_get_chunks(content, tool_name = self$ragnar_tool_def@name)
+          )
+        }
+      }
+
+      chunks
+    },
+
+    #' @description
+    #' Prunes the chunks in the chat history, keeping only the last `n` chunks.
+    #' This is useful to reduce the size of the chat history, especially if you have
+    #' many tool calls that return large chunks of text.
+    #' @param keep_last_n The number of chunks to keep. If `0`, all chunks are removed.
+    #' - Chunks are removed from the tool result. If the tool result becomes empty as
+    #'   a consequence, the entire turn and the assistant tool call request are removed.
+    turns_prune_chunks = function(keep_last_n = 0) {
+      turns <- self$get_turns()
+
+      drop_turn_idx <- integer(0)
+      for (ti in rev(seq_along(turns))) {
+        turn <- turns[[ti]]
+
+        if (turn@role != "user") {
+          next
+        }
+
+        contents <- turn@contents
+
+        drop_content_idx <- integer(0)
+        for (ci in rev(seq_along(contents))) {
+          content <- contents[[ci]]
+          chunks <- content_get_chunks(content, tool_name = self$ragnar_tool_def@name)
+          if (is.null(chunks)) {
+            next
+          }
+
+          # If all chunks from this tool result fit within the keep budget, keep
+          # them all and decrease the budget accordingly.
+          if (length(chunks) <= keep_last_n) {
+            keep_last_n <- keep_last_n - length(chunks)
+            next
+          }
+
+          # We'll need to remove some chunks. Keep only what we can; the keep
+          # budget is exhausted after this.
+          chunks <- tail(chunks, keep_last_n)
+          keep_last_n <- 0
+
+          # If no chunks are left, remove the entire content from the list.
+          if (length(chunks) == 0) {
+            drop_content_idx[[length(drop_content_idx) + 1]] <- ci
+            turns_to_drop <- if (
+              S7::S7_inherits(content, ellmer::ContentToolResult)
+            ) {
+              # When it's a tool result, also drop the assistant turn before it
+              # (usually the tool call request).
+              c(ti, ti - 1)
+            } else if (S7::S7_inherits(content, ContentRagnarDocuments)) {
+              # When it's proactively inserted documents, drop only the current turn.
+              # Note: that's unlikely to ever drop a turn, because the turn also
+              # contains the user question, so we always have at least one content.
+              ti
+            }
+            next
+          }
+
+          # Restore the content if some chunks remained.
+          contents[[ci]] <- content_set_chunks(content, chunks)
+        }
+
+        turn@contents <- contents
+        if (length(drop_content_idx) > 0) {
+          turn@contents <- contents[-drop_content_idx]
+        }
+
+        # If we removed all contents from the turn, we remove the entire turn,
+        # plus the assistant turn that came before or after it, depending on what
+        # triggered the removal: a tool call result or a proactively added set of
+        # documents.
+        if (length(turn@contents) == 0) {
+          drop_turn_idx <- c(drop_turn_idx, turns_to_drop)
+          next
+        }
+
+        turns[[ti]] <- turn
+      }
+
+      # Remove the turns that we marked for removal.
+      if (length(drop_turn_idx) > 0) {
+        turns <- turns[-drop_turn_idx]
+      }
+
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Removes chunks from the history by id.
+    #' Rewrites the LLM context, removing the chunks with the given ids. It will also
+    #' entirely remove the tool call request and results if all chunks are removed.
+    #'
+    #' @param chunk_ids A vector of chunk ids to remove from the chat history.
+    turns_remove_chunks = function(chunk_ids) {
+      turns <- self$get_turns()
+      drop_turn_idx <- integer(0)
+
+      for (ti in seq_along(turns)) {
+        turn <- turns[[ti]]
+        if (turn@role != "user") {
+          next
+        }
+
+        contents <- turn@contents
+        drop_content_idx <- integer(0)
+
+        for (ci in seq_along(contents)) {
+          content <- contents[[ci]]
+
+          chunks <- content_get_chunks(content, self$ragnar_tool_def@name)
+          if (is.null(chunks)) {
+            next
+          }
+
+          # Remove the chunks with the given ids.
+          chunks <- chunks[!sapply(chunks, function(x) x$id %in% chunk_ids)]
+
+          # If no chunks are left, remove the entire content from the list.
+          if (length(chunks) == 0) {
+            drop_content_idx[[length(drop_content_idx) + 1]] <- ci
+            turns_to_drop <- if (
+              S7::S7_inherits(content, ellmer::ContentToolResult)
+            ) {
+              # When it's a tool result, also drop the assistant turn before it
+              # (usually the tool call request).
+              c(ti, ti - 1)
+            } else if (S7::S7_inherits(content, ContentRagnarDocuments)) {
+              # When it's proactively inserted documents, drop only the current turn.
+              # Note: that's unlikely to ever drop a turn, because the turn also
+              # contains the user question, so we always have at least one content.
+              ti
+            }
+            next
+          }
+
+          # Restore the content if some chunks remained.
+          contents[[ci]] <- content_set_chunks(content, chunks)
+        }
+
+        turn@contents <- contents
+        if (length(drop_content_idx) > 0) {
+          turn@contents <- contents[-drop_content_idx]
+        }
+
+        # If we removed all contents from the turn, we remove the entire turn,
+        # plus the assistant turn that came before or after it, depending on what
+        # triggered the removal: a tool call result or a proactively added set of
+        # documents.
+        if (length(turn@contents) == 0) {
+          drop_turn_idx <- c(drop_turn_idx, turns_to_drop)
+          next
+        }
+
+        turns[[ti]] <- turn
+      }
+
+      # Remove the turns that we marked for removal.
+      if (length(drop_turn_idx) > 0) {
+        turns <- turns[-drop_turn_idx]
+      }
+
+      self$set_turns(turns)
+    },
+
+    #' @description
+    #' Some LLMs are lazy at tool calling, and for applications to be robust,
+    #' it helps to append context for the LLM even if it didn't explicitly ask
+    #' for it. This inserts a tool call request and its result in the chat turns,
+    #' so the LLM can use them to respond to the user question.
+    #' @param ... Passed to `ellmer:::user_turn()` to insert the user turn that generated the tool call.
+    #' @param query The query to pass to the tool.
+    #' @returns An `ellmer::ContentToolResult` object that should be included in the next call to `$chat()`.
+    turns_insert_tool_call_request = function(..., query) {
+      user_turn <- ellmer:::user_turn(...)
+
+      tool_request <- ellmer::ContentToolRequest(
+        id = rlang::hash(Sys.time()),
+        name = self$ragnar_tool_def@name,
+        arguments = list(
+          query = query
+        ),
+        self$ragnar_tool_def
+      )
+
+      assistant_turn <- ellmer::Turn(
+        role = "assistant",
+        contents = list(tool_request)
+      )
+
+      self$add_turn(user_turn, assistant_turn)
+
+      ellmer::ContentToolResult(
+        self$ragnar_tool(query),
+        request = tool_request
+      )
+    },
+
+    #' @description Some models do not support tool calling, so instead of adding
+    #' a tool call request (faking that the model asked for some search results),
+    #' we proactively insert context into the user turn, as if the user had
+    #' supplied it:
+    #' - User: {question} {documents}
+    #' - LLM: answer
+    #' @param ... The contents of the user turn that generated the tool call.
+    turns_insert_documents = function(
+      ...,
+      query
+    ) {
+      documents <- self$ragnar_tool(query)
+
+      list(
+        ...,
+        ContentRagnarDocuments(text = documents)
+      )
+    },
+
+    #' @description
+    #' Summarizes the tool calls in the chat history. The assistant tool
+    #' call request is redacted, and the chunks are summarized.
+    #'
+    #' The `ellmer::ContentToolRequest` becomes an `ellmer::ContentText` with:
+    #' ```
+    #' <Redacted by summarization tool>
+    #' ```
+    #'
+    #' The `ellmer::ContentToolResult` becomes an `ellmer::ContentText` with
+    #' a summary of the chunks given by the `summarize_chunks` callback.
+    #' `summarize_chunks` takes a list of `chunks` as argument.
+    turns_summarize_tool_calls = function(summarize_chunks) {
+      turns <- self$get_turns() |>
+        turns_modify_tool_calls(
+          function(assistant_turn, user_turn) {
+            summarize_tool_call(
+              assistant_turn = assistant_turn,
+              user_turn = user_turn,
+              summarize_chunks,
+              tool_name = self$ragnar_tool_def@name
+            )
+          },
+          tool_name = self$ragnar_tool_def@name
+        )
+      self$set_turns(turns)
+    }
+  ),
+
+  private = list(
+    callback_user_turn = function(...) {
+      result <- self$on_user_turn(self, ...)
+      if (!is.list(result)) {
+        result <- list(result)
+      }
+      result
+    }
+  )
+)
+
+ContentRagnarDocuments <- S7::new_class(
+  "ContentRagnarDocuments",
+  parent = ellmer::ContentText
+)
+
+content_get_chunks <- function(x, tool_name) {
+  value <- if (S7::S7_inherits(x, ContentRagnarDocuments)) {
+    x@text
+  } else if (S7::S7_inherits(x, ellmer::ContentToolResult)) {
+    if (x@request@name != tool_name) {
+      return(NULL)
+    }
+    x@value
+  } else {
+    return(NULL)
+  }
+  jsonlite::fromJSON(value, simplifyVector = FALSE)
+}
+
+content_set_chunks <- function(x, chunks) {
+  chunks <- jsonlite::toJSON(chunks, pretty = TRUE)
+  if (S7::S7_inherits(x, ContentRagnarDocuments)) {
+    x@text <- chunks
+  } else if (S7::S7_inherits(x, ellmer::ContentToolResult)) {
+    x@value <- chunks
+  } else {
+    stop("Unsupported content type for setting chunks.")
+  }
+  x
+}
+
+#' Applies a function to pairs of turns that represent tool calls.
+#' Other turns are kept unchanged.
+#' @noRd
+turns_modify_tool_calls <- function(turns, fn, tool_name = NULL) {
+  i <- 1
+  while (i < length(turns)) {
+    if (turns[[i]]@role == "user") {
+      i <- i + 1
+      next
+    }
+    if (!is_tool_call(turns[[i]], tool_name)) {
+      i <- i + 1
+      next
+    }
+
+    turns[c(i, i + 1)] <- fn(turns[[i]], turns[[i + 1]])
+    i <- i + 2
+  }
+  turns
+}
+
+is_tool_call <- function(x, tool_name = NULL) {
+  for (content in x@contents) {
+    if (S7::S7_inherits(content, ellmer::ContentToolRequest)) {
+      if (length(x@contents) > 1) {
+        cli::cli_warn(
+          "Tool call request found in turn with multiple contents, this may lead to unexpected results."
+        )
+      }
+      if (!is.null(tool_name) && content@tool@name != tool_name) {
+        return(FALSE)
+      }
+
+      return(TRUE)
+    }
+  }
+  FALSE
+}
+
+summarize_tool_call <- function(assistant_turn, user_turn, summarize, tool_name) {
+  assistant_turn@contents <- list(
+    ellmer::ContentText(
+      text = "<Redacted by summarization tool>"
+    )
+  )
+
+  summary <- lapply(user_turn@contents, \(x) content_get_chunks(x, tool_name)) |>
+    unlist(recursive = FALSE) |>
+    summarize()
+
+  user_turn@contents <- list(ellmer::ContentText(text = summary))
+
+  list(assistant_turn, user_turn)
+}
diff --git a/tests/testthat/helper-doc.R b/tests/testthat/helper-doc.R
index 226ce83..7aad879 100644
--- a/tests/testthat/helper-doc.R
+++ b/tests/testthat/helper-doc.R
@@ -18,3 +18,14 @@ maybe_set_threads <- function(store) {
   }
   store
 }
+
+test_store <- function() {
+  store <- ragnar_store_create(
+    # A mock embedding: one random length-100 vector per input text.
+    embed = \(x) matrix(stats::runif(length(x) * 100), nrow = length(x), ncol = 100)
+  )
+
+  doc <- test_doc()
+  chunks <- read_as_markdown(doc) |> markdown_chunk()
+  ragnar_store_insert(store, chunks)
+  ragnar_store_build_index(store)
+  store
+}
diff --git a/tests/testthat/test-ragnar-chat.R b/tests/testthat/test-ragnar-chat.R
new file mode 100644
index 0000000..d16adbc
--- /dev/null
+++ b/tests/testthat/test-ragnar-chat.R
@@ -0,0 +1,161 @@
+test_that("RagnarChat", {
+  store <- test_store()
+  chat <- chat_ragnar(\() ellmer::chat_openai(model = "gpt-4.1-nano"), store = store)
+
+  out <- chat$chat("advanced R")
+
+  # 2 turns from the chat + 2 turns of forced tool calls
+  expect_equal(length(chat$get_turns()), 2 + 2)
+
+  # pruning clears the tool calls
+  chat$turns_prune_chunks()
+  expect_equal(length(chat$get_turns()), 2)
+
+  out <- chat$chat("advanced R")
+  # 2 turns that were already there + 2 turns of forced tool calls
+  # + user + LLM
+  expect_equal(length(chat$get_turns()), 2 + 2 + 2)
+
+  # the default pruning clears the previous tool calls,
+  # so we end up with 6 turns + 1 pair
+  out <- chat$chat("more chatting")
+  expect_equal(length(chat$get_turns()), 6 + 2)
+
+  # pruning tool calls clears two turns
+  chat$turns_prune_tool_calls()
+  expect_equal(length(chat$get_turns()), 6)
+})
+
+test_that("Implementing query rewriting", {
+  # By default we send the full user input as the retrieval query.
+  # This might not be ideal, and the user may want to implement a query
+  # rewriting strategy:
+
+  query_rewriter <- function(...) {
+    # takes the user input in `...` and returns a new query
+    "hello world"
+  }
+
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_prune_tool_calls()
+      self$turns_insert_tool_call_request(
+        ...,
+        query = query_rewriter(...)
+      )
+    }
+  )
+
+  out <- chat$chat("advanced R")
+  expect_equal(length(chat$get_turns()), 2 + 2)
+
+  tool_call_request <- chat$get_turns()[[2]]@contents[[1]]
+  expect_equal(tool_call_request@arguments$query, "hello world")
+})
+
+test_that("remove chunks by id works", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_tool_call_request(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  chat$chat("advanced R")
+  chunks <- chat$turns_list_chunks()
+  id <- chunks[[1]]$id
+
+  chat$turns_remove_chunks(id)
+
+  chunks <- chat$turns_list_chunks()
+  chunk_ids <- sapply(chunks, function(x) x$id)
+  expect_false(id %in% chunk_ids)
+
+  chat$turns_remove_chunks(chunk_ids)
+  # we removed all chunks, thus the tool call request and result turns also got removed
+  expect_equal(length(chat$turns_list_chunks()), 0)
+  expect_equal(length(chat$get_turns()), 2)
+})
+
+test_that("duplicated chunks are not returned", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_tool_call_request(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  # chatting twice sends the same query; the default `retrieve` filters out
+  # chunks that are already in the history
+  chat$chat("advanced R")
+  chat$chat("advanced R")
+
+  # all ids should be unique
+  new_chunk_ids <- sapply(chat$turns_list_chunks(), function(x) x$id)
+
+  expect_equal(length(unique(new_chunk_ids)), length(new_chunk_ids))
+})
+
+test_that("Can insert chunks preemptively in the user chat", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store,
+    on_user_turn = function(self, ...) {
+      self$turns_insert_documents(
+        ...,
+        query = paste(..., collapse = " ")
+      )
+    }
+  )
+
+  out <- chat$chat("advanced R")
+  expect_equal(length(chat$get_turns()), 2)
+  # it adds the documents into the context
+  expect_equal(length(chat$get_turns()[[1]]@contents), 2)
+
+  chat$turns_prune_chunks()
+  expect_equal(length(chat$get_turns()), 2)
+  expect_equal(length(chat$get_turns()[[1]]@contents), 1)
+})
+
+test_that("Can summarize tool calls", {
+  store <- test_store()
+  chat <- chat_ragnar(
+    \() ellmer::chat_openai(model = "gpt-4.1-nano"),
+    store = store
+  )
+
+  x <- chat$chat("functional")
+
+  summarize_chunks <- function(chunks) {
+    json <- jsonlite::toJSON(chunks)
+    ellmer::chat_openai(
+      model = "gpt-4.1-nano",
+      system_prompt = "Summarize the following chunks of text.",
+      echo = FALSE
+    )$chat(json, echo = FALSE)
+  }
+
+  chat$turns_summarize_tool_calls(summarize_chunks)
+
+  turns <- chat$get_turns()
+  expect_equal(length(turns), 4) # 2 user + 2 assistant
+  expect_equal(turns[[2]]@role, "assistant")
+  expect_true(grepl("Redacted by summarization tool", turns[[2]]@contents[[1]]@text))
+  expect_true(S7::S7_inherits(turns[[3]]@contents[[1]], ellmer::ContentText))
+})
diff --git a/vignettes/articles/ragnar-chat.Rmd b/vignettes/articles/ragnar-chat.Rmd
new file mode 100644
index 0000000..c37188e
--- /dev/null
+++ b/vignettes/articles/ragnar-chat.Rmd
@@ -0,0 +1,155 @@
+---
+title: "Using chat_ragnar"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Using chat_ragnar}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(ragnar)
+```
+
+The `chat_ragnar()` interface adds common RAG functionality to an `ellmer::Chat`,
+allowing users to easily manage and operate on the LLM chat history, customize the
+search behavior, and more.
+
+This vignette shows a few patterns for customizing the behavior of `chat_ragnar()`.
+To make the examples simpler, we will create a small knowledge base with a single
+document and a few chunks:
+
+```{r}
+store <- ragnar_store_create(
+  embed = \(x) embed_openai(x, model = "text-embedding-3-small")
+)
+
+doc <- "https://r4ds.hadley.nz/base-R.html"
+chunks <- ragnar_read(doc, frame_by_tags = c("h3"))
+ragnar_store_insert(store, chunks)
+ragnar_store_build_index(store)
+```
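+
+Before customizing anything, the defaults already work end to end: each user message
+becomes a retrieval query, and the results are inserted into the chat history before
+the model answers. A minimal sketch (it assumes an OpenAI API key is configured; any
+other `ellmer` chat constructor works as well):
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store
+)
+cat(chat$chat("How do I subset a list in R?"))
+```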
+
+## Implement Query Rewriting
+
+When the LLM queries the knowledge base, it may not use terms and keywords that are
+present in the knowledge base. To improve the chances of a successful query, you can
+implement query rewriting. It's also possible to use another LLM to create a few
+slightly different queries and combine the results into a single set.
+
+We first implement a function that takes a query and rewrites it using another LLM.
+Usually a smaller model is used for this task, as it is not very computationally
+intensive and the results are not very sensitive to the model size.
+
+```{r}
+query_rewrite <- function(query) {
+  chat <- ellmer::chat_openai(
+    model = "gpt-4.1-nano",
+    system_prompt = paste(
+      "You are a helpful assistant that rewrites queries to improve search results.",
+      "You may return a few different queries, whose results will be combined.",
+      "Queries should be concise and relevant to the original query.",
+      "You may use synonyms, rephrase, or add relevant keywords.",
+      sep = "\n"
+    )
+  )
+  chat$chat_structured(
+    paste0("Rewrite the following query to improve search results: ", query),
+    type = ellmer::type_array(
+      ellmer::type_string(),
+      description = "Rewritten queries"
+    )
+  )
+}
+```
+
+Now we create a chat object that uses the `query_rewrite` function to rewrite queries
+before searching the knowledge base. To do that, we customize the `retrieve` callback.
+This callback takes `self` (the `RagnarChat` instance) and `query` (the original
+query) as arguments, and should return a data.frame of retrieved chunks. The default
+behavior of `retrieve` is to use `ragnar::ragnar_retrieve()` to search the knowledge
+base, but we override it to use our `query_rewrite` function and combine the results
+of all rewritten queries into a single, deduplicated set.
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  retrieve = function(self, query) {
+    queries <- query_rewrite(query)
+    cli::cli_inform(c("i" = "Rewritten queries:", queries))
+    queries |>
+      purrr::map_dfr(\(q) ragnar::ragnar_retrieve(self$ragnar_store, q)) |>
+      dplyr::distinct()
+  }
+)
+```
+
+Now we can use the `chat` object to ask questions and see how it rewrites the queries
+and retrieves the results:
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+```
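+
+As a quick sanity check, you can inspect the tool call request that was recorded in
+the chat history. Note that the recorded request holds the original query; the
+rewritten queries exist only inside the `retrieve` callback. The turn index below is
+a sketch that assumes a fresh chat, where the forced tool call request sits in the
+second turn:
+
+```{r}
+request <- chat$get_turns()[[2]]@contents[[1]]
+request@arguments$query
+```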
+
+## Customize retrieval results
+
+Some models are not trained to support function calls, so inserting tool calls into
+the chat history may confuse the model and lead to increased hallucinations. A common
+solution for this problem is to include a pair of turns at the top of the chat
+history that adds relevant documents, so that the model can use them to answer the
+question.
+
+This is implemented by customizing the `on_user_turn` callback. This callback is
+called with `self` (the `RagnarChat` instance) and `...` (the user inputs when
+calling `$chat`). It should return a list of `ellmer::Content` objects that will be
+forwarded to the actual `$chat` call.
+
+Here's an example of how to implement this callback:
+
+```{r}
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  # We explicitly avoid registering the store tool, so the LLM is not capable of
+  # making tool calls to the store. Instead, it relies on the documents inserted
+  # at the top of the chat history to answer the question.
+  register_store_tool = FALSE,
+  on_user_turn = function(self, ...) {
+    self$turns_prune_chunks()
+    # Inserts documents relevant to the query into the chat history.
+    self$turns_insert_documents(
+      ...,
+      query = paste(..., collapse = "\n")
+    )
+  }
+)
+```
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+```
+
+## Summarize tool calls
+
+To save tokens, it's common to summarize the results of the tool calls after the
+chunks were used to produce the LLM response. With this, the next message in the
+chat won't need to carry the full text of the chunks, but rather a summary of the
+results.
+
+This can be accomplished by customizing the `on_user_turn` callback to summarize the
+results of the previous tool calls. For example:
+
+```{r}
+summarize_chunks <- function(chunks) {
+  json <- jsonlite::toJSON(chunks)
+  ellmer::chat_openai(
+    model = "gpt-4.1-nano",
+    system_prompt = "Summarize the following chunks of text.",
+    echo = FALSE
+  )$chat(json, echo = FALSE)
+}
+
+chat <- chat_ragnar(
+  \() ellmer::chat_openai(),
+  store = store,
+  on_user_turn = function(self, ...) {
+    self$turns_summarize_tool_calls(summarize_chunks)
+    self$turns_insert_tool_call_request(..., query = paste(..., collapse = " "))
+  }
+)
+```
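+
+After the first response, the tool call pair from the previous round is replaced by a
+redaction marker and a summary, so follow-up questions reuse far fewer tokens. A
+small sketch of what this looks like (the questions are illustrative):
+
+```{r}
+cat(chat$chat("What is the difference between a vector and a list in R?"))
+
+# The follow-up question triggers summarization of the previous tool call pair.
+cat(chat$chat("And when should I prefer one over the other?"))
+
+# After summarization, the earlier assistant turn holds the redaction marker and
+# the earlier user turn holds the summary text.
+sapply(chat$get_turns(), \(turn) turn@role)
+```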