Skip to content

Commit

Permalink
Merge pull request #73 from JamesHWade/fix/better-context-checking
Browse files Browse the repository at this point in the history
Fix/better context checking
  • Loading branch information
JamesHWade authored Feb 1, 2024
2 parents be76e47 + 500420d commit ddbd426
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 35 deletions.
5 changes: 1 addition & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,7 @@ repos:
- id: parsable-R
- id: no-browser-statement
- id: no-debug-statement
# - id: deps-in-desc
# args: [--warn_only]
# - id: pkgdown
# args: [--warn_only]
- id: no-print-statement
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion R/document_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,5 @@ summarize_data <- function(data,
# Build the final LLM prompt for a dataset: a captured text summary of `data`
# followed by the user-supplied `prompt`.
#
# @param data   A data frame (or similar object) to summarize.
# @param method Summarization method, forwarded to `summarize_data()`.
# @param prompt The user's question/instruction to append after the summary.
# @return A single string: printed summary and prompt joined by a newline.
prep_data_prompt <- function(data, method, prompt) {
  summarized_data <- summarize_data(data = data, method = method)

  # capture_output() turns the printed summary into a string so it can be
  # embedded in the prompt text. (Diff-merge artifact removed: the earlier
  # duplicate `print()` call discarded its result.)
  paste(testthat::capture_output(cat_print(summarized_data)), prompt, sep = "\n")
}
1 change: 0 additions & 1 deletion R/embedding-py.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ colbert_rerank <- function(documents, model_name = "colbert-ir/colbertv2.0") {
)
}

print(paste0("Took ", time$time() - start, " seconds to re-rank documents with ColBERT."))
sorted_data <- scores[order(sapply(scores, function(x) x$score), decreasing = TRUE)]
}

Expand Down
3 changes: 1 addition & 2 deletions R/embedding.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ prepare_scraped_files <- function(domain) {
scraped <-
arrow::read_parquet(glue("{scraped_dir}/text/{domain}.parquet"))

if (max(scraped$n_words) > 2e5) {
if (max(scraped$n_words) > 1e6) {
max_index <- scraped[which.max(scraped$n_words), ]
print(max_index |> dplyr::select(-text))
cli_alert_warning(
c(
"!" = "Entry {max_index$link} of {domain} has at least 200,000 words.",
Expand Down
41 changes: 20 additions & 21 deletions R/history.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ delete_history <- function(local = FALSE) {
if (delete_file) {
file.remove(x)
} else {
cli_inform("{x} was **not** deleted.")
cli_alert_info("{x} was **not** deleted.")
}
})
invisible()
Expand Down Expand Up @@ -137,11 +137,11 @@ get_query_context <- function(query_embedding, full_context, k) {

check_context <- function(context) {
if (rlang::is_null(context)) {
cli_warn(
cli_alert_warning(
"You specified that context should be added but none was provided."
)
} else if (!is.data.frame(context)) {
cli_warn(
cli_alert_warning(
"You passed a {class(context)} to but a data.frame was expected."
)
}
Expand Down Expand Up @@ -209,15 +209,15 @@ chat_with_context <- function(query,
)

if (rlang::is_true(add_context) || rlang::is_true(add_history)) {
cli_inform("Creating embedding from query.")
cli_alert_info("Creating embedding from query.")
query_embedding <- get_query_embedding(query,
local = local,
model = embedding_model
)
}

if (rlang::is_true(add_context) && rlang::is_true(need_context)) {
cli_inform("Attempting to add context to query.")
cli_alert_info("Attempting to add context to query.")
full_context <-
get_query_context(
query_embedding,
Expand All @@ -242,8 +242,8 @@ chat_with_context <- function(query,
}

if (rlang::is_true(add_history) && rlang::is_true(need_context)) {
cli_inform("Attempting to add chat history to query.")
cli_inform("Chat history: {class(chat_history)}")
cli_alert_info("Attempting to add chat history to query.")
cli_alert_info("Chat history: {class(chat_history)}")
if (rlang::is_null(chat_history)) {
related_history <- "No related history found."
} else {
Expand All @@ -258,7 +258,7 @@ chat_with_context <- function(query,
paste(collapse = "\n\n")
}
} else {
cli_inform("Not attempting to add chat history to query.")
cli_alert_info("Not attempting to add chat history to query.")
related_history <- "No related history found."
}

Expand Down Expand Up @@ -296,6 +296,10 @@ chat_with_context <- function(query,
)

prompt_context <- list(
list(
role = "system",
content = "You provide succinct, concise, and accurate responses."
),
list(
role = "user",
content = glue("---\nContext:\n{context}\n---")
Expand Down Expand Up @@ -340,10 +344,8 @@ chat_with_context <- function(query,
purrr::map_chr(.f = "content") |>
paste(collapse = "\n\n")

cat(simple_prompt, "\n\n")

cli_inform("Service: {service}")
cli_inform("Model: {model}")
cli_alert_info("Service: {service}")
cli_alert_info("Model: {model}")

answer <- gptstudio::chat(
prompt = simple_prompt,
Expand Down Expand Up @@ -384,20 +386,17 @@ is_context_needed <- function(user_prompt,
service = getOption("gpttools.service"),
model = getOption("gpttools.model")) {
prompt <-
glue::glue("Consider if additional context or history is necessary to
accurately respond to this user prompt. Useful context may include
recent information, package documentation, textbook excerpts, or
glue::glue("Consider if additional context or history would be useful to
accurately respond to this user prompt. Useful context may include
information like package documentation, textbook excerpts, or
other relevant details.
Respond with TRUE if such context is likely to enhance the
response, especially for queries involving recent developments,
technical subjects, or complex topics. Respond with FALSE if the
query seems straightforward or well within the AI's existing
response. Respond with FALSE only if the
query seems straightforward and well within your existing
knowledge base.
Remember, the AI's training includes data only up to a few
months ago. If the query might relate to developments after this
period, lean towards TRUE.
Most queries benefit from additional context.
Respond ONLY with TRUE or FALSE.
\n\n{user_prompt}")
Expand Down
2 changes: 0 additions & 2 deletions R/transcribe.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ transcribe_audio_chunk <-

result <- httr::content(response, "parsed", "application/json")

print(result)

file.remove(tmp_file)

return(result)
Expand Down
7 changes: 3 additions & 4 deletions inst/retriever/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ make_chat_history <- function(chats) {
)
}) |>
purrr::list_flatten()
print(history)
history
}

Expand Down Expand Up @@ -116,7 +115,7 @@ ui <- page_fillable(
selected = getOption("gpttools.service", "openai")
),
selectInput("model", "Model",
choices = NULL
choices = NULL
),
selectInput(
"embed_model", "OpenAI Embedding Model",
Expand Down Expand Up @@ -269,8 +268,8 @@ server <- function(input, output, session) {
)
)
observe(updateSelectInput(session, "source",
choices = c("All", indices()),
selected = getOption("gpttools.sources")
choices = c("All", indices()),
selected = getOption("gpttools.sources")
))
observe({
toggle_popover("settings", show = FALSE)
Expand Down

0 comments on commit ddbd426

Please sign in to comment.