Merge pull request #74 from JamesHWade/fix/better-index-loading
Fix/better index loading
JamesHWade authored Feb 1, 2024
2 parents ddbd426 + 6180c8f commit 5d6e15c
Showing 5 changed files with 33 additions and 15 deletions.
16 changes: 10 additions & 6 deletions R/config.R
@@ -5,7 +5,7 @@
 #' @param service The name of the service to use, default is "openai".
 #' @param model The model to use, default is "gpt-4-1106-preview".
 #' @param task The task to perform, default is "Permissive Chat".
-#' @param embeddings The location of embeddings, default is "local".
+#' @param local_embed Whether to use local embedding model. Default is FALSE.
 #' @param openai_embed_model The OpenAI embeddings model to use, default is
 #'   "text-embedding-3-small".
 #' @param local_embed_model The local embeddings model to use, default is
@@ -15,24 +15,27 @@
 #' @param save_history Logical indicating whether history should be saved,
 #'   default is TRUE.
 #' @param sources The sources to use, default is "All".
+#' @param run_code Whether to execute generated code with `reprex::reprex()`,
+#'   default is FALSE.
 #' @param persist Logical indicating whether to persist the settings, default
 #'   is TRUE.
 #' @return Invisible NULL.
 #' @export
 save_user_config <- function(service = "openai",
                              model = "gpt-4-turbo-preview",
                              task = "Permissive Chat",
-                             embeddings = TRUE,
+                             local_embed = FALSE,
                              openai_embed_model = "text-embedding-3-small",
                              local_embed_model = "BAAI/bge-small-en-v1.5",
                              k_context = 4,
                              k_history = 4,
                              save_history = TRUE,
                              sources = "All",
+                             run_code = FALSE,
                              persist = TRUE) {
   ops <- tibble::tibble(
-    service, model, task, embeddings, openai_embed_model, local_embed_model,
-    k_context, k_history, sources, save_history
+    service, model, task, local_embed, openai_embed_model, local_embed_model,
+    k_context, k_history, sources, run_code, save_history
   )

   if (persist == TRUE) {
@@ -69,13 +72,14 @@ set_user_config <- function(path = NULL) {
       gpttools.service = ops$service,
       gpttools.model = ops$model,
       gpttools.task = ops$task,
-      gpttools.local_embed = ops$embeddings,
+      gpttools.local_embed = ops$local_embed,
       gpttools.k_context = ops$k_context,
       gpttools.k_history = ops$k_history,
       gpttools.save_history = ops$save_history,
       gpttools.sources = ops$sources,
       gpttools.openai_embed_model = ops$openai_embed_model,
-      gpttools.local_embed_model = ops$local_embed_model
+      gpttools.local_embed_model = ops$local_embed_model,
+      gpttools.run_code = ops$run_code
     )
     invisible(TRUE)
   } else {
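For orientation, a minimal sketch of calling save_user_config() with the renamed and new arguments; the values shown are illustrative choices, not package defaults, and only `local_embed` and `run_code` are the arguments introduced by this change.

library(gpttools)

# Illustrative settings: `local_embed` replaces the old `embeddings` argument,
# and `run_code` is the new flag for running generated code through reprex::reprex().
save_user_config(
  service = "openai",
  model = "gpt-4-turbo-preview",
  local_embed = TRUE,
  local_embed_model = "BAAI/bge-small-en-v1.5",
  run_code = FALSE,
  persist = TRUE
)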
9 changes: 8 additions & 1 deletion R/embedding.R
@@ -278,8 +278,11 @@ gpttools_index_all_scraped_data <- function(overwrite = FALSE,
 get_top_matches <- function(index, query_embedding, k = 5) {
   k <- min(k, nrow(index))
   index |>
+    dplyr::glimpse() |>
     dplyr::mutate(
       similarity = purrr::map_dbl(embedding, \(x) {
+        cli_alert_info("query embedding: {length(query_embedding)}")
+        cli_alert_info("text embedding: {length(unlist(x))}")
         lsa::cosine(query_embedding, unlist(x))
       })
     ) |>
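The cli_alert_info() lines above are diagnostics for the underlying similarity step; as a rough, self-contained sketch of the ranking get_top_matches() performs (the toy index and three-dimensional embeddings below are invented for illustration):

library(dplyr)
library(purrr)
library(lsa)
library(tibble)

# Toy index with a list-column of fake embeddings.
index <- tibble(
  text = c("first chunk", "second chunk"),
  embedding = list(c(0.1, 0.2, 0.3), c(0.3, 0.1, 0.0))
)
query_embedding <- c(0.1, 0.2, 0.25)

# lsa::cosine() needs both vectors to have the same length, which is what the
# new alerts make visible when a mismatched index is loaded.
index |>
  mutate(similarity = map_dbl(embedding, \(x) cosine(query_embedding, unlist(x)))) |>
  arrange(desc(similarity)) |>
  slice_head(n = 1)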
@@ -331,7 +334,11 @@ load_index <- function(domain, local_embeddings = FALSE) {
   }

   if (domain == "All") {
-    arrow::open_dataset(data_dir) |> tibble::as_tibble()
+    arrow::open_dataset(
+      data_dir,
+      factory_options = list(selector_ignore_prefixes = "local")
+    ) |>
+      tibble::as_tibble()
   } else {
     arrow::read_parquet(glue("{data_dir}/{domain}.parquet"))
   }
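This is the core of the fix: when domain is "All", arrow is told to ignore files under the "local" prefix, so locally embedded indexes are not combined with OpenAI-embedded ones (which generally have a different vector length). A minimal standalone sketch, assuming a hypothetical directory layout; the paths and file names below are invented for illustration.

library(arrow)

# Hypothetical layout:
#   my_index/r4ds.hadley.nz.parquet        (OpenAI embeddings)
#   my_index/local/r4ds.hadley.nz.parquet  (local embeddings)
data_dir <- "my_index"  # placeholder path

all_sites <- open_dataset(
  data_dir,
  factory_options = list(selector_ignore_prefixes = "local")
) |>
  tibble::as_tibble()
# Files under the "local" prefix are skipped during dataset discovery, so the
# embedding column keeps a consistent dimension across rows.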
5 changes: 3 additions & 2 deletions R/zzz.R
@@ -6,14 +6,15 @@
   op_gpttools <- list(
     gpttools.service = "openai",
     gpttools.model = "gpt-4-turbo-preview",
-    gpttools.local_embed = TRUE,
+    gpttools.local_embed = FALSE,
     gpttools.local_embed_model = "BAAI/bge-large-en-v1.5",
     gpttools.task = "Permissive Chat",
     gpttools.k_context = 4,
     gpttools.k_history = 4,
     gpttools.save_history = FALSE,
     gpttools.sources = "All",
-    gpttools.openai_embed_model = "text-embedding-3-small"
+    gpttools.openai_embed_model = "text-embedding-3-small",
+    gpttools.run_code = FALSE
   )

   toset <- !(names(op_gpttools) %in% names(op))
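Because .onLoad() only sets options the user has not already set (the toset check above), flipping gpttools.local_embed to FALSE changes the default for fresh sessions without overriding an existing configuration. A small sketch of the pattern and of how downstream code reads the option:

# "Set defaults only if unset" pattern, mirroring R/zzz.R:
op <- options()
op_gpttools <- list(gpttools.local_embed = FALSE, gpttools.run_code = FALSE)
toset <- !(names(op_gpttools) %in% names(op))
if (any(toset)) options(op_gpttools[toset])

# Downstream code can read the option with an explicit fallback, as the
# getOption("gpttools.local_embed", FALSE) call in the retriever app now does:
use_local <- getOption("gpttools.local_embed", FALSE)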
10 changes: 6 additions & 4 deletions inst/retriever/app.R
@@ -140,7 +140,7 @@ ui <- page_fillable(
       "local", "Local Embeddings",
       choiceNames = c("Yes", "No"),
       choiceValues = c(TRUE, FALSE),
-      selected = getOption("gpttools.local_embed"),
+      selected = getOption("gpttools.local_embed", FALSE),
       inline = TRUE,
     ),
     selectInput(
@@ -242,7 +242,7 @@ server <- function(input, output, session) {
         dplyr::bind_rows()
       }
     } else if (input$source == "All") {
-      load_index(domain = "All", local_embeddings = TRUE)
+      load_index(domain = "All", local_embeddings = FALSE)
     } else {
       purrr::map(input$source, \(x) {
         load_index(x, local_embeddings = FALSE) |>
@@ -253,6 +253,7 @@
   })

   indices <- reactive({
+    req(input$local)
     if (input$local == TRUE) {
       list_index(dir = "index/local") |> tools::file_path_sans_ext()
     } else {
@@ -269,21 +270,22 @@
   )
   observe(updateSelectInput(session, "source",
     choices = c("All", indices()),
-    selected = getOption("gpttools.sources")
+    selected = getOption("gpttools.sources", "All")
   ))
   observe({
     toggle_popover("settings", show = FALSE)
     save_user_config(
       service = input$service,
       model = input$model,
       task = input$task,
-      embeddings = input$local,
+      local_embed = input$local,
       openai_embed_model = input$openai_embed_model,
       local_embed_model = input$local_embed_model,
       k_context = input$n_docs,
       k_history = input$n_history,
       save_history = input$save_history,
       sources = input$source,
+      run_code = input$test_code,
       persist = TRUE
     )
   }) |> bindEvent(input$save_settings)
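The app changes pair two defensive patterns: getOption() fallbacks so the UI renders sensibly when an option has never been saved, and req() so the indices reactive waits for input$local to exist before branching on it. A minimal sketch of both, not the app's actual UI; the names and layout are simplified for illustration.

library(shiny)

ui <- fluidPage(
  radioButtons("local", "Local Embeddings",
    choiceNames = c("Yes", "No"),
    choiceValues = c(TRUE, FALSE),
    selected = getOption("gpttools.local_embed", FALSE)  # fallback avoids an empty selection
  ),
  verbatimTextOutput("dir")
)

server <- function(input, output, session) {
  index_dir <- reactive({
    req(input$local)  # guards against input$local being NULL before the UI settles
    if (input$local == TRUE) "index/local" else "index"
  })
  output$dir <- renderText(index_dir())
}

shinyApp(ui, server)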
8 changes: 6 additions & 2 deletions man/save_user_config.Rd

Some generated files are not rendered by default.
