diff --git a/DESCRIPTION b/DESCRIPTION
index 64f261e..905d8ba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: GitAI
 Title: Extracts Knowledge From Git Repositories
-Version: 0.0.0.9012
+Version: 0.0.0.9013
 Authors@R: c(
     person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")),
     person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"),
@@ -30,4 +30,3 @@ Suggests:
     shiny,
     withr
 Config/testthat/edition: 3
-Config/testthat/parallel: true
diff --git a/R/test-helpers.R b/R/test-helpers.R
index b1d3010..59a94b8 100644
--- a/R/test-helpers.R
+++ b/R/test-helpers.R
@@ -19,3 +19,237 @@ Mocker <- R6::R6Class(
     }
   )
 )
+
+# Test double for `Pinecone`. No method performs an HTTP request: any request
+# object that is assembled is discarded, and the return value is parsed from a
+# canned `httr2::response_json()` built from `test_fixtures`.
+PineconeMocked <- R6::R6Class(
+  "PineconeMocked",
+  inherit = Pinecone,
+  public = list(
+    # Returns the parsed `pinecone_index_response` fixture.
+    get_index_metadata = function() {
+      pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
+
+      url <- paste0("https://api.pinecone.io/indexes/", private$.index)
+
+      response <- httr2::response_json(
+        body = test_fixtures[["pinecone_index_response"]]
+      )
+      httr2::resp_body_json(response)
+    },
+
+    # Returns a list whose `upsertedCount` is 1; the upsert request is
+    # assembled but never sent, so nothing is written anywhere.
+    write_record = function(id, text, metadata = list()) {
+
+      pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
+
+      url <- paste0("https://", private$.index_host)
+
+      embeddings <- private$.get_embeddings(text = text)
+
+      metadata$text <- text
+
+      body <- list(
+        namespace = private$.namespace,
+        vectors = list(
+          id = id,
+          values = embeddings,
+          metadata = metadata
+        )
+      )
+
+      request <- httr2::request(url) |>
+        httr2::req_url_path_append("vectors/upsert") |>
+        httr2::req_headers(
+          "Api-Key" = pinecone_api_key,
+          "X-Pinecone-API-Version" = "2024-10"
+        ) |>
+        httr2::req_body_json(body)
+
+      response <- httr2::response_json(
+        body = list("upsertedCount" = 1)
+      )
+
+      response_body <- httr2::resp_body_json(response)
+      response_body
+    },
+
+    # Returns the `vectors` element of the `read_record` fixture; `id` only
+    # feeds the discarded request.
+    read_record = function(id) {
+
+      pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
+
+      url <- paste0("https://", private$.index_host)
+
+      request <- httr2::request(url) |>
+        httr2::req_url_path_append("vectors") |>
+        httr2::req_url_path_append("fetch") |>
+        httr2::req_url_query(
+          ids = id,
+          namespace = private$.namespace
+        ) |>
+        httr2::req_headers(
+          "Api-Key" = pinecone_api_key,
+          "X-Pinecone-API-Version" = "2024-10"
+        )
+
+      response <- httr2::response_json(
+        body = test_fixtures[["read_record"]]
+      )
+
+      response_body <- httr2::resp_body_json(response)
+      results <- response_body$vectors
+
+      results
+    },
+
+    # Returns the `matches` of the `matched_records` fixture with `values`
+    # dropped from each match; `query` and `top_k` do not affect the result.
+    find_records = function(query, top_k = 1) {
+
+      embeddings <- private$.get_embeddings(query)
+
+      pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
+
+      url <- paste0("https://", private$.index_host)
+
+      body <- list(
+        namespace = private$.namespace,
+        vector = embeddings,
+        topK = top_k,
+        includeValues = FALSE,
+        includeMetadata = TRUE
+      )
+
+      request <- httr2::request(url) |>
+        httr2::req_url_path_append("query") |>
+        httr2::req_headers(
+          "Api-Key" = pinecone_api_key,
+          "X-Pinecone-API-Version" = "2024-10"
+        ) |>
+        httr2::req_body_json(body)
+
+      response <- httr2::response_json(
+        body = test_fixtures[["matched_records"]]
+      )
+
+      response_body <- httr2::resp_body_json(response)
+      results <- response_body$matches
+
+      results |>
+        purrr::map(function(result) {
+          result$values <- NULL
+          result
+        })
+    }
+  ),
+
+  private = list(
+    # Returns the vector of the `embeddings` fixture, flattened by `unlist()`;
+    # `text` only feeds the discarded request.
+    .get_embeddings = function(text) {
+      pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
+
+      url <- "https://api.pinecone.io"
+
+      body <- list(
+        model = "multilingual-e5-large",
+        parameters = list(
+          input_type = "passage",
+          truncate = "END"
+        ),
+        inputs = list(
+          list(text = text)
+        )
+      )
+
+      request <- httr2::request(url) |>
+        httr2::req_url_path_append("embed") |>
+        httr2::req_headers(
+          "Api-Key" = pinecone_api_key,
+          "X-Pinecone-API-Version" = "2024-10"
+        ) |>
+        httr2::req_body_json(body)
+
+      response <- httr2::response_json(
+        body = test_fixtures[["embeddings"]]
+      )
+
+      response_body <- httr2::resp_body_json(response)
+
+      response_body$data[[1]]$values |> unlist()
+    }
+  )
+)
+
+# Canned response bodies served by `PineconeMocked`, keyed by fixture name.
+test_fixtures <- list()
+
+test_fixtures[["pinecone_index_response"]] <- list(
+  "name" = "gitai",
+  "metric" = "cosine",
+  "dimension" = 1024L,
+  "status" = list(
+    "ready" = TRUE,
+    "state" = "Ready"
+  ),
+  "host" = "gitai-test-host",
+  "spec" = list(
+    "serverless" = list(
+      "region" = "us-east-1",
+      "cloud" = "aws"
+    )
+  )
+)
+
+# The vector is random (`runif()`) and wrapped in one extra list level;
+# `.get_embeddings()` flattens it with `unlist()`.
+test_fixtures[["embeddings"]] <- list(
+  "model" = "multilingual-e5-large",
+  "data" = list(
+    list(
+      "values" = list(
+        runif(1024L, -1, 1) |> as.list()
+      )
+    )
+  ),
+  "usage" = list(
+    "total_tokens" = 78L
+  )
+)
+
+test_fixtures[["matched_records"]] <- list(
+  "results" = list(),
+  "matches" = list(
+    list(
+      "id" = "id_2",
+      "score" = 0.820673,
+      "values" = list(),
+      "metadata" = list(
+        "files" = c("test_file1", "test_file2"),
+        "repo_url" = "test_url",
+        "text" = "This package will best suite you.",
+        "timestamp" = Sys.Date()
+      )
+    )
+  ),
+  "namespace" = "gitai-tests",
+  "usage" = list("readUnits" = 10L)
+)
+
+# Reuses the embedding values and the matched-record metadata. `[[` (not `[`)
+# extracts the values themselves rather than a list that still carries the
+# `values` name, which would nest them as `values$values`.
+test_fixtures[["read_record"]] <- list(
+  "vectors" = list(
+    "TestProject" = list(
+      "values" = test_fixtures[["embeddings"]][["data"]][[1]][["values"]],
+      "metadata" = test_fixtures[["matched_records"]][["matches"]][[1]][["metadata"]]
+    )
+  ),
+  "namespace" = "gitai-tests",
+  "usage" = list("readUnits" = 1L)
+)
diff --git a/inst/example_workflow.R b/inst/example_workflow.R
new file mode 100644
index 0000000..d90acec
--- /dev/null
+++ b/inst/example_workflow.R
@@ -0,0 +1,17 @@
+# Example workflow: configure a project, process its repositories, then query
+# the database through `gitai_demo$db`.
+gitai_demo <- initialize_project("gitai-tests") |>
+  set_database(index = "gitai-mb",
+               namespace = "gitai-demo-2") |>
+  set_github_repos(
+    orgs = "r-world-devs"
+  ) |>
+  add_files(files = "\\.md") |>
+  set_llm() |>
+  set_prompt("Provide a one-two sentence description of the product based on input.")
+
+process_repos(gitai_demo)
+
+gitai_demo$db$find_records("Find package with which I can plot data.")
+
+gitai_demo$db$read_record("GitStats")
diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R
index f18b741..7350ff1 100644
--- a/tests/testthat/test-Pinecone.R
+++ b/tests/testthat/test-Pinecone.R
@@ -1,6 +1,6 @@
 test_that("getting index metadata", {
 
-  db <- Pinecone$new(
+  db <- PineconeMocked$new(
     namespace = "test_project_id",
     index = "gitai"
   )
@@ -11,7 +11,7 @@ test_that("getting index metadata", {
 
 test_that("getting embeddings", {
 
-  db <- Pinecone$new(
+  db <- PineconeMocked$new(
     namespace = "test_project_id",
     index = "gitai"
   )
@@ -24,7 +24,7 @@ test_that("getting embeddings", {
 
 test_that("writting records", {
 
-  db <- Pinecone$new(
+  db <- PineconeMocked$new(
     namespace = "test_project_id",
     index = "gitai"
   )
@@ -51,9 +51,7 @@ test_that("writting records", {
 
 test_that("finding records", {
 
-  Sys.sleep(3)
-
-  db <- Pinecone$new(
+  db <- PineconeMocked$new(
     namespace = "test_project_id",
     index = "gitai"
   )
@@ -68,17 +66,11 @@ test_that("finding records", {
   result[[1]]$metadata$text |> is.character() |> expect_true()
   result[[1]]$score |> is.numeric() |> expect_true()
 
-  result_2 <- db$find_records(
-    query = "Tell me about apple fruit.",
-    top_k = 1
-  )
-
-  expect_false(result_2[[1]]$id == result[[1]]$id)
 })
 
 test_that("reading records", {
 
-  db <- Pinecone$new(
+  db <- PineconeMocked$new(
     namespace = "test_project_id",
     index = "gitai"
   )
diff --git a/tests/testthat/test-set_database.R b/tests/testthat/test-set_database.R
index f3dee7a..202c244 100644
--- a/tests/testthat/test-set_database.R
+++ b/tests/testthat/test-set_database.R
@@ -1,11 +1,11 @@
 test_that("setting database provider with default namespace", {
-  
+
   gitai <- initialize_project("gitai-demo") |>
     set_database(
-      provider = "Pinecone",
+      provider = "PineconeMocked",
       index = "gitai"
-    ) 
-  
+    )
+
   gitai$db$index |> expect_equal("gitai")
   gitai$db$namespace |> expect_equal("gitai-demo")
 })
@@ -14,11 +14,11 @@ test_that("setting database provider with custom namepsace", {
 
   gitai <- initialize_project("gitai-demo") |>
     set_database(
-      provider = "Pinecone",
+      provider = "PineconeMocked",
       index = "gitai",
       namespace = "test_namespace"
-    ) 
-  
+    )
+
   gitai$db$index |> expect_equal("gitai")
   gitai$db$namespace |> expect_equal("test_namespace")
 })