diff --git a/DESCRIPTION b/DESCRIPTION index 29e7c80..438bc9c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9008 +Version: 0.0.0.9009 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), @@ -11,6 +11,8 @@ License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +Depends: + R (>= 4.1.0) Imports: cli (>= 3.4.0), elmer, @@ -20,7 +22,9 @@ Imports: R6, S7, dplyr, - purrr + purrr, + rlang, + glue Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 775f2ac..1da8f0a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,3 @@ export(set_llm) export(set_prompt) export(verbose_off) export(verbose_on) -importFrom(R6,R6Class) -importFrom(httr2,with_verbosity) -importFrom(lubridate,as_datetime) diff --git a/R/GitAI-package.R b/R/GitAI-package.R index 3a32adb..ba55e3d 100644 --- a/R/GitAI-package.R +++ b/R/GitAI-package.R @@ -1,4 +1,13 @@ -#' @importFrom R6 R6Class -#' @importFrom httr2 with_verbosity -#' @importFrom lubridate as_datetime -NULL +#' Derive knowledge from GitHub or GitLab repositories with the use of AI/LLM +#' +#' @name GitAI-package +"_PACKAGE" + +#' This function is meant to fix 'Namespaces in Imports field not imported from:' R check note. +#' The note shows up when namespace is used to create package object (not function) or +#' within file marked at '.Rbuildignore' file. +missing_deps_note_fix <- function() { + R6::R6Class + elmer::chat_ollama + lubridate::as_datetime +} diff --git a/R/add_metadata.R b/R/add_metadata.R index 9f4d446..7441a47 100644 --- a/R/add_metadata.R +++ b/R/add_metadata.R @@ -17,11 +17,15 @@ add_metadata <- function(result, content) { get_repo_date <- S7::new_generic("get_repo_date", "repo_api_url") -github_repo <- S7::new_class("github_repo", - properties = list(repo = S7::class_character)) +github_repo <- S7::new_class( + "github_repo", + properties = list(repo = S7::class_character) +) -gitlab_repo <- S7::new_class("gitlab_repo", - properties = list(repo = S7::class_character)) +gitlab_repo <- S7::new_class( + "gitlab_repo", + properties = list(repo = S7::class_character) +) S7::method(get_repo_date, github_repo) <- function(repo_api_url) { repo_data <- get_response(repo_api_url@repo) @@ -29,8 +33,10 @@ S7::method(get_repo_date, github_repo) <- function(repo_api_url) { } S7::method(get_repo_date, gitlab_repo) <- function(repo_api_url) { - repo_data <- get_response(endpoint = repo_api_url@repo, - token = Sys.getenv("GITLAB_PAT")) + repo_data <- get_response( + endpoint = repo_api_url@repo, + token = Sys.getenv("GITLAB_PAT") + ) lubridate::as_datetime(repo_data$last_activity_at) } diff --git a/R/process_repos.R b/R/process_repos.R index e6e622b..d3a843b 100644 --- a/R/process_repos.R +++ b/R/process_repos.R @@ -9,10 +9,11 @@ process_repos <- function(gitai, verbose = is_verbose()) { gitstats <- gitai$gitstats - gitai$repos_metadata <- - GitStats::get_repos(gitstats, - add_contributors = FALSE, - verbose = verbose) + gitai$repos_metadata <- GitStats::get_repos( + gitstats, + add_contributors = FALSE, + verbose = verbose + ) GitStats::get_files_structure( gitstats_object = gitstats, @@ -22,30 +23,28 @@ process_repos <- function(gitai, verbose = is_verbose()) { ) files_content <- GitStats::get_files_content(gitstats, verbose = verbose) repositories <- unique(files_content$repo_name) - results <- - repositories |> - purrr::map(function(repo_name) { - if (verbose) { - cli::cli_alert_info("Processing repository: {.pkg {repo_name}}") - } + process_repo_content <- function(repo_name) { + if (verbose) { + cli::cli_alert_info("Processing repository: {.pkg {repo_name}}") + } - filtered_content <- - files_content |> - dplyr::filter(repo_name == !!repo_name) - content_to_process <- - filtered_content |> - dplyr::pull(file_content) |> - paste(collapse = "\n\n") + filtered_content <- files_content |> + dplyr::filter(repo_name == !!repo_name) + content_to_process <- filtered_content |> + dplyr::pull(file_content) |> + paste(collapse = "\n\n") - result <- process_content( - gitai = gitai, + result <- gitai |> + process_content( content = content_to_process ) |> - add_metadata( - content = filtered_content - ) + add_metadata( + content = filtered_content + ) + } - }) |> + results <- repositories |> + purrr::map(process_repo_content) |> purrr::set_names(repositories) results diff --git a/R/set_llm.R b/R/set_llm.R index 4c81a97..1a9d772 100644 --- a/R/set_llm.R +++ b/R/set_llm.R @@ -1,31 +1,44 @@ #' Set Large Language Model in `GitAI` object. +#' #' @name set_llm #' @param gitai A \code{GitAI} object. -#' @param provider A LLM provider. -#' @param model A LLM model. -#' @param seed An integer to make results more reproducible. -#' @param ... Other arguments to pass to `elmer::chat_openai()` function. +#' @param provider Name of LLM provider, a string. Results with setting up LLM using +#' \code{elmer::chat_} function. +#' @param ... Other arguments to pass to corresponding \code{elmer::chat_} function. +#' Please use \link{get_llm_defaults} to get default model arguments. #' @return A \code{GitAI} object. #' @export -set_llm <- function(gitai, - provider = "openai", - model = "gpt-4o-mini", - seed = NULL, - ...) { +set_llm <- function(gitai, provider = "openai", ...) { - if (provider == "openai") { + provider_method <- rlang::env_get( + env = asNamespace("elmer"), + nm = glue::glue("chat_{provider}") + ) + provider_args <- purrr::list_modify( + get_llm_defaults(provider), + !!!rlang::dots_list(...) + ) - gitai$llm <- elmer::chat_openai( - model = model, - echo = "none", - seed = seed, - ... - ) - } + gitai$llm <- rlang::exec(provider_method, !!!provider_args) invisible(gitai) } +llm_default_args <- list( + openai = list(model = "gpt-4o-mini", seed = NULL, echo = "none"), + ollama = list(model = "llama3.2", seed = NULL), + bedrock = list(model = "anthropic.claude-3-5-sonnet-20240620-v1:0") +) + +#' @rdname set_llm +get_llm_defaults <- function(provider) { + llm_defaults <- llm_default_args[[provider]] + if (!is.null(llm_defaults)) { + return(llm_defaults) + } + list() +} + #' Set prompt. #' @name set_prompt #' @param gitai A \code{GitAI} object. diff --git a/man/GitAI-package.Rd b/man/GitAI-package.Rd new file mode 100644 index 0000000..873b1a0 --- /dev/null +++ b/man/GitAI-package.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GitAI-package.R +\docType{package} +\name{GitAI-package} +\alias{GitAI} +\alias{GitAI-package} +\title{Derive knowledge from GitHub or GitLab repositories with the use of AI/LLM} +\description{ +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + +Scan multiple Git repositories, pull specified files content and process it with Large Language Models. You can summarize the content in specific way, extract information and data, or find answers to your questions about the repositories. +} +\author{ +\strong{Maintainer}: Kamil Wais \email{kamil.wais@gmail.com} + +Authors: +\itemize{ + \item Krystian Igras \email{krystian8207@gmail.com} + \item Maciej Banas \email{banasmaciek@gmail.com} +} + +} diff --git a/man/missing_deps_note_fix.Rd b/man/missing_deps_note_fix.Rd new file mode 100644 index 0000000..4eccab3 --- /dev/null +++ b/man/missing_deps_note_fix.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GitAI-package.R +\name{missing_deps_note_fix} +\alias{missing_deps_note_fix} +\title{This function is meant to fix 'Namespaces in Imports field not imported from:' R check note. +The note shows up when namespace is used to create package object (not function) or +within file marked at '.Rbuildignore' file.} +\usage{ +missing_deps_note_fix() +} +\description{ +This function is meant to fix 'Namespaces in Imports field not imported from:' R check note. +The note shows up when namespace is used to create package object (not function) or +within file marked at '.Rbuildignore' file. +} diff --git a/man/set_llm.Rd b/man/set_llm.Rd index 66e20b0..11e57f6 100644 --- a/man/set_llm.Rd +++ b/man/set_llm.Rd @@ -2,20 +2,21 @@ % Please edit documentation in R/set_llm.R \name{set_llm} \alias{set_llm} +\alias{get_llm_defaults} \title{Set Large Language Model in \code{GitAI} object.} \usage{ -set_llm(gitai, provider = "openai", model = "gpt-4o-mini", seed = NULL, ...) +set_llm(gitai, provider = "openai", ...) + +get_llm_defaults(provider) } \arguments{ \item{gitai}{A \code{GitAI} object.} -\item{provider}{A LLM provider.} - -\item{model}{A LLM model.} - -\item{seed}{An integer to make results more reproducible.} +\item{provider}{Name of LLM provider, a string. Results with setting up LLM using +\code{elmer::chat_} function.} -\item{...}{Other arguments to pass to \code{elmer::chat_openai()} function.} +\item{...}{Other arguments to pass to corresponding \code{elmer::chat_} function. +Please use \link{get_llm_defaults} to get default model arguments.} } \value{ A \code{GitAI} object. diff --git a/tests/testthat.R b/tests/testthat.R index c35d6e0..5d8d06d 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -7,6 +7,7 @@ # * https://testthat.r-lib.org/articles/special-files.html library(testthat) +library(rlang) library(GitAI) test_check("GitAI") diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index ca930d0..1f8ac48 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1 +1,86 @@ test_mocker <- Mocker$new() + +# Override other methods when needed in the future +ChatMocked <- R6::R6Class( + "ChatMocked", + inherit = elmer:::Chat, + public = list( + chat = function(..., echo = NULL) { + if (self$get_system_prompt() == "You always return only 'Hi there!'") { + return("Hi there!") + } + } + ) +) + +# This method allows to skip original checks (e.g. for api or other args structure) and returns +# object of class ChatMocked that we can modify for our testing purposes. +mock_chat_method <- function(turns = NULL, + echo = c("none", "text", "all"), + ..., + provider_class) { + + provider_args <- rlang::dots_list(...) + provider <- rlang::exec(provider_class, !!!provider_args) + + ChatMocked$new(provider = provider, turns = turns, echo = echo) +} + +chat_openai_mocked <- function(system_prompt = NULL, + turns = NULL, + base_url = "https://api.mocked.com/v1", + api_key = "mocked_key", + model = NULL, + seed = NULL, + api_args = list(), + echo = c("none", "text", "all")) { + + turns <- elmer:::normalize_turns(turns, system_prompt) + model <- elmer:::set_default(model, "gpt-4o") + echo <- elmer:::check_echo(echo) + + if (is.null(seed)) { + seed <- 1014 + } + + mock_chat_method( + turns = turns, + echo = echo, + base_url = base_url, + model = model, + seed = seed, + extra_args = api_args, + api_key = api_key, + provider_class = elmer:::ProviderOpenAI + ) +} + +chat_bedrock_mocked <- function(system_prompt = NULL, + turns = NULL, + model = NULL, + profile = NULL, + echo = NULL) { + + credentials <- list( + access_key_id = "access_key_id_mocked", + secret_access_key = "access_key_id_mocked", + session_token = "session_token_mocked", + access_token = "access_token_mocked", + expiration = as.numeric(Sys.time() + 3600), + region = "eu-central-1" + ) + + turns <- elmer:::normalize_turns(turns, system_prompt) + model <- elmer:::set_default(model, "model_bedrock") + echo <- elmer:::check_echo(echo) + + mock_chat_method( + turns = turns, + echo = echo, + base_url = "", + model = model, + profile = profile, + credentials = credentials, + provider_class = elmer:::ProviderBedrock + ) +} diff --git a/tests/testthat/test-add_files.R b/tests/testthat/test-add_files.R index e0fbd12..97a1316 100644 --- a/tests/testthat/test-add_files.R +++ b/tests/testthat/test-add_files.R @@ -1,29 +1,26 @@ test_that("add_files adds file_paths to GitAI settings", { my_project <- initialize_project("gitai_test_project") - my_project <- - my_project |> + + my_project <- my_project |> add_files(files = "DESCRIPTION") expect_equal("DESCRIPTION", my_project$files) - my_project <- - my_project |> + + my_project <- my_project |> add_files(files = c("LICENSE", "project_metadata.yaml")) expect_equal(c("LICENSE", "project_metadata.yaml"), my_project$files) }) test_that("add_files adds file_types to GitAI settings", { my_project <- initialize_project("gitai_test_project") - my_project <- - my_project |> + my_project <- my_project |> add_files(files = "*.md") expect_equal("*.md", my_project$files) }) test_that("add_files returns error when other than character type is passed", { - my_project <- initialize_project("gitai_test_project") expect_snapshot_error( - my_project <- - my_project |> + my_project <- my_project |> add_files(files = 12345) ) }) diff --git a/tests/testthat/test-add_metadata.R b/tests/testthat/test-add_metadata.R index 8124953..ea2ea09 100644 --- a/tests/testthat/test-add_metadata.R +++ b/tests/testthat/test-add_metadata.R @@ -10,8 +10,8 @@ test_that("metadata is added to content", { api_url = c("test_URL", "test_URL") ) testthat::with_mocked_bindings({ - result_with_metadata <- - test_mocker$use("result") |> + result_with_metadata <- "result" |> + test_mocker$use() |> add_metadata( content = mocked_files_content ) diff --git a/tests/testthat/test-initialize_project.R b/tests/testthat/test-initialize_project.R index 4931207..61cc1fa 100644 --- a/tests/testthat/test-initialize_project.R +++ b/tests/testthat/test-initialize_project.R @@ -1,16 +1,10 @@ test_that("project can be initialized", { - expect_error(initialize_project()) test_project_id <- "gitai_test_project" - my_project <- initialize_project(project_id = test_project_id) - expect_true("R6" %in% class(my_project)) expect_true("GitAI" %in% class(my_project)) - - expect_equal(my_project$project_id, - test_project_id) - + expect_equal(my_project$project_id, test_project_id) expect_null(my_project$llm) }) diff --git a/tests/testthat/test-process_content.R b/tests/testthat/test-process_content.R index aadf542..a5828f3 100644 --- a/tests/testthat/test-process_content.R +++ b/tests/testthat/test-process_content.R @@ -1,48 +1,43 @@ test_that("processing content have proper output structure", { - - my_project <- - initialize_project("gitai_test_project") |> + my_project <- initialize_project("gitai_test_project") |> set_llm() |> set_prompt(system_prompt = "Say 'Hi there!' only and nothing else.") result <- process_content(gitai = my_project, content = "") - - expect_equal(result$text, - "Hi there!") - - result$tokens |> is.numeric() |> expect_true() - result$output |> is.list() |> expect_true() - result$content_nchars |> is.numeric() |> expect_true() - result$text |> is.character() |> expect_true() + expect_equal(result$text, "Hi there!") + expect_true(is.numeric(result$tokens)) + expect_true(is.list(result$output)) + expect_true(is.numeric(result$content_nchars)) + expect_true(is.character(result$text)) }) test_that("processing a single file content with deterministic output", { - - my_project <- - initialize_project("gitai_test_project") |> + my_project <- initialize_project("gitai_test_project") |> set_llm(seed = 1014, api_args = list(temperature = 0)) |> set_prompt(system_prompt = "Summarize provided conent with one, short sentence.") - - test_content <- paste0( - "Artificial intelligence (AI) plays a crucial role in transforming industries", - "by automating repetitive tasks and enhancing productivity. It enables personalized experiences", - "in sectors like healthcare, finance, and entertainment by analyzing vast amounts of data. AI algorithms", - "assist in decision-making processes by providing insights that humans may overlook. In addition,", - "AI is vital for advancements in technologies such as self-driving cars and smart home devices. Overall,", - "AI acts as a powerful tool for innovation, driving efficiencies, and solving complex problems." - ) - + test_content <- r"( + Artificial intelligence (AI) plays a crucial role in transforming industries + by automating repetitive tasks and enhancing productivity. It enables personalized experiences + in sectors like healthcare, finance, and entertainment by analyzing vast amounts of data. AI algorithms + assist in decision-making processes by providing insights that humans may overlook. In addition, + AI is vital for advancements in technologies such as self-driving cars and smart home devices. Overall, + AI acts as a powerful tool for innovation, driving efficiencies, and solving complex problems. + )" httr2::with_verbosity(verbosity = 0, { - result <- process_content(gitai = my_project, - content = test_content) + result <- process_content( + gitai = my_project, + content = test_content + ) }) expect_length(gregexpr("\\.", result$text)[[1]], 1) - - expect_equal(result$text, - process_content(gitai = my_project, content = test_content)$text) - - expect_equal(result$text, - process_content(gitai = my_project, content = test_content)$text) + expect_equal( + result$text, + process_content(gitai = my_project, content = test_content)$text + ) + expect_equal( + result$text, + process_content(gitai = my_project, content = test_content)$text + ) test_mocker$cache(result) }) diff --git a/tests/testthat/test-process_repos.R b/tests/testthat/test-process_repos.R index 7da49f8..fc8767a 100644 --- a/tests/testthat/test-process_repos.R +++ b/tests/testthat/test-process_repos.R @@ -1,22 +1,22 @@ test_that("process_repos() returns results with repo metadata", { - verbose_off() - my_project <- - initialize_project("gitai_test_project") |> + my_project <- initialize_project("gitai_test_project") |> set_github_repos( repos = c("r-world-devs/GitStats", "openpharma/DataFakeR") ) |> add_files(files = "README.md") |> set_llm() |> set_prompt(system_prompt = "Summarize the user content if one sentence.") - results <- my_project |> process_repos() - results |> is.list() |> expect_true() - results |> names() |> expect_equal(c("GitStats", "DataFakeR")) - - results |> purrr::map(~ nchar(.x$text) > 10) |> unlist() |> all() |> expect_true() - + expect_true(is.list(results)) + expect_equal(c("GitStats", "DataFakeR"), names(results)) + expect_true( + results |> + purrr::map(~ nchar(.x$text) > 10) |> + unlist() |> + all() + ) results |> purrr::walk(~ expect_true("metadata" %in% names(.))) }) diff --git a/tests/testthat/test-set_llm.R b/tests/testthat/test-set_llm.R index d832a54..4f05ae8 100644 --- a/tests/testthat/test-set_llm.R +++ b/tests/testthat/test-set_llm.R @@ -1,33 +1,126 @@ +# integration tests + test_that("setting LLM ", { + my_project <- initialize_project("gitai_test_project") |> set_llm() + expect_true("Chat" %in% class(my_project$llm)) + expect_null(my_project$llm$system_prompt) +}) +test_that("setting system prompt", { my_project <- initialize_project("gitai_test_project") + expect_error( + my_project |> set_prompt(system_prompt = "You always return only 'Hi there!'") + ) - my_project <- - my_project |> - set_llm() + my_project <- my_project |> + set_llm() |> + set_prompt(system_prompt = "You always return only 'Hi there!'") + expect_equal( + my_project$llm$get_system_prompt(), + "You always return only 'Hi there!'" + ) + expect_equal( + my_project$llm$chat("Hi"), + "Hi there!" + ) +}) +# mocked llm and provider tests + +test_that("setting LLM with default provider ", { + my_project <- initialize_project("gitai_test_project") + testthat::local_mocked_bindings( + chat_openai = chat_openai_mocked, + .package = "elmer" + ) + my_project <- my_project |> set_llm() expect_true("Chat" %in% class(my_project$llm)) + expect_in( + "elmer::ProviderOpenAI", + class(my_project$llm$.__enclos_env__$private$provider) + ) +}) + +test_that("setting LLM with non-default provider ", { + my_project <- initialize_project("gitai_test_project") + testthat::local_mocked_bindings( + chat_bedrock = chat_bedrock_mocked, + .package = "elmer" + ) + my_project <- my_project |> set_llm(provider = "bedrock") + expect_in( + "elmer::ProviderBedrock", + class(my_project$llm$.__enclos_env__$private$provider) + ) +}) + +test_that("setting LLM with non-existing provider ", { + my_project <- initialize_project("gitai_test_project") + expect_error( + my_project <- my_project |> set_llm(provider = "non_existing_provider"), + "Can't find `chat_non_existing_provider` in environment" + ) +}) + +test_that("setting arguments for selected provider ", { + my_project <- initialize_project("gitai_test_project") + testthat::local_mocked_bindings( + chat_openai = chat_openai_mocked, + .package = "elmer" + ) + + # Provider-related argument overrides the default from `llm_default_args` + my_project <- my_project |> + set_llm(provider = "openai", model = "model_mocked") + expect_equal( + my_project$llm$.__enclos_env__$private$provider@model, + "model_mocked" + ) + + # Provider-related, non-default argument (not included within `llm_default_args`) is properly set + my_project <- my_project |> + set_llm(provider = "openai", api_key = "api_key_mocked") + expect_equal( + my_project$llm$.__enclos_env__$private$provider@api_key, + "api_key_mocked" + ) + + # Chat-related, non-default argument (not included within `llm_default_args`) is properly set + my_project <- my_project |> + set_llm(provider = "openai", echo = "all") + expect_equal( + my_project$llm$.__enclos_env__$private$echo, + "all" + ) +}) + +test_that("setting LLM without system prompt", { + testthat::local_mocked_bindings( + chat_openai = chat_openai_mocked, + .package = "elmer" + ) + my_project <- initialize_project("gitai_test_project") |> set_llm() expect_null(my_project$llm$system_prompt) }) test_that("setting system prompt", { + testthat::local_mocked_bindings( + chat_openai = chat_openai_mocked, + .package = "elmer" + ) my_project <- initialize_project("gitai_test_project") - expect_error( my_project |> set_prompt(system_prompt = "You always return only 'Hi there!'") ) - my_project <- - my_project |> + my_project <- my_project |> set_llm() |> set_prompt(system_prompt = "You always return only 'Hi there!'") - expect_equal( my_project$llm$get_system_prompt(), "You always return only 'Hi there!'" ) - expect_equal( my_project$llm$chat("Hi"), "Hi there!" diff --git a/tests/testthat/test-set_repos.R b/tests/testthat/test-set_repos.R index 4c5d0cb..d6f3e3c 100644 --- a/tests/testthat/test-set_repos.R +++ b/tests/testthat/test-set_repos.R @@ -1,19 +1,13 @@ test_that("set_*_repos creates GitStats object inside GitAI with repos set", { verbose_off() - my_project <- initialize_project("gitai_test_project") - my_project <- - my_project |> - set_github_repos( - repos = c("r-world-devs/GitStats", "openpharma/DataFakeR") - ) + my_project <- my_project |> + set_github_repos(repos = c("r-world-devs/GitStats", "openpharma/DataFakeR")) expect_true("GitStats" %in% class(my_project$gitstats)) - my_project <- - my_project |> - set_gitlab_repos( - repos = "mbtests/gitstatstesting" - ) + + my_project <- my_project |> + set_gitlab_repos(repos = "mbtests/gitstatstesting") expect_length( my_project$gitstats$.__enclos_env__$private$hosts, 2