diff --git a/.ci/r_tests.sh b/.ci/r_tests.sh index fc135397..30a26334 100755 --- a/.ci/r_tests.sh +++ b/.ci/r_tests.sh @@ -14,7 +14,9 @@ if [[ $OS_NAME == "macos-latest" ]]; then echo 'options(install.packages.check.source = "no")' >> .Rprofile else tlmgr --verify-repo=none update --self - tlmgr --verify-repo=none install ec + tlmgr --verify-repo=none install ec hyperref iftex infwarerr kvoptions pdftexcmds + + echo "Sys.setenv(RETICULATE_PYTHON = '$CONDA_PREFIX/bin/python')" >> .Rprofile fi R_LIB_PATH=$HOME/R @@ -26,6 +28,13 @@ echo "R_LIBS=$R_LIB_PATH" > .Renviron export _R_CHECK_CRAN_INCOMING_=0 export _R_CHECK_CRAN_INCOMING_REMOTE_=0 +# increase the allowed time to run the examples +export _R_CHECK_EXAMPLE_TIMING_THRESHOLD_=30 + +# fix the 'unable to verify current time' NOTE +# see: https://stackoverflow.com/a/63837547/8302386 +export _R_CHECK_SYSTEM_CLOCK_=0 + if [[ $OS_NAME == "macos-latest" ]]; then Rscript -e "install.packages('devtools', dependencies = TRUE, repos = 'https://cran.r-project.org')" fi diff --git a/.ci/r_tests_windows.ps1 b/.ci/r_tests_windows.ps1 index a1992ea8..5378107c 100644 --- a/.ci/r_tests_windows.ps1 +++ b/.ci/r_tests_windows.ps1 @@ -24,6 +24,13 @@ Remove-Item C:\rtools40 -Force -Recurse -ErrorAction Ignore $env:_R_CHECK_CRAN_INCOMING_ = 0 $env:_R_CHECK_CRAN_INCOMING_REMOTE_ = 0 +# increase the allowed time to run the examples +$env:_R_CHECK_EXAMPLE_TIMING_THRESHOLD_ = 30 + +# fix the 'unable to verify current time' NOTE +# see: https://stackoverflow.com/a/63837547/8302386 +$env:_R_CHECK_SYSTEM_CLOCK_ = 0 + $R_VER = "4.0.4" $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed Invoke-WebRequest -Uri https://cloud.r-project.org/bin/windows/base/old/$R_VER/R-$R_VER-win.exe -OutFile R-win.exe -MaximumRetryCount 3 diff --git a/R-package/.gitignore b/R-package/.gitignore deleted file mode 100644 index 5b6a0652..00000000 --- a/R-package/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 70683b14..2da7012c 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: RGF Type: Package Title: Regularized Greedy Forest -Version: 1.0.6.3 -Date: 2019-12-12 +Version: 1.0.7 +Date: 2021-03-17 Authors@R: c( person("Lampros", "Mouselimis", email = "mouselimislampros@gmail.com", role = c("aut", "cre")), person("Ryosuke", "Fukatani", role = "cph", comment = "Author of the python wrapper of the 'Regularized Greedy Forest' machine learning algorithm"), person("Nikita", "Titov", role = "cph", comment = "Author of the python wrapper of the 'Regularized Greedy Forest' machine learning algorithm"), person("Tong", "Zhang", role = "cph", comment = "Author of the 'Regularized Greedy Forest' and of the Multi-core implementation of Regularized Greedy Forest machine learning algorithm"), person("Rie", "Johnson", role = "cph", comment = "Author of the 'Regularized Greedy Forest' machine learning algorithm") ) Maintainer: Lampros Mouselimis BugReports: https://github.com/RGF-team/rgf/issues @@ -21,5 +21,5 @@ Suggests: rmarkdown Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 VignetteBuilder: knitr diff --git a/R-package/NEWS.md b/R-package/NEWS.md index 3f7b288c..b6532727 100644 --- a/R-package/NEWS.md +++ b/R-package/NEWS.md @@ -2,6 +2,10 @@ * We've modified the *package.R* file so that messages are printed to the console whenever Python or any of the required modules is not available. 
Moreover, for the R-package testing the conda environment parameter is adjusted ( this applies to the RGF-team Github repository and not to the CRAN package directly ) * We've modified the *.appveyor.yml* file to return the *artifacts* in order to observe if tests ran successfully ( this applies to the RGF-team Github repository and not to the CRAN package directly ) * We've added tests to increase the code coverage. +* We've dropped support for Python 2.7 +* We've fixed also the invalid URL's in the README.md file +* We removed the 'zzz.R' file which included the message: 'Beginning from version 1.0.3 the 'dgCMatrix_2scipy_sparse' function was renamed to 'TO_scipy_sparse' and now accepts either a 'dgCMatrix' or a 'dgRMatrix' as input. The appropriate format for the 'RGF' package in case of sparse matrices is the 'dgCMatrix' format (scipy.sparse.csc_matrix)' as after 4 version updates is no longer required +* We've modified the '.onLoad' function in the 'package.R' file by removing 'reticulate::py_available(initialize = TRUE)' which forces reticulate to initialize Python and gives the following NOTE on CRAN 'Warning in system2(command = python, args = shQuote(config_script), stdout = TRUE, : ..."' had status 2' (see: https://github.com/rstudio/reticulate/issues/730#issuecomment-594365528) ## RGF 1.0.6 diff --git a/R-package/R/FastRGF_Classifier.R b/R-package/R/FastRGF_Classifier.R index b13093df..ea4d375d 100644 --- a/R-package/R/FastRGF_Classifier.R +++ b/R-package/R/FastRGF_Classifier.R @@ -51,7 +51,7 @@ #' min_child_weight = 5.0, data_l2 = 2.0, #' sparse_max_features = 80000, #' sparse_min_occurences = 5, -#' calc_prob="sigmoid", n_jobs = 1, +#' calc_prob = "sigmoid", n_jobs = 1, #' verbose = 0)}}{} #' #' \item{\code{--------------}}{} @@ -89,25 +89,29 @@ #' # min_child_weight = 5.0, data_l2 = 2.0, #' # sparse_max_features = 80000, #' # sparse_min_occurences = 5, -#' # calc_prob="sigmoid", n_jobs = 1, +#' # calc_prob = "sigmoid", n_jobs = 1, #' # verbose = 0) #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("rgf.sklearn")) { #' -#' library(RGF) +#' library(RGF) #' -#' set.seed(1) -#' x = matrix(runif(100000), nrow = 100, ncol = 1000) +#' set.seed(1) +#' x = matrix(runif(100000), nrow = 100, ncol = 1000) #' -#' y = sample(1:2, 100, replace = TRUE) +#' y = sample(1:2, 100, replace = TRUE) #' -#' fast_RGF_class = FastRGF_Classifier$new(max_leaf = 50) +#' fast_RGF_class = FastRGF_Classifier$new(max_leaf = 50) #' -#' fast_RGF_class$fit(x, y) +#' fast_RGF_class$fit(x, y) #' -#' preds = fast_RGF_class$predict_proba(x) -#' } +#' preds = fast_RGF_class$predict_proba(x) +#' } +#' } +#' }, silent = TRUE) FastRGF_Classifier <- R6::R6Class( "FastRGF_Classifier", diff --git a/R-package/R/FastRGF_Regressor.R b/R-package/R/FastRGF_Regressor.R index c74dfba0..c72a0182 100644 --- a/R-package/R/FastRGF_Regressor.R +++ b/R-package/R/FastRGF_Regressor.R @@ -83,21 +83,25 @@ #' # n_jobs = 1, verbose = 0) #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("rgf.sklearn")) { #' -#' library(RGF) +#' library(RGF) #' -#' set.seed(1) -#' x = matrix(runif(100000), nrow = 100, ncol = 1000) +#' set.seed(1) +#' x = matrix(runif(100000), nrow = 100, ncol = 1000) #' -#' y = runif(100) +#' y = runif(100) #' -#' 
fast_RGF_regr = FastRGF_Regressor$new(max_leaf = 50) +#' fast_RGF_regr = FastRGF_Regressor$new(max_leaf = 50) #' -#' fast_RGF_regr$fit(x, y) +#' fast_RGF_regr$fit(x, y) #' -#' preds = fast_RGF_regr$predict(x) -#' } +#' preds = fast_RGF_regr$predict(x) +#' } +#' } +#' }, silent = TRUE) FastRGF_Regressor <- R6::R6Class( "FastRGF_Regressor", diff --git a/R-package/R/RGF_Classifier.R b/R-package/R/RGF_Classifier.R index 4fb8c7f3..abc28789 100644 --- a/R-package/R/RGF_Classifier.R +++ b/R-package/R/RGF_Classifier.R @@ -22,7 +22,7 @@ #' @param memory_policy a character string. One of \emph{"conservative"} (it uses less memory at the expense of longer runtime. Try only when with default value it uses too much memory) or \emph{"generous"} (it runs faster using more memory by keeping the sorted orders of the features on memory for reuse). Memory using policy. #' @param verbose an integer. Controls the verbosity of the tree building process. #' @param init_model either NULL or a character string, optional (default=NULL). Filename of a previously saved model from which training should do warm-start. If model has been saved into multiple files, do not include numerical suffixes in the filename. \emph{NOTE:} Make sure you haven't forgotten to increase the value of the max_leaf parameter regarding to the specified warm-start model because warm-start model trees are counted in the overall number of trees. -#' @param filename a character string specifying a valid path to a file where the fitted model should be saved +#' @param filename a character string specifying a valid path to a file where the fitted model should be saved #' @export #' @details #' @@ -41,7 +41,7 @@ #' the \emph{feature_importances} function returns the feature importances for the data. #' #' the \emph{dump_model} function currently prints information about the fitted model in the console -#' +#' #' the \emph{save_model} function saves a model to a file from which training can do warm-start in the future. #' #' @references \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Rie Johnson and Tong Zhang, Learning Nonlinear Functions Using Regularized Greedy Forest} @@ -93,7 +93,7 @@ #' \item{\code{dump_model()}}{} #' #' \item{\code{--------------}}{} -#' +#' #' \item{\code{save_model(filename)}}{} #' #' \item{\code{--------------}}{} @@ -109,21 +109,25 @@ #' # verbose = 0, init_model = NULL) #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("rgf.sklearn")) { #' -#' library(RGF) +#' library(RGF) #' -#' set.seed(1) -#' x = matrix(runif(1000), nrow = 100, ncol = 10) +#' set.seed(1) +#' x = matrix(runif(1000), nrow = 100, ncol = 10) #' -#' y = sample(1:2, 100, replace = TRUE) +#' y = sample(1:2, 100, replace = TRUE) #' -#' RGF_class = RGF_Classifier$new(max_leaf = 50) +#' RGF_class = RGF_Classifier$new(max_leaf = 50) #' -#' RGF_class$fit(x, y) +#' RGF_class$fit(x, y) #' -#' preds = RGF_class$predict_proba(x) -#' } +#' preds = RGF_class$predict_proba(x) +#' } +#' } +#' }, silent = TRUE) RGF_Classifier <- R6::R6Class( "RGF_Classifier", diff --git a/R-package/R/RGF_Regressor.R b/R-package/R/RGF_Regressor.R index eedecd75..edcaa62e 100644 --- a/R-package/R/RGF_Regressor.R +++ b/R-package/R/RGF_Regressor.R @@ -20,7 +20,7 @@ #' @param memory_policy a character string. One of \emph{"conservative"} (it uses less memory at the expense of longer runtime. 
Try only when with default value it uses too much memory) or \emph{"generous"} (it runs faster using more memory by keeping the sorted orders of the features on memory for reuse). Memory using policy. #' @param verbose an integer. Controls the verbosity of the tree building process. #' @param init_model either NULL or a character string, optional (default=NULL). Filename of a previously saved model from which training should do warm-start. If model has been saved into multiple files, do not include numerical suffixes in the filename. \emph{NOTE:} Make sure you haven't forgotten to increase the value of the max_leaf parameter regarding to the specified warm-start model because warm-start model trees are counted in the overall number of trees. -#' @param filename a character string specifying a valid path to a file where the fitted model should be saved +#' @param filename a character string specifying a valid path to a file where the fitted model should be saved #' @export #' @details #' @@ -37,7 +37,7 @@ #' the \emph{feature_importances} function returns the feature importances for the data. #' #' the \emph{dump_model} function currently prints information about the fitted model in the console -#' +#' #' the \emph{save_model} function saves a model to a file from which training can do warm-start in the future. #' #' @references \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Rie Johnson and Tong Zhang, Learning Nonlinear Functions Using Regularized Greedy Forest} @@ -99,21 +99,26 @@ #' # verbose = 0, init_model = NULL) #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("rgf.sklearn")) { #' -#' library(RGF) +#' library(RGF) #' -#' set.seed(1) -#' x = matrix(runif(1000), nrow = 100, ncol = 10) +#' set.seed(1) +#' x = matrix(runif(1000), nrow = 100, ncol = 10) #' -#' y = runif(100) +#' y = runif(100) #' -#' RGF_regr = RGF_Regressor$new(max_leaf = 50) +#' RGF_regr = RGF_Regressor$new(max_leaf = 50) #' -#' RGF_regr$fit(x, y) +#' RGF_regr$fit(x, y) #' -#' preds = RGF_regr$predict(x) -#' } +#' preds = RGF_regr$predict(x) +#' } +#' } +#' }, silent = TRUE) + RGF_Regressor <- R6::R6Class( "RGF_Regressor", inherit = Internal_class, diff --git a/R-package/R/TO_scipy_sparse.R b/R-package/R/TO_scipy_sparse.R index 475a0773..ce055e92 100644 --- a/R-package/R/TO_scipy_sparse.R +++ b/R-package/R/TO_scipy_sparse.R @@ -13,45 +13,50 @@ #' @references https://stat.ethz.ch/R-manual/R-devel/library/Matrix/html/dgCMatrix-class.html, https://stat.ethz.ch/R-manual/R-devel/library/Matrix/html/dgRMatrix-class.html, https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("scipy")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("scipy")) { #' -#' if (Sys.info()["sysname"] != 'Darwin') { +#' if (Sys.info()["sysname"] != 'Darwin') { #' -#' library(RGF) +#' library(RGF) #' #' -#' # 'dgCMatrix' sparse matrix -#' #-------------------------- +#' # 'dgCMatrix' sparse matrix +#' #-------------------------- #' -#' data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) +#' data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) #' -#' dgcM = Matrix::Matrix( -#' data = data -#' , nrow = 3 -#' , ncol = 3 -#' , byrow = TRUE -#' , sparse = TRUE -#' ) +#' dgcM = Matrix::Matrix( +#' data = data +#' , 
nrow = 3 +#' , ncol = 3 +#' , byrow = TRUE +#' , sparse = TRUE +#' ) #' -#' print(dim(dgcM)) +#' print(dim(dgcM)) #' -#' res = TO_scipy_sparse(dgcM) +#' res = TO_scipy_sparse(dgcM) #' -#' print(res$shape) +#' print(res$shape) #' #' -#' # 'dgRMatrix' sparse matrix -#' #-------------------------- +#' # 'dgRMatrix' sparse matrix +#' #-------------------------- #' -#' dgrM = as(dgcM, "RsparseMatrix") +#' dgrM = as(dgcM, "RsparseMatrix") #' -#' print(dim(dgrM)) +#' print(dim(dgrM)) #' -#' res_dgr = TO_scipy_sparse(dgrM) +#' res_dgr = TO_scipy_sparse(dgrM) #' -#' print(res_dgr$shape) -#' } -#' } +#' print(res_dgr$shape) +#' } +#' } +#' } +#' }, silent = TRUE) + TO_scipy_sparse = function(R_sparse_matrix) { if (inherits(R_sparse_matrix, "dgCMatrix")) { diff --git a/R-package/R/mat_2scipy_sparse.R b/R-package/R/mat_2scipy_sparse.R index 8ec8de18..afa56437 100644 --- a/R-package/R/mat_2scipy_sparse.R +++ b/R-package/R/mat_2scipy_sparse.R @@ -9,20 +9,24 @@ #' @references https://docs.scipy.org/doc/scipy/reference/sparse.html #' @examples #' -#' if (reticulate::py_available() && reticulate::py_module_available("scipy")) { +#' try({ +#' if (reticulate::py_available(initialize = TRUE)) { +#' if (reticulate::py_module_available("scipy")) { #' -#' library(RGF) +#' library(RGF) #' -#' set.seed(1) +#' set.seed(1) #' -#' x = matrix(runif(1000), nrow = 100, ncol = 10) +#' x = matrix(runif(1000), nrow = 100, ncol = 10) #' -#' res = mat_2scipy_sparse(x) +#' res = mat_2scipy_sparse(x) #' -#' print(dim(x)) +#' print(dim(x)) #' -#' print(res$shape) -#' } +#' print(res$shape) +#' } +#' } +#' }, silent = TRUE) mat_2scipy_sparse = function(x, format = 'sparse_row_matrix') { diff --git a/R-package/R/package.R b/R-package/R/package.R index 1dc3c2ba..05ed72f8 100644 --- a/R-package/R/package.R +++ b/R-package/R/package.R @@ -27,33 +27,38 @@ RGF_mod <- NULL; RGF_utils <- NULL; SCP <- NULL; # reticulate::use_condaenv('test-environment', required = TRUE) #--------------------------------------------------------------------------- - if (reticulate::py_available(initialize = TRUE)) { - - if (reticulate::py_module_available("rgf.sklearn")) { - - RGF_mod <<- reticulate::import("rgf.sklearn", delay_load = TRUE) - } - # else { - # packageStartupMessage("The 'rgf.sklearn' module is not available!") # keep these lines for debugging - # } - - if (reticulate::py_module_available("rgf.utils")) { - - RGF_utils <<- reticulate::import("rgf.utils", delay_load = TRUE) - } - # else { - # packageStartupMessage("The 'rgf.utils' module is not available!") # keep these lines for debugging - # } - - if (reticulate::py_module_available("scipy")) { - - SCP <<- reticulate::import("scipy", delay_load = TRUE, convert = FALSE) - } - # else { - # packageStartupMessage("The 'scipy' package is not available!") # keep these lines for debugging - # } - } - # else { - # packageStartupMessage("Python is not available!") # keep these lines for debugging - # } + try({ # I added the try() functions in version 1.0.7 because I received a similar warning as mentioned in: [ https://github.com/rstudio/reticulate/issues/730#issuecomment-594365528 ] and [ https://github.com/rstudio/reticulate/issues/814 ] + RGF_mod <<- reticulate::import("rgf.sklearn", delay_load = TRUE) + }, silent = TRUE) + + try({ + RGF_utils <<- reticulate::import("rgf.utils", delay_load = TRUE) + }, silent = TRUE) + + try({ + SCP <<- reticulate::import("scipy", delay_load = TRUE, convert = FALSE) + }, silent = TRUE) + + # 
#................................................................................. keep this as a reference, however it gives a warning on CRAN because it tries to initialize python + # try({ + # if (reticulate::py_module_available("rgf.sklearn")) { + # RGF_mod <<- reticulate::import("rgf.sklearn", delay_load = TRUE) + # } + # # else { + # # packageStartupMessage("The 'rgf.sklearn' module is not available!") # keep these lines for debugging + # # } + # if (reticulate::py_module_available("rgf.utils")) { + # RGF_utils <<- reticulate::import("rgf.utils", delay_load = TRUE) + # } + # # else { + # # packageStartupMessage("The 'rgf.utils' module is not available!") + # # } + # if (reticulate::py_module_available("scipy")) { + # SCP <<- reticulate::import("scipy", delay_load = TRUE, convert = FALSE) + # } + # # else { + # # packageStartupMessage("The 'scipy' package is not available!") + # # } + # }, silent = TRUE) + # #................................................................................. } diff --git a/R-package/R/zzz.R b/R-package/R/zzz.R deleted file mode 100644 index d0e51881..00000000 --- a/R-package/R/zzz.R +++ /dev/null @@ -1,6 +0,0 @@ -# temporary startup message beginning from version 1.0.3 [ SEE : http://r-pkgs.had.co.nz/r.html#r-differences ] - -.onAttach <- function(libname, pkgname) { - - packageStartupMessage("Beginning from version 1.0.3 the 'dgCMatrix_2scipy_sparse' function was renamed to 'TO_scipy_sparse' and now accepts either a 'dgCMatrix' or a 'dgRMatrix' as input. The appropriate format for the 'RGF' package in case of sparse matrices is the 'dgCMatrix' format (scipy.sparse.csc_matrix)") -} diff --git a/R-package/README.md b/R-package/README.md index 0a0465f4..82ba9dff 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -10,7 +10,7 @@
-The **RGF** package is a wrapper of the [Regularized Greedy Forest (RGF)](https://github.com/RGF-team/rgf_python) *python* package, which also includes a [Multi-core implementation (FastRGF)](https://github.com/RGF-team/rgf/tree/master/FastRGF). More details on the functionality of the RGF package can be found in the [blog-post](http://mlampros.github.io/2018/02/14/the_RGF_package/) and in the package Documentation.
+The **RGF** package is a wrapper of the [Regularized Greedy Forest (RGF)](https://github.com/RGF-team/rgf) *Python* package, which also includes a [Multi-core implementation (FastRGF)](https://github.com/RGF-team/rgf/tree/master/FastRGF). More details on the functionality of the RGF package can be found in the [blog post](http://mlampros.github.io/2018/02/14/the_RGF_package/) and in the package Documentation.
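A minimal usage sketch of the wrapper described above, assuming Python and the `rgf.sklearn` module are already available to reticulate; the calls mirror the examples shipped with the package:

```r
# minimal sketch, assuming Python and the 'rgf.sklearn' module are available
library(RGF)

set.seed(1)
x = matrix(runif(1000), nrow = 100, ncol = 10)   # 100 observations, 10 features
y = runif(100)                                   # continuous response

rgf_regr = RGF_Regressor$new(max_leaf = 50)      # regularized greedy forest regressor
rgf_regr$fit(x, y)                               # train the model
preds = rgf_regr$predict(x)                      # predictions for the training data
```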
@@ -339,7 +339,7 @@ gcc --version
-Normally MinGW is installed in the **C:\\** directory. So, first delete the folder **C:\\MinGW** (if it already exists), and then remove the environment variable from (Control Panel >> System and Security >> System >> Advanced system settings >> Environment variables >> System variables >> Path >> Edit) which usually is **C:\\MinGW\\bin**. Then download the most recent version of [MinGW](http://www.mingw.org/wiki/Getting_Started), and especially the **mingw-get-setup.exe** which is an *automated GUI installer assistant*. After the new version is installed successfully, update the environment variable by adding **C:\\MinGW\\bin** in (Control Panel >> System and Security >> System >> Advanced system settings >> Environment variables >> System variables >> Path >> Edit). Then open a new command prompt (console) and type,
+Normally MinGW is installed in the **C:\\** directory. So, first delete the folder **C:\\MinGW** (if it already exists) and remove the **C:\\MinGW\\bin** entry from the Path environment variable (Control Panel >> System and Security >> System >> Advanced system settings >> Environment variables >> System variables >> Path >> Edit). Then download the most recent version of [MinGW](http://mingw-w64.org/doku.php), in particular the **mingw-get-setup.exe**, which is an *automated GUI installer assistant*. After the new version has been installed successfully, add **C:\\MinGW\\bin** back to the Path environment variable (Control Panel >> System and Security >> System >> Advanced system settings >> Environment variables >> System variables >> Path >> Edit). Then open a new command prompt (console) and type,
diff --git a/R-package/man/FastRGF_Classifier.Rd b/R-package/man/FastRGF_Classifier.Rd index e35e1d2e..edb11387 100644 --- a/R-package/man/FastRGF_Classifier.Rd +++ b/R-package/man/FastRGF_Classifier.Rd @@ -13,7 +13,7 @@ # min_child_weight = 5.0, data_l2 = 2.0, # sparse_max_features = 80000, # sparse_min_occurences = 5, -# calc_prob="sigmoid", n_jobs = 1, +# calc_prob = "sigmoid", n_jobs = 1, # verbose = 0) } \description{ @@ -46,7 +46,7 @@ the \emph{score} function returns the mean accuracy on the given test data and l min_child_weight = 5.0, data_l2 = 2.0, sparse_max_features = 80000, sparse_min_occurences = 5, - calc_prob="sigmoid", n_jobs = 1, + calc_prob = "sigmoid", n_jobs = 1, verbose = 0)}}{} \item{\code{--------------}}{} @@ -79,21 +79,25 @@ the \emph{score} function returns the mean accuracy on the given test data and l \examples{ -if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("rgf.sklearn")) { - library(RGF) + library(RGF) - set.seed(1) - x = matrix(runif(100000), nrow = 100, ncol = 1000) + set.seed(1) + x = matrix(runif(100000), nrow = 100, ncol = 1000) - y = sample(1:2, 100, replace = TRUE) + y = sample(1:2, 100, replace = TRUE) - fast_RGF_class = FastRGF_Classifier$new(max_leaf = 50) + fast_RGF_class = FastRGF_Classifier$new(max_leaf = 50) - fast_RGF_class$fit(x, y) + fast_RGF_class$fit(x, y) - preds = fast_RGF_class$predict_proba(x) -} + preds = fast_RGF_class$predict_proba(x) + } + } +}, silent = TRUE) } \references{ \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Tong Zhang, FastRGF: Multi-core Implementation of Regularized Greedy Forest (https://github.com/RGF-team/rgf/tree/master/FastRGF)} @@ -125,6 +129,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} \subsection{Method \code{new()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{FastRGF_Classifier$new( @@ -193,6 +198,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/R-package/man/FastRGF_Regressor.Rd b/R-package/man/FastRGF_Regressor.Rd index bc00267a..9b0d8895 100644 --- a/R-package/man/FastRGF_Regressor.Rd +++ b/R-package/man/FastRGF_Regressor.Rd @@ -71,21 +71,25 @@ the \emph{score} function returns the coefficient of determination ( R^2 ) for t \examples{ -if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("rgf.sklearn")) { - library(RGF) + library(RGF) - set.seed(1) - x = matrix(runif(100000), nrow = 100, ncol = 1000) + set.seed(1) + x = matrix(runif(100000), nrow = 100, ncol = 1000) - y = runif(100) + y = runif(100) - fast_RGF_regr = FastRGF_Regressor$new(max_leaf = 50) + fast_RGF_regr = FastRGF_Regressor$new(max_leaf = 50) - fast_RGF_regr$fit(x, y) + fast_RGF_regr$fit(x, y) - preds = fast_RGF_regr$predict(x) -} + preds = fast_RGF_regr$predict(x) + } + } +}, silent = TRUE) } \references{ \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Tong Zhang, FastRGF: Multi-core Implementation of Regularized Greedy Forest (https://github.com/RGF-team/rgf/tree/master/FastRGF)} @@ -117,6 +121,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} \subsection{Method \code{new()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{FastRGF_Regressor$new( @@ -179,6 +184,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/R-package/man/Internal_class.Rd b/R-package/man/Internal_class.Rd index c469f219..0b26a3da 100644 --- a/R-package/man/Internal_class.Rd +++ b/R-package/man/Internal_class.Rd @@ -26,6 +26,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-fit}{}}} \subsection{Method \code{fit()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$fit(x, y, sample_weight = NULL)}\if{html}{\out{
}} @@ -34,6 +35,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-predict}{}}} \subsection{Method \code{predict()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$predict(x)}\if{html}{\out{
}} @@ -42,6 +44,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-predict_proba}{}}} \subsection{Method \code{predict_proba()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$predict_proba(x)}\if{html}{\out{
}} @@ -50,6 +53,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-cleanup}{}}} \subsection{Method \code{cleanup()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$cleanup()}\if{html}{\out{
}} @@ -58,6 +62,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-get_params}{}}} \subsection{Method \code{get_params()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$get_params(deep = TRUE)}\if{html}{\out{
}} @@ -66,6 +71,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-score}{}}} \subsection{Method \code{score()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$score(x, y, sample_weight = NULL)}\if{html}{\out{
}} @@ -74,6 +80,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-feature_importances}{}}} \subsection{Method \code{feature_importances()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$feature_importances()}\if{html}{\out{
}} @@ -82,6 +89,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-dump_model}{}}} \subsection{Method \code{dump_model()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$dump_model()}\if{html}{\out{
}} @@ -90,6 +98,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-save_model}{}}} \subsection{Method \code{save_model()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Internal_class$save_model(filename)}\if{html}{\out{
}} @@ -98,6 +107,7 @@ Internal R6 class for all secondary functions used in RGF and FastRGF } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/R-package/man/RGF_Classifier.Rd b/R-package/man/RGF_Classifier.Rd index 86324e80..274c27fc 100644 --- a/R-package/man/RGF_Classifier.Rd +++ b/R-package/man/RGF_Classifier.Rd @@ -84,7 +84,7 @@ the \emph{save_model} function saves a model to a file from which training can d \item{\code{dump_model()}}{} \item{\code{--------------}}{} - + \item{\code{save_model(filename)}}{} \item{\code{--------------}}{} @@ -93,21 +93,25 @@ the \emph{save_model} function saves a model to a file from which training can d \examples{ -if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("rgf.sklearn")) { - library(RGF) + library(RGF) - set.seed(1) - x = matrix(runif(1000), nrow = 100, ncol = 10) + set.seed(1) + x = matrix(runif(1000), nrow = 100, ncol = 10) - y = sample(1:2, 100, replace = TRUE) + y = sample(1:2, 100, replace = TRUE) - RGF_class = RGF_Classifier$new(max_leaf = 50) + RGF_class = RGF_Classifier$new(max_leaf = 50) - RGF_class$fit(x, y) + RGF_class$fit(x, y) - preds = RGF_class$predict_proba(x) -} + preds = RGF_class$predict_proba(x) + } + } +}, silent = TRUE) } \references{ \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Rie Johnson and Tong Zhang, Learning Nonlinear Functions Using Regularized Greedy Forest} @@ -139,6 +143,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} \subsection{Method \code{new()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{RGF_Classifier$new( @@ -207,6 +212,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/R-package/man/RGF_Regressor.Rd b/R-package/man/RGF_Regressor.Rd index 2842f23c..60e6e031 100644 --- a/R-package/man/RGF_Regressor.Rd +++ b/R-package/man/RGF_Regressor.Rd @@ -85,21 +85,25 @@ the \emph{save_model} function saves a model to a file from which training can d \examples{ -if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("rgf.sklearn")) { - library(RGF) + library(RGF) - set.seed(1) - x = matrix(runif(1000), nrow = 100, ncol = 10) + set.seed(1) + x = matrix(runif(1000), nrow = 100, ncol = 10) - y = runif(100) + y = runif(100) - RGF_regr = RGF_Regressor$new(max_leaf = 50) + RGF_regr = RGF_Regressor$new(max_leaf = 50) - RGF_regr$fit(x, y) + RGF_regr$fit(x, y) - preds = RGF_regr$predict(x) -} + preds = RGF_regr$predict(x) + } + } +}, silent = TRUE) } \references{ \emph{https://github.com/RGF-team/rgf/tree/master/python-package}, \emph{Rie Johnson and Tong Zhang, Learning Nonlinear Functions Using Regularized Greedy Forest} @@ -131,6 +135,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} \subsection{Method \code{new()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{RGF_Regressor$new( @@ -193,6 +198,7 @@ if (reticulate::py_available() && reticulate::py_module_available("rgf.sklearn") } \if{html}{\out{
}} \if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/R-package/man/TO_scipy_sparse.Rd b/R-package/man/TO_scipy_sparse.Rd index d955151c..ff68b5d5 100644 --- a/R-package/man/TO_scipy_sparse.Rd +++ b/R-package/man/TO_scipy_sparse.Rd @@ -19,45 +19,49 @@ The \emph{dgCMatrix} class is a class of sparse numeric matrices in the compress } \examples{ -if (reticulate::py_available() && reticulate::py_module_available("scipy")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("scipy")) { - if (Sys.info()["sysname"] != 'Darwin') { + if (Sys.info()["sysname"] != 'Darwin') { - library(RGF) + library(RGF) - # 'dgCMatrix' sparse matrix - #-------------------------- + # 'dgCMatrix' sparse matrix + #-------------------------- - data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) + data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) - dgcM = Matrix::Matrix( - data = data - , nrow = 3 - , ncol = 3 - , byrow = TRUE - , sparse = TRUE - ) + dgcM = Matrix::Matrix( + data = data + , nrow = 3 + , ncol = 3 + , byrow = TRUE + , sparse = TRUE + ) - print(dim(dgcM)) + print(dim(dgcM)) - res = TO_scipy_sparse(dgcM) + res = TO_scipy_sparse(dgcM) - print(res$shape) + print(res$shape) - # 'dgRMatrix' sparse matrix - #-------------------------- + # 'dgRMatrix' sparse matrix + #-------------------------- - dgrM = as(dgcM, "RsparseMatrix") + dgrM = as(dgcM, "RsparseMatrix") - print(dim(dgrM)) + print(dim(dgrM)) - res_dgr = TO_scipy_sparse(dgrM) + res_dgr = TO_scipy_sparse(dgrM) - print(res_dgr$shape) - } -} + print(res_dgr$shape) + } + } + } +}, silent = TRUE) } \references{ https://stat.ethz.ch/R-manual/R-devel/library/Matrix/html/dgCMatrix-class.html, https://stat.ethz.ch/R-manual/R-devel/library/Matrix/html/dgRMatrix-class.html, https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix diff --git a/R-package/man/mat_2scipy_sparse.Rd b/R-package/man/mat_2scipy_sparse.Rd index 8f5abee7..a5f9b290 100644 --- a/R-package/man/mat_2scipy_sparse.Rd +++ b/R-package/man/mat_2scipy_sparse.Rd @@ -19,20 +19,24 @@ This function allows the user to convert an R matrix to a scipy sparse matrix. 
T } \examples{ -if (reticulate::py_available() && reticulate::py_module_available("scipy")) { +try({ + if (reticulate::py_available(initialize = TRUE)) { + if (reticulate::py_module_available("scipy")) { - library(RGF) + library(RGF) - set.seed(1) + set.seed(1) - x = matrix(runif(1000), nrow = 100, ncol = 10) + x = matrix(runif(1000), nrow = 100, ncol = 10) - res = mat_2scipy_sparse(x) + res = mat_2scipy_sparse(x) - print(dim(x)) + print(dim(x)) - print(res$shape) -} + print(res$shape) + } + } +}, silent = TRUE) } \references{ https://docs.scipy.org/doc/scipy/reference/sparse.html diff --git a/R-package/tests/testthat/helper-init.R b/R-package/tests/testthat/helper-init.R new file mode 100644 index 00000000..c8820d52 --- /dev/null +++ b/R-package/tests/testthat/helper-init.R @@ -0,0 +1,9 @@ + +# prefer Python 3 if available [ see: https://github.com/rstudio/reticulate/blob/master/tests/testthat/helper-init.R ] +if (!reticulate::py_available(initialize = FALSE) && + is.na(Sys.getenv("RETICULATE_PYTHON", unset = NA))) +{ + python <- Sys.which("python3") + if (nzchar(python)) + reticulate::use_python(python, required = TRUE) +} diff --git a/R-package/tests/testthat/helper-skip.R b/R-package/tests/testthat/helper-skip.R new file mode 100644 index 00000000..958a48e2 --- /dev/null +++ b/R-package/tests/testthat/helper-skip.R @@ -0,0 +1,29 @@ + +#....................................... +# skip a test if python is not available [ see: https://github.com/rstudio/reticulate/tree/master/tests/testthat ] +#....................................... + +skip_test_if_no_python <- function() { + if (!reticulate::py_available(initialize = TRUE)) + testthat::skip("Python bindings not available for testing") +} + + +#......................................... +# skip a test if a module is not available [ see: https://github.com/rstudio/reticulate ] +#......................................... 
+ +skip_test_if_no_module <- function(MODULE) { # MODULE is of type character string ( length(MODULE) >= 1 ) + + try({ + if (length(MODULE) == 1) { + module_exists <- reticulate::py_module_available(MODULE)} + else { + module_exists <- sum(as.vector(sapply(MODULE, function(x) reticulate::py_module_available(x)))) == length(MODULE) + } + }, silent = TRUE) + + if (!module_exists) { + testthat::skip(paste0(MODULE, " is not available for testthat-testing")) + } +} diff --git a/R-package/tests/testthat/setup.R b/R-package/tests/testthat/setup.R new file mode 100644 index 00000000..04794efc --- /dev/null +++ b/R-package/tests/testthat/setup.R @@ -0,0 +1,43 @@ + +# Input data + +# data [ regression and (multiclass-) classification RGF_Regressor, RGF_Classifier ] +#----------------------------------------------------------------------------------- + +set.seed(1) +x_rgf = matrix(runif(1000), nrow = 100, ncol = 10) + + +# data [ regression and (multiclass-) classification FastRGF_Regressor, FastRGF_Classifier ] +#------------------------------------------------------------------------------------------- + +set.seed(2) +x_FASTrgf = matrix(runif(100000), nrow = 100, ncol = 1000) # high dimensionality for 'FastRGF' (however more observations are needed so that it works properly) + + +# response regression +#-------------------- + +set.seed(3) +y_reg = runif(100) + + +# response "binary" classification +#--------------------------------- + +set.seed(4) +y_BINclass = sample(1:2, 100, replace = TRUE) + + +# response "multiclass" classification +#------------------------------------- + +set.seed(5) +y_MULTIclass = sample(1:5, 100, replace = TRUE) + + +# weights for the fit function +#------------------------------ + +set.seed(6) +W = runif(100) diff --git a/R-package/tests/testthat/test-RGF_package.R b/R-package/tests/testthat/test-RGF_package.R index 10d75e05..01b86fd1 100644 --- a/R-package/tests/testthat/test-RGF_package.R +++ b/R-package/tests/testthat/test-RGF_package.R @@ -1,70 +1,6 @@ context('rgf R-package tests') -#======================================================================================== -# helper function to skip tests if we don't have the 'foo' module [ https://github.com/rstudio/reticulate ] -skip_test_if_no_module <- function(MODULE) { # MODULE is of type character string ( length(MODULE) >= 1 ) - - if (length(MODULE) == 1) { - - module_exists <- reticulate::py_module_available(MODULE)} - - else { - - module_exists <- sum(as.vector(sapply(MODULE, function(x) reticulate::py_module_available(x)))) == length(MODULE) - } - - if (!module_exists) { - - testthat::skip(paste0(MODULE, " is not available for testthat-testing")) - } -} - -#=========================================================================================== -# Input data - -# data [ regression and (multiclass-) classification RGF_Regressor, RGF_Classifier ] -#----------------------------------------------------------------------------------- - -set.seed(1) -x_rgf = matrix(runif(1000), nrow = 100, ncol = 10) - - -# data [ regression and (multiclass-) classification FastRGF_Regressor, FastRGF_Classifier ] -#------------------------------------------------------------------------------------------- - -set.seed(2) -x_FASTrgf = matrix(runif(100000), nrow = 100, ncol = 1000) # high dimensionality for 'FastRGF' (however more observations are needed so that it works properly) - - -# response regression -#-------------------- - -set.seed(3) -y_reg = runif(100) - - -# response "binary" classification 
-#--------------------------------- - -set.seed(4) -y_BINclass = sample(1:2, 100, replace = T) - - -# response "multiclass" classification -#------------------------------------- - -set.seed(5) -y_MULTIclass = sample(1:5, 100, replace = T) - - -# weights for the fit function -#------------------------------ - -set.seed(6) -W = runif(100) - -#=========================================================================================== # Tests for 'RGF_Regressor' & 'RGF_Classifier' # tests for 'RGF_Regressor' @@ -72,6 +8,13 @@ W = runif(100) testthat::test_that("the methods of the 'RGF_Regressor' class return the correct output", { + #............................................................................................................ keep this as a reference as I might have to disable the tests on CRAN due to the fact that I don't know beforehand which Python version it uses + # # don't run tests on CRAN [ see: https://github.com/rstudio/reticulate/issues/715#issuecomment-700821462 ] + # testthat::skip_on_cran() + #............................................................................................................ + + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_regr = RGF_Regressor$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) @@ -100,6 +43,8 @@ testthat::test_that("the methods of the 'RGF_Regressor' class return the correct testthat::test_that("the methods of the 'RGF_Classifier' class return the correct output (binary classification)", { + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) @@ -127,6 +72,8 @@ testthat::test_that("the methods of the 'RGF_Classifier' class return the correc testthat::test_that("the methods of the 'RGF_Classifier' class return the correct output (multiclass classification)", { + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) @@ -151,7 +98,7 @@ testthat::test_that("the methods of the 'RGF_Classifier' class return the correc }) -#=========================================================================================== +#=========================================================================================== # Tests for 'FastRGF_Regressor' & 'FastRGF_Classifier' # tests for 'FastRGF_Regressor' @@ -159,6 +106,8 @@ testthat::test_that("the methods of the 'RGF_Classifier' class return the correc testthat::test_that("the methods of the 'FastRGF_Regressor' class return the correct output", { + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_regr = FastRGF_Regressor$new(n_estimators = 50, max_bin = 65000) @@ -192,6 +141,8 @@ testthat::test_that("the methods of the 'FastRGF_Regressor' class return the cor testthat::test_that("the methods of the 'FastRGF_Classifier' class return the correct output (binary classification)", { + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_class = FastRGF_Classifier$new(n_estimators = 50, max_bin = 65000) @@ -220,6 +171,8 @@ testthat::test_that("the methods of the 'FastRGF_Classifier' class return the co testthat::test_that("the methods of the 'FastRGF_Classifier' class return the correct output (multiclass classification)", { + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") init_class = FastRGF_Classifier$new(n_estimators = 50, max_bin = 65000) @@ -245,7 +198,7 @@ testthat::test_that("the methods of the 'FastRGF_Classifier' class return the co }) 
-#=========================================================================================== +#=========================================================================================== # Tests for scipy sparse @@ -254,6 +207,8 @@ testthat::test_that("the methods of the 'FastRGF_Classifier' class return the co testthat::test_that("the 'mat_2scipy_sparse' returns an error in case that the data is not inheriting matrix class", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") x_rgf_invalid = as.data.frame(x_rgf) @@ -264,6 +219,8 @@ testthat::test_that("the 'mat_2scipy_sparse' returns an error in case that the d testthat::test_that("the 'mat_2scipy_sparse' returns an error in case that the 'format' parameter is invalid", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") testthat::expect_error( mat_2scipy_sparse(x_rgf, format = 'invalid') ) @@ -272,6 +229,8 @@ testthat::test_that("the 'mat_2scipy_sparse' returns an error in case that the ' testthat::test_that("the 'mat_2scipy_sparse' returns a scipy CSR sparse matrix", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") res = mat_2scipy_sparse(x_rgf, format = 'sparse_row_matrix') @@ -284,6 +243,8 @@ testthat::test_that("the 'mat_2scipy_sparse' returns a scipy CSR sparse matrix", testthat::test_that("the 'mat_2scipy_sparse' returns a scipy CSC sparse matrix", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") res = mat_2scipy_sparse(x_rgf, format = 'sparse_column_matrix') @@ -307,6 +268,8 @@ if (Sys.info()["sysname"] != 'Darwin') { testthat::test_that("the 'TO_scipy_sparse' returns an error in case that the input object is not of type 'dgCMatrix' or 'dgRMatrix'", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") mt = matrix(runif(20), nrow = 5, ncol = 4) @@ -317,6 +280,8 @@ if (Sys.info()["sysname"] != 'Darwin') { testthat::test_that("the 'TO_scipy_sparse' returns the correct output for dgCMatrix", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) @@ -337,6 +302,8 @@ if (Sys.info()["sysname"] != 'Darwin') { testthat::test_that("the 'TO_scipy_sparse' returns the correct output for dgRMatrix", { + skip_test_if_no_python() + skip_test_if_no_module("scipy") data = c(1, 0, 2, 0, 0, 3, 4, 5, 6) @@ -360,6 +327,8 @@ if (Sys.info()["sysname"] != 'Darwin') { testthat::test_that("the RGF_Regressor works with sparse (scipy) matrices", { + skip_test_if_no_python() + skip_test_if_no_module(c("rgf.sklearn", 'scipy')) set.seed(1) @@ -394,18 +363,20 @@ if (Sys.info()["sysname"] != 'Darwin') { } -#=========================================================================================== +#=========================================================================================== # test feature importances testthat::test_that("the feature importances of the 'RGF_Regressor' class works as expected", { - + + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") - + init_regr = RGF_Regressor$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) - + init_regr$fit(x = x_rgf, y = y_reg, sample_weight = W) # include also a vector of weights - + vec_imp = init_regr$feature_importances() testthat::expect_true( inherits(vec_imp, 'array') && length(vec_imp) == ncol(x_rgf) ) @@ -417,47 +388,49 @@ testthat::test_that("the feature importances of the 'RGF_Regressor' class works testthat::test_that("the 'dump_model' method returns the correct output (Dumps the forest information to the R session -- works ONLY for RGF and NOT for FastRGF)", { - + + 
skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") - + init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) - + init_class$fit(x = x_rgf, y = y_BINclass) - + dump_model = init_class$dump_model() - + #--------------------------------------- # for pretty-print in the R session use: print( dump_model() ) #--------------------------------------- - + output_dump = reticulate::py_capture_output(dump_model()) - + #------------------------------------------------- # function to search for terms in the dumped model #------------------------------------------------- - + search_for_term = function(term, model_dump) { - + regex = gregexpr(pattern = term, text = model_dump)[[1]] - + len_result = attributes(regex)$match.length regex = as.vector(regex) - + term_results = unlist(lapply(1:length(regex), function(x) { - + substr(model_dump, start = regex[x], stop = regex[x] + len_result[x] - 1) })) - + return(all(term_results == term)) } - + is_depth_0_in_model_dump = search_for_term(term = 'depth=0', model_dump = output_dump) is_gain_0_in_model_dump = search_for_term(term = 'gain=0', model_dump = output_dump) - + is_depth_1_in_model_dump = search_for_term(term = 'depth=1', model_dump = output_dump) is_gain_1_in_model_dump = search_for_term(term = 'gain=1', model_dump = output_dump) - - testthat::expect_true( nchar(output_dump) > 0 && object.size(output_dump) > 0 && is_depth_0_in_model_dump && + + testthat::expect_true( nchar(output_dump) > 0 && object.size(output_dump) > 0 && is_depth_0_in_model_dump && is_gain_0_in_model_dump && is_depth_1_in_model_dump && is_gain_1_in_model_dump ) }) @@ -467,125 +440,131 @@ testthat::test_that("the 'dump_model' method returns the correct output (Dumps t testthat::test_that("the 'save_model' method returns the correct output -- works ONLY for RGF and NOT for FastRGF", { - + + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") - + init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) - + init_class$fit(x = x_rgf, y = y_BINclass) - + tmp_file = tempfile(fileext = '.model') - + SIZE_begin = file.info(tmp_file)$size - + sv_md = init_class$save_model(filename = tmp_file) - + SIZE_after = file.info(tmp_file)$size # size of the saved model - + binary_file = file(tmp_file, "rb") # connection to binary file raw_binary = readBin(binary_file, character(), n = 10000) # read first 10.000 characters of the binary file close(binary_file) # close connection - + idx_chars = which(raw_binary != "") # keep non-empty strings raw_binary = raw_binary[idx_chars] - + idx_max_leaf = which(gregexpr('max_leaf_forest', raw_binary) != -1) # search for 'max_leaf_forest' idx_sl2 = which(gregexpr('reg_sL2', raw_binary) != -1) # search for 'reg_sL2' - + if (file.exists(tmp_file)) file.remove(tmp_file) - + testthat::expect_true( is.na(SIZE_begin) && (SIZE_after > 0) && (length(idx_max_leaf) > 0) && (length(idx_sl2) > 0) ) }) -#=========================================================================================== +#=========================================================================================== # test the 'cleanup' method - + testthat::test_that("the 'cleanup' method (ESTIMATOR specific) works as expected for both RGF and FastRGF (checking of the size of the temporary directory before and after the '$fit' method)", { - + + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") - - #-------------------------------------------------------------------------------- - # default directory where the temporary 'rgf' files are 
saved - + + #-------------------------------------------------------------------------------- + # default directory where the temporary 'rgf' files are saved + default_dir = file.path(dirname(tempdir()), 'rgf') - - #-------------------------------------------------------------------------------- + + #-------------------------------------------------------------------------------- # RGF init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) init_class$fit(x = x_rgf, y = y_BINclass) - + lst_files_tmp_upd_rgf = list.files(path = default_dir) - + init_exists_upd_rgf = (dir.exists(default_dir) == TRUE) init_num_files_rgf = length(lst_files_tmp_upd_rgf) - + init_class$cleanup() - + lst_files_tmp_upd_rgf = list.files(path = default_dir) init_num_files_rgf_after_clean = length(lst_files_tmp_upd_rgf) - + end_state_rgf = (init_num_files_rgf_after_clean < init_num_files_rgf) - - #-------------------------------------------------------------------------------- + + #-------------------------------------------------------------------------------- # FastRGF - + init_class = FastRGF_Classifier$new(n_estimators = 50, max_bin = 65000) init_class$fit(x = x_FASTrgf, y = y_MULTIclass) - + lst_files_tmp_upd_fastrgf = list.files(path = default_dir) init_num_files_fastrgf = length(lst_files_tmp_upd_fastrgf) - + init_class$cleanup() - + lst_files_tmp_upd_fastrgf = list.files(path = default_dir) init_num_files_fastrgf_after_clean = length(lst_files_tmp_upd_fastrgf) - + end_state_fastrgf = (init_num_files_fastrgf_after_clean < init_num_files_fastrgf) - + testthat::expect_true( init_exists_upd_rgf && end_state_rgf && end_state_fastrgf ) }) testthat::test_that("the 'cleanup' method (APPLIES TO ALL ESTIMATORS) works as expected for both RGF and FastRGF (checking of the size of the temporary directory before and after the '$fit' method)", { - + + skip_test_if_no_python() + skip_test_if_no_module("rgf.sklearn") - - #-------------------------------------------------------------------------------- - # default directory where the temporary 'rgf' files are saved - + + #-------------------------------------------------------------------------------- + # default directory where the temporary 'rgf' files are saved + default_dir = file.path(dirname(tempdir()), 'rgf') - - #-------------------------------------------------------------------------------- + + #-------------------------------------------------------------------------------- # RGF - + lst_files_tmp = list.files(path = default_dir) init_class = RGF_Classifier$new(max_leaf = 50, sl2 = 0.1, n_iter = 10) init_class$fit(x = x_rgf, y = y_BINclass) - + lst_files_tmp_upd_rgf = list.files(path = default_dir) - + init_exists_upd_rgf = (dir.exists(default_dir) == TRUE) init_num_files_rgf = length(lst_files_tmp_upd_rgf) - #-------------------------------------------------------------------------------- + #-------------------------------------------------------------------------------- # FastRGF - + init_class = FastRGF_Classifier$new(n_estimators = 50, max_bin = 65000) init_class$fit(x = x_FASTrgf, y = y_MULTIclass) - + lst_files_tmp_upd_fastrgf = list.files(path = default_dir) init_num_files_fastrgf = length(lst_files_tmp_upd_fastrgf) - + RGF_cleanup_temp_files() - + lst_files_tmp_end_state = list.files(path = default_dir) - + testthat::expect_true( init_exists_upd_rgf && (init_num_files_rgf > 0) && (init_num_files_fastrgf > init_num_files_rgf) && ( init_num_files_rgf > length(lst_files_tmp) && init_num_files_fastrgf > length(lst_files_tmp_end_state) ) ) # 
normally, both the initial and the end state must have the same length [ length(lst_files_tmp) == length(lst_files_tmp_end_state) ] })
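For reference, a minimal sketch of how the new skip helpers are intended to be used at the top of a test; the test name and the final expectation are illustrative, while `skip_test_if_no_python`, `skip_test_if_no_module`, `x_rgf` and `y_reg` come from the `helper-skip.R` and `setup.R` files added in this patch:

```r
# minimal sketch of the skip-helper pattern used throughout the test suite
testthat::test_that("the RGF_Regressor can be fitted when Python and rgf are available", {

  skip_test_if_no_python()                # defined in helper-skip.R
  skip_test_if_no_module("rgf.sklearn")   # defined in helper-skip.R

  init_regr = RGF_Regressor$new(max_leaf = 50)
  init_regr$fit(x = x_rgf, y = y_reg)     # 'x_rgf' and 'y_reg' are created in setup.R

  testthat::expect_length(init_regr$predict(x_rgf), nrow(x_rgf))
})
```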