diff --git a/DESCRIPTION b/DESCRIPTION
index cfa1443bb..8619a5a60 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: parsnip
Title: A Common API to Modeling and Analysis Functions
-Version: 1.0.2.9003
+Version: 1.0.2.9004
Authors@R: c(
person("Max", "Kuhn", , "max@rstudio.com", role = c("aut", "cre")),
person("Davis", "Vaughan", , "davis@rstudio.com", role = "aut"),
diff --git a/NAMESPACE b/NAMESPACE
index 77a011c37..a64bb4d0e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ S3method(fit,model_spec)
S3method(fit_xy,decision_tree)
S3method(fit_xy,gen_additive_mod)
S3method(fit_xy,model_spec)
+S3method(fit_xy,rand_forest)
S3method(glance,model_fit)
S3method(has_multi_predict,default)
S3method(has_multi_predict,model_fit)
diff --git a/R/rand_forest.R b/R/rand_forest.R
index 8655a7d9e..4880d4464 100644
--- a/R/rand_forest.R
+++ b/R/rand_forest.R
@@ -163,3 +163,25 @@ check_args.rand_forest <- function(object) {
# move translate checks here?
invisible(object)
}
+
+# ------------------------------------------------------------------------------
+
+#' @export
+fit_xy.rand_forest <- function(object,
+ x,
+ y,
+ case_weights = NULL,
+ control = parsnip::control_parsnip(),
+ ...) {
+ # fit_xy() method for rand_forest specs: guards the aorsf engine, then falls through.
+ if (object$mode == "censored regression" && object$engine == "aorsf") {
+ # The CRAN version of aorsf::orsf() needs two variables on the left-hand side
+ # of the formula, written as `Surv(time, status) ~ .` or `time + status ~ .`,
+ # so the x/y interface is rejected; see https://github.com/ropensci/aorsf/issues/11
+ rlang::abort("For the `'aorsf'` engine, please use the formula interface via `fit()`.")
+ }
+
+ # All other cases defer to the next method, i.e. parsnip::fit_xy.model_spec()
+ res <- NextMethod()
+ res
+}
diff --git a/R/rand_forest_aorsf.R b/R/rand_forest_aorsf.R
new file mode 100644
index 000000000..b1b6d8d44
--- /dev/null
+++ b/R/rand_forest_aorsf.R
@@ -0,0 +1,13 @@
+#' Oblique random survival forests via aorsf
+#'
+#' [aorsf::orsf()] fits a model that creates a large number of decision
+#' trees, each de-correlated from the others. The final prediction uses all
+#' predictions from the individual trees and combines them.
+#'
+#' @includeRmd man/rmd/rand_forest_aorsf.md details
+#'
+#' @name details_rand_forest_aorsf
+#' @keywords internal
+NULL
+
+# See inst/README-DOCS.md for a description of how these files are processed
diff --git a/R/tunable.R b/R/tunable.R
index 5f2d143f8..d203c88cb 100644
--- a/R/tunable.R
+++ b/R/tunable.R
@@ -159,6 +159,18 @@ partykit_engine_args <-
component_id = "engine"
)
+aorsf_engine_args <- # engine-specific tunable parameters, added when the engine is "aorsf"
+ tibble::tibble(
+ name = c(
+ "split_min_stat" # documented as the minimum test statistic required to split a node
+ ),
+ call_info = list(
+ list(pkg = "dials", fun = "conditional_min_criterion") # NOTE(review): presumably the dials function that builds the parameter object -- confirm
+ ),
+ source = "model_spec",
+ component = "rand_forest",
+ component_id = "engine"
+ )
earth_engine_args <-
tibble::tibble(
@@ -284,6 +296,8 @@ tunable_rand_forest <- function(x, ...) {
res <- add_engine_parameters(res, randomForest_engine_args)
} else if (x$engine == "partykit") {
res <- add_engine_parameters(res, partykit_engine_args)
+ } else if (x$engine == "aorsf") {
+ res <- add_engine_parameters(res, aorsf_engine_args)
}
res
}
diff --git a/inst/models.tsv b/inst/models.tsv
index f56754593..cc0a2851c 100644
--- a/inst/models.tsv
+++ b/inst/models.tsv
@@ -105,6 +105,7 @@
"poisson_reg" "regression" "zeroinfl" "poissonreg"
"proportional_hazards" "censored regression" "glmnet" "censored"
"proportional_hazards" "censored regression" "survival" "censored"
+"rand_forest" "censored regression" "aorsf" "censored"
"rand_forest" "censored regression" "partykit" "censored"
"rand_forest" "classification" "h2o" "agua"
"rand_forest" "classification" "partykit" "bonsai"
diff --git a/man/details_rand_forest_aorsf.Rd b/man/details_rand_forest_aorsf.Rd
new file mode 100644
index 000000000..34c96c3f3
--- /dev/null
+++ b/man/details_rand_forest_aorsf.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rand_forest_aorsf.R
+\name{details_rand_forest_aorsf}
+\alias{details_rand_forest_aorsf}
+\title{Oblique random survival forests via aorsf}
+\description{
+\code{\link[aorsf:orsf]{aorsf::orsf()}} fits a model that creates a large number of decision
+trees, each de-correlated from the others. The final prediction uses all
+predictions from the individual trees and combines them.
+}
+\details{
+For this engine, there is a single mode: censored regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{trees}: # Trees (type: integer, default: 500L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 5L)
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default:
+ceiling(sqrt(n_predictors)))
+}
+
+Additionally, this model has one engine-specific tuning parameter:
+\itemize{
+\item \code{split_min_stat}: Minimum test statistic required to split a node.
+Default is \code{3.841459} for the log-rank test, which is roughly a
+p-value of 0.05.
+}
+}
+}
+\section{Translation from parsnip to the original package (censored regression)}{
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(censored)
+
+rand_forest() \%>\%
+ set_engine("aorsf") \%>\%
+ set_mode("censored regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{## Random Forest Model Specification (censored regression)
+##
+## Computational engine: aorsf
+##
+## Model fit template:
+## aorsf::orsf(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
+}\if{html}{\out{</div>}}
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+
+Predictions of survival probability at a time exceeding the maximum
+observed event time are the predicted survival probability at the
+maximum observed time in the training data.
+}
+
+\subsection{References}{
+\itemize{
+\item Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min YI, Mcclure
+LA, Howard G, Simon N. Oblique random survival forests. Annals of
+applied statistics 2019 Sep; 13(3):1847-83. DOI: 10.1214/19-AOAS1261
+\item Jaeger BC, Welden S, Lenoir K, Pajewski NM. aorsf: An R package for
+supervised learning using the oblique random survival forest.
+Journal of Open Source Software 2022, 7(77), 4705.
+\url{https://doi.org/10.21105/joss.04705}.
+\item Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey A,
+Pajewski NM. Accelerated and interpretable oblique random survival
+forests. arXiv e-prints 2022 Aug; arXiv-2208. URL:
+\url{https://arxiv.org/abs/2208.01129}
+}
+}
+}
+
+\keyword{internal}
diff --git a/man/rmd/rand_forest_aorsf.Rmd b/man/rmd/rand_forest_aorsf.Rmd
new file mode 100644
index 000000000..5a31d9ee3
--- /dev/null
+++ b/man/rmd/rand_forest_aorsf.Rmd
@@ -0,0 +1,65 @@
+```{r, child = "aaa.Rmd", include = FALSE}
+```
+
+`r descr_models("rand_forest", "aorsf")`
+
+## Tuning Parameters
+
+```{r aorsf-param-info, echo = FALSE}
+defaults <- # default value (as display text) for each main parsnip argument
+ tibble::tibble(parsnip = c("trees", "min_n", "mtry"),
+ default = c("500L", "5L", "ceiling(sqrt(n_predictors))"))
+
+param <- # its row count and `item` column are used by the text and chunk that follow
+ rand_forest() %>%
+ set_engine("aorsf") %>%
+ set_mode("censored regression") %>%
+ make_parameter_list(defaults) %>% # NOTE(review): helper not defined here, presumably from the child aaa.Rmd -- confirm
+ distinct()
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r aorsf-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+Additionally, this model has one engine-specific tuning parameter:
+
+ * `split_min_stat`: Minimum test statistic required to split a node. Default is `3.841459` for the log-rank test, which is roughly a p-value of 0.05.
+
+
+# Translation from parsnip to the original package (censored regression)
+
+`r uses_extension("rand_forest", "aorsf", "censored regression")`
+
+```{r aorsf-creg}
+library(censored)
+
+rand_forest() %>%
+ set_engine("aorsf") %>%
+ set_mode("censored regression") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+## Other details
+
+Predictions of survival probability at a time exceeding the maximum observed event time are the predicted survival probability at the maximum observed time in the training data.
+
+## References
+
+- Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min YI, Mcclure LA, Howard G, Simon N. Oblique random survival forests. Annals of applied statistics 2019 Sep; 13(3):1847-83. DOI: 10.1214/19-AOAS1261
+
+- Jaeger BC, Welden S, Lenoir K, Pajewski NM. aorsf: An R package for supervised learning using the oblique random survival forest. Journal of Open Source Software 2022, 7(77), 4705. https://doi.org/10.21105/joss.04705.
+
+- Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey A, Pajewski NM. Accelerated and interpretable oblique random survival forests. arXiv e-prints 2022 Aug; arXiv-2208. URL: https://arxiv.org/abs/2208.01129
diff --git a/man/rmd/rand_forest_aorsf.md b/man/rmd/rand_forest_aorsf.md
new file mode 100644
index 000000000..1c56f8fea
--- /dev/null
+++ b/man/rmd/rand_forest_aorsf.md
@@ -0,0 +1,68 @@
+
+
+
+For this engine, there is a single mode: censored regression
+
+## Tuning Parameters
+
+
+
+This model has 3 tuning parameters:
+
+- `trees`: # Trees (type: integer, default: 500L)
+
+- `min_n`: Minimal Node Size (type: integer, default: 5L)
+
+- `mtry`: # Randomly Selected Predictors (type: integer, default: ceiling(sqrt(n_predictors)))
+
+Additionally, this model has one engine-specific tuning parameter:
+
+ * `split_min_stat`: Minimum test statistic required to split a node. Default is `3.841459` for the log-rank test, which is roughly a p-value of 0.05.
+
+
+# Translation from parsnip to the original package (censored regression)
+
+The **censored** extension package is required to fit this model.
+
+
+```r
+library(censored)
+
+rand_forest() %>%
+ set_engine("aorsf") %>%
+ set_mode("censored regression") %>%
+ translate()
+```
+
+```
+## Random Forest Model Specification (censored regression)
+##
+## Computational engine: aorsf
+##
+## Model fit template:
+## aorsf::orsf(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
+```
+
+## Preprocessing requirements
+
+
+This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+## Other details
+
+Predictions of survival probability at a time exceeding the maximum observed event time are the predicted survival probability at the maximum observed time in the training data.
+
+## References
+
+- Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min YI, Mcclure LA, Howard G, Simon N. Oblique random survival forests. Annals of applied statistics 2019 Sep; 13(3):1847-83. DOI: 10.1214/19-AOAS1261
+
+- Jaeger BC, Welden S, Lenoir K, Pajewski NM. aorsf: An R package for supervised learning using the oblique random survival forest. Journal of Open Source Software 2022, 7(77), 4705. https://doi.org/10.21105/joss.04705.
+
+- Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey A, Pajewski NM. Accelerated and interpretable oblique random survival forests. arXiv e-prints 2022 Aug; arXiv-2208. URL: https://arxiv.org/abs/2208.01129