diff --git a/R/data.R b/R/data.R index cdc4ed0..aea3ed0 100644 --- a/R/data.R +++ b/R/data.R @@ -2,21 +2,21 @@ #' #' A list of the names of available datasets #' -#' @source \url{https://github.com/EpistasisLab/penn-ml-benchmarks} +#' @source \url{https://github.com/EpistasisLab/pmlb} "dataset_names" #' Names of available classification datasets #' #' A list of the names of available classification datasets #' -#' @source \url{https://github.com/EpistasisLab/penn-ml-benchmarks} +#' @source \url{https://github.com/EpistasisLab/pmlb} "classification_dataset_names" #' Names of available regression datasets #' #' A list of the names of available regression datasets #' -#' @source \url{https://github.com/EpistasisLab/penn-ml-benchmarks} +#' @source \url{https://github.com/EpistasisLab/pmlb} "regression_dataset_names" #' Summary statistics for the all datasets @@ -35,5 +35,5 @@ #' \item{task:}{Type of problem/task. Can be classification or regression.} #' } #' -#' @source \url{https://github.com/EpistasisLab/penn-ml-benchmarks} +#' @source \url{https://github.com/EpistasisLab/pmlb} "summary_stats" diff --git a/R/nearest.R b/R/nearest.R index 704b6ec..c920646 100644 --- a/R/nearest.R +++ b/R/nearest.R @@ -10,7 +10,7 @@ #' @param n_neighbors Integer. The number of dataset names to return as neighbors. #' @param dimensions Character vector specifying dataset characteristics to include in similarity calculation. #' Dimensions must correspond to numeric columns of -#' [all_summary_stats.tsv](https://github.com/EpistasisLab/penn-ml-benchmarks/blob/master/pmlb/all_summary_stats.tsv). +#' [all_summary_stats.tsv](https://github.com/EpistasisLab/pmlb/blob/master/pmlb/all_summary_stats.tsv). #' If 'all' (default), uses all numeric columns. #' @param task Character string specifying classification or regression for summary stat generation. #' @param target_name Character string specifying column of target/dependent variable. diff --git a/R/pmlb.R b/R/pmlb.R index 56a3b47..c7e1e51 100644 --- a/R/pmlb.R +++ b/R/pmlb.R @@ -93,7 +93,7 @@ fetch_data <- function(dataset_name, return_X_y = FALSE, local_cache_dir = NA, d #' pmlb: R interface to the Penn Machine Learning Benchmarks data repository #' -#' The \href{https://github.com/EpistasisLab/penn-ml-benchmarks}{PMLB} repository contains a curated collection of data sets for evaluating and +#' The \href{https://github.com/EpistasisLab/pmlb}{PMLB} repository contains a curated collection of data sets for evaluating and #' comparing machine learning algorithms. #' These data sets cover a range of applications, and include binary/multi-class classification problems and regression problems, #' as well as combinations of categorical, ordinal, and continuous features. There are approximately 290 data sets included in the PMLB repository diff --git a/data-raw/get-summary.R b/data-raw/get-summary.R index 11dfe3d..e849738 100644 --- a/data-raw/get-summary.R +++ b/data-raw/get-summary.R @@ -1,4 +1,4 @@ -links_to_stats <- 'https://github.com/EpistasisLab/penn-ml-benchmarks/raw/master/pmlb/all_summary_stats.tsv' +links_to_stats <- 'https://github.com/EpistasisLab/pmlb/raw/master/pmlb/all_summary_stats.tsv' summary_stats <- read.csv(links_to_stats, sep = '\t') colnames(summary_stats) <- tolower(gsub('X.', 'n_', colnames(summary_stats))) dataset_names <- summary_stats$dataset diff --git a/man/classification_dataset_names.Rd b/man/classification_dataset_names.Rd index a089ff2..b682290 100644 --- a/man/classification_dataset_names.Rd +++ b/man/classification_dataset_names.Rd @@ -8,7 +8,7 @@ An object of class \code{character} of length 162. } \source{ -\url{https://github.com/EpistasisLab/penn-ml-benchmarks} +\url{https://github.com/EpistasisLab/pmlb} } \usage{ classification_dataset_names diff --git a/man/dataset_names.Rd b/man/dataset_names.Rd index f1463a4..1692325 100644 --- a/man/dataset_names.Rd +++ b/man/dataset_names.Rd @@ -8,7 +8,7 @@ An object of class \code{character} of length 284. } \source{ -\url{https://github.com/EpistasisLab/penn-ml-benchmarks} +\url{https://github.com/EpistasisLab/pmlb} } \usage{ dataset_names diff --git a/man/nearest_datasets-methods.Rd b/man/nearest_datasets-methods.Rd index 897cd28..1dd3343 100644 --- a/man/nearest_datasets-methods.Rd +++ b/man/nearest_datasets-methods.Rd @@ -39,7 +39,7 @@ or data.frame of n_samples x n_features(or n_features+1 with a target column)} \item{dimensions}{Character vector specifying dataset characteristics to include in similarity calculation. Dimensions must correspond to numeric columns of -[all_summary_stats.tsv](https://github.com/EpistasisLab/penn-ml-benchmarks/blob/master/pmlb/all_summary_stats.tsv). +[all_summary_stats.tsv](https://github.com/EpistasisLab/pmlb/blob/master/pmlb/all_summary_stats.tsv). If 'all' (default), uses all numeric columns.} \item{target_name}{Character string specifying column of target/dependent variable.} diff --git a/man/pmlb.Rd b/man/pmlb.Rd index 883ec21..bc522c3 100644 --- a/man/pmlb.Rd +++ b/man/pmlb.Rd @@ -5,7 +5,7 @@ \alias{pmlb} \title{pmlb: R interface to the Penn Machine Learning Benchmarks data repository} \description{ -The \href{https://github.com/EpistasisLab/penn-ml-benchmarks}{PMLB} repository contains a curated collection of data sets for evaluating and +The \href{https://github.com/EpistasisLab/pmlb}{PMLB} repository contains a curated collection of data sets for evaluating and comparing machine learning algorithms. These data sets cover a range of applications, and include binary/multi-class classification problems and regression problems, as well as combinations of categorical, ordinal, and continuous features. There are approximately 290 data sets included in the PMLB repository diff --git a/man/regression_dataset_names.Rd b/man/regression_dataset_names.Rd index e4a7b45..c28c42b 100644 --- a/man/regression_dataset_names.Rd +++ b/man/regression_dataset_names.Rd @@ -8,7 +8,7 @@ An object of class \code{character} of length 122. } \source{ -\url{https://github.com/EpistasisLab/penn-ml-benchmarks} +\url{https://github.com/EpistasisLab/pmlb} } \usage{ regression_dataset_names diff --git a/man/summary_stats.Rd b/man/summary_stats.Rd index e6a3ddd..6f8539f 100644 --- a/man/summary_stats.Rd +++ b/man/summary_stats.Rd @@ -20,7 +20,7 @@ A data frame with 10 variables: } } \source{ -\url{https://github.com/EpistasisLab/penn-ml-benchmarks} +\url{https://github.com/EpistasisLab/pmlb} } \usage{ summary_stats