martinctc · martinctc · Jul 21, 2023 · Jul 21, 2023 · Jul 24, 2023 · Nov 3, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,6 @@
 ^icons$
 ^_development$
 ^\.github$
+^_pkgdown.yml$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 .Rhistory
 .RData
 .Ruserdata
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,15 +4,14 @@ Title: Useful Support Functions for Survey Analysis
 Version: 0.1.0.9000
 Author: Martin Chan
 Maintainer: Martin Chan <martinchan53@gmail.com>
-URL: https://github.com/martinctc/surveytoolbox
-Description: A R package containing useful support functions for survey analysis.
+URL: https://github.com/martinctc/surveytoolbox/
+Description: A collection of tools for analyzing and visualizing survey data in R. It includes functions for manipulating labels, creating data dictionaries, converting variable types, and more. 
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Roxygen: list(markdown = TRUE)
 Imports: 
-    base,
     dplyr,
     stringr,
     stats,
@@ -21,11 +20,19 @@ Imports:
     readr,
     haven,
     tidyr,
-    psych,
     magrittr,
     purrr,
     glue,
-    data.table
+    data.table,
+    broom,
+    rstatix,
+    graphics,
+    rlang
 Suggests: 
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    knitr,
+    rmarkdown,
+    ggplot2,
+    psych
 Config/testthat/edition: 3
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,6 +17,7 @@ export(chr_to_var)
 export(clean_strings)
 export(copy_df)
 export(cor_to_df)
+export(create_freq_dist)
 export(create_named_list)
 export(data_dict)
 export(extract_fa_loads)
@@ -40,19 +41,31 @@ export(squish)
 export(superspread)
 export(superspread_count)
 export(superspread_fill)
+export(test_chisq)
 export(timed_fn)
 export(ttest_nps)
 export(varl_tb)
 export(wrap_text)
 import(dplyr)
 import(haven)
 import(stringr)
+importFrom(broom,tidy)
 importFrom(data.table,":=")
+importFrom(dplyr,filter)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
 importFrom(glue,glue)
 importFrom(magrittr,"%>%")
 importFrom(purrr,is_null)
 importFrom(purrr,map)
+importFrom(rstatix,chisq_test)
+importFrom(stats,chisq.test)
+importFrom(stats,fisher.test)
+importFrom(graphics,hist)
+importFrom(rlang,sym)
+importFrom(rlang,.data)
 importFrom(tibble,enframe)
 importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)
+importFrom(tidyr,pivot_longer)
 importFrom(tidyr,unnest)
diff --git a/R/CAGR.R b/R/CAGR.R
@@ -1,12 +1,14 @@
-#' Calculate CAGR
+#' @title Calculate CAGR
 #' 
-#' Calculates the Compound Annual Growth Rate (CAGR).
+#' @description Compute the Compound Annual Growth Rate (CAGR).
 #' @param value_begin The value at the start of the series.
 #' @param value_end The value at the end of the series.
 #' @param n_periods The number of periods to base the CAGR calculations on.
 #' 
 #' @seealso http://www.investopedia.com/terms/c/cagr.asp
 #' 
+#' @return numeric value
+#' 
 #' @export
 CAGR <- function(value_begin, value_end, n_periods){
 

diff --git a/R/any_x.R b/R/any_x.R
@@ -1,21 +1,26 @@
-#' @title Function that returns TRUE/FALSE if value exists in x, but returns NA if x consists entirely of NAs
+#' @title Function that returns either TRUE or FALSE if value exists in x, but
+#'   returns NA if x consists entirely of NAs
 #' 
 #' @description 
-#' A more nuanced response is returned than the standard R method,
-#' which does not return NAs if x is all NAs.
-#' Has useful applications in understanding a set of categorical variables
-#' belonging to a single question.
-#' E.g. A question on brand usage across 10 product types to understand 'any' usage of a brand x.
-#' 
-#' @return A logical vector whether a value exists in x, and returns NA if x contains only NAs.
+#' A more nuanced response is returned than the standard R method, which does
+#' not return NAs if x is all NAs. Has useful applications in understanding a
+#' set of categorical variables belonging to a single question.
+#' E.g. A question on brand usage across 10 product types to understand 'any'
+#' usage of a brand x.
+#'   
 #' @param x Vector of values to test.
-#' @param value Value to test whether it exists in x. NA is returned if none exists at all. 
+#' @param value Value to test whether it exists in x. NA is returned if none
+#'   exists at all.
+#'   
 #' @examples
 #' any_x(c(1,0,1),1) # TRUE
 #' any_x(c(1,NA,1),1) # TRUE
 #' any_x(c(0,0,NA),1) # FALSE
 #' any_x(c(NA,NA,NA),1) # NA
 #' 
+#' @return A logical vector indicating whether `value` exists in `x`; `NA` is
+#'   returned if `x` contains only NAs.
+#' 
 #' @export
 any_x <- function(x, value){
   if(all(is.na(x))){

diff --git a/R/append_to_list.R b/R/append_to_list.R
@@ -1,20 +1,22 @@
 #' @title Append an item to a list dynamically
 #' 
 #' @description
-#' The `append_to_list()` function appends an object to the specified list in Global Environment (default).
-#' This function is pipe-optimised, and allows the option of specifying a name for the new object in the list.
+#' The `append_to_list()` function appends an object to the specified list in
+#' Global Environment (default). This function is pipe-optimised, and allows the
+#' option of specifying a name for the new object in the list.
 #' 
 #' @param x An object to append to list, e.g. vector, data frame. 
 #' @param list_x Target list to append object to. 
-#' @param name Specify a character string for the name of the list. Defaults to blank
+#' @param name character string for the name of the new object in the list.
+#'   Defaults to a blank string
 #' @param enviro Specifies the environment
 #'
 #' @examples
 #' a_list <- list(NULL)
 #' append_to_list(iris,a_list,"iris") 
 #' 
 #' @export
-append_to_list <- function(x, list_x, name="", enviro = .GlobalEnv){
+append_to_list <- function(x, list_x, name = "", enviro = .GlobalEnv){
 
   temp <- deparse(substitute(list_x))
 

diff --git a/R/apply_row.R b/R/apply_row.R
@@ -1,12 +1,14 @@
-#' @title Apply a function rowwise, selecting variables with dplyr::select() syntax
+#' @title Apply a function rowwise, selecting variables with `dplyr::select()`
+#'   syntax
 #'
 #' @description
 #' `apply_row()` is a wrapper around `apply()` and `select()`,
-#' applying a function rowwise, and selecting variables with dplyr::select() syntax.
+#' applying a function rowwise, and selecting variables with `dplyr::select()`
+#' syntax.
 #' This makes code slightly less verbose for rowwise operations.
 #'
 #' @param x Data frame or tibble to pass through.
-#' @param select_helpers Select variables using dplyr::select() syntax
+#' @param select_helpers Select variables using `dplyr::select()` syntax
 #' @param FUN Function to be applied to selected columns
 #' @param ... Additional arguments to the function.
 #' 
@@ -21,6 +23,8 @@
 #' iris %>% mutate(Any_Petal = apply_row(., petal_str, function(x) any(x > 1)))
 #' }
 #' 
+#' @return 
+#' result of applying `FUN` to each row of the selected columns of `x`
 #' 
 #' @export
 

diff --git a/R/as_nps_cat.R b/R/as_nps_cat.R
@@ -1,14 +1,23 @@
+#' @title
 #' Convert numeric variable to NPS categorical variable
 #' 
+#' @description
 #' Returns a categorical variable with default values over 1, 2, and 3.
 #' Suited for running multinomial logistic regression. 
 #' To calculate the NPS score, use `as_nps()`.
 #' 
-#' @param x Numeric variable to pass through. Valid range is 0 to 10 inclusive, otherwise returns a NA.
+#' @param x Numeric variable to pass through. Valid range is 0 to 10 inclusive,
+#'   otherwise returns a NA.
 #' @param det Numeric value to represent the code for Detractor. Defaults to 1.
 #' @param pas Numeric value to represent the code for Passive. Defaults to 2.
 #' @param pro Numeric value to represent the code for Promoter. Defaults to 3.
 #' 
+#' @return a labelled double variable
+#' 
+#' @examples
+#' x <- sample(0:10, size = 50, replace = TRUE)
+#' as_nps_cat(x)
+#' 
 #' @export
 as_nps_cat <-function(x, det = 1, pas = 2, pro = 3){
   if(any(!is.numeric(c(det, pas, pro)))){

diff --git a/R/as_percent.R b/R/as_percent.R
@@ -1,10 +1,14 @@
+#' @title
 #' Convert as percent (string)
 #' 
+#' @description
 #' Convert a numeric value into a string with percentage sign.
+#' 
 #' @param num Numeric vector to pass through
 #' @param rounding Number of decimal places to round to. Default is 0.
 #' @examples 
 #' as_percent(.86748)
+#' 
 #' @export
 as_percent <- function(num, rounding = 0){
   paste0(round(num * 100, rounding),"%")

diff --git a/R/box_it.R b/R/box_it.R
@@ -1,4 +1,6 @@
-#' Convert ordinal variables into binary variables by "boxing"
+#' @title
+#' Convert ordinal variables into binary variables by creating top or bottom n
+#' 'box' categories
 #' 
 #' @description
 #' For instance, you can create a Top Two Box variable from a 7-point agreement
@@ -25,7 +27,10 @@
 #' @return a binary variable of labelled double type.
 #' 
 #' @examples
-#' box_it(sample(1:10,100,replace = TRUE)) # Converted to binary variable where 9, 10 are selected
+#' # Converted to binary variable where 9, 10 are selected
+#' box_it(sample(1:10,100,replace = TRUE)) 
+#' 
+#' # Example with missing values
 #' box_it(sample(c(1:10, NA),100,replace = TRUE))
 #' 
 #' # Example where specified numeric values are replaced with NAs

diff --git a/R/calc_pc_loglin.R b/R/calc_pc_loglin.R
@@ -1,12 +1,19 @@
+#' @title 
 #' Calculate percentage impact from coefficients of a log-linear model
 #' 
-#' Exponentiates coefficients and takes out 1 to calculate percentage impact.
-#' Returns a tibble
+#' @description
+#' This function exponentiates coefficients and takes out 1 to calculate the
+#' percentage impact of each variable on the response variable in a log-linear
+#' model. The function returns a tibble with three columns: `var`, `coef`, and
+#' `pc_impact`.
+#' 
+#' @return A [tibble][tibble::tibble-package] with three columns: `var`,
+#'   `coef`, and `pc_impact`.
 #' 
 #' @import dplyr
 #' 
 #' @param x Log-linear model to be passed through
 #' 
 #' @export
 calc_pc_loglin <- function(x){
   x$coefficients %>%

diff --git a/R/char_to_lab.R b/R/char_to_lab.R
@@ -1,17 +1,24 @@
+#' @title
 #' Convert character variable to labelled integer variable
 #' 
-#' This function converts the character values into value labels, assigning each value an integer.
+#' @description
+#' This function converts the character values into value labels, assigning each
+#' value an integer. To achieve the same effect whilst prescribing a set of
+#' value-to-label mapping to the function, please see `char_to_var()`.
 #' 
 #' @param x Character vector to pass through
 #' 
 #' @importFrom tidyr drop_na
 #' 
 #' @export
 char_to_lab <- function(x){
+
   unique_x <- unique(x)
 
-  gen_df <- tibble::tibble(id=1:length(unique_x),
-                           var=as.character(unique_x))
+  gen_df <- tibble::tibble(
+    id = 1:length(unique_x),
+    var = as.character(unique_x)
+    )
 
   value_labels <- unlist(create_named_list(gen_df$var,gen_df$id))
 

diff --git a/R/create_freq_dist.R b/R/create_freq_dist.R
@@ -0,0 +1,49 @@
+#' @title Create frequency distribution table for a metric
+#'
+#' @description This function creates a frequency distribution table for a given
+#'   metric. The table contains the bin ranges and the counts of the data points
+#'   that fall within each bin. Bins are computed by `graphics::hist()` with
+#'   its default break points.
+#'
+#' @param data A data frame containing the data
+#' @param metric string specifying the name of the metric for which the
+#'   frequency distribution is to be created. Must refer to an existing column
+#'   in `data`.
+#'
+#' @return A data frame with one row per bin and three columns: `metric` (the
+#'   value passed to `metric`), `bin_range` (the lower and upper break of the
+#'   bin, formatted as `"lower - upper"`), and `counts` (the number of data
+#'   points in the bin).
+#'
+#' @examples
+#' create_freq_dist(iris, "Sepal.Length")
+#'
+#' @importFrom graphics hist
+#'
+#' @export
+create_freq_dist <- function(data, metric){
+
+  metric_values <- data[[metric]]
+
+  # `[[` returns NULL for a column name that does not exist; stop here with
+  # a clear message instead of the generic error `hist()` would raise
+  if(is.null(metric_values)){
+    stop("`metric` does not match a column in `data`: ", metric, call. = FALSE)
+  }
+
+  hist_data <- graphics::hist(metric_values, plot = FALSE)
+
+  # Create labels for the bin ranges by pairing each break with the next one
+  n_breaks <- length(hist_data$breaks)
+  bin_labels <- paste0(
+    hist_data$breaks[-n_breaks],
+    " - ",
+    hist_data$breaks[-1]
+  )
+
+  data.frame(
+    metric = metric,
+    bin_range = bin_labels,
+    counts = hist_data$counts
+  )
+}
diff --git a/R/extract_fa_loads.R b/R/extract_fa_loads.R
@@ -1,12 +1,19 @@
-#' Function to create a loadings file from the factanal() output
+#' @title 
+#' Function to create a loadings file from the `stats::factanal()` output
 #' 
 #' @param fa_object factanal() model
 #' @keywords factor analysis
+#' 
+#' 
 #' @examples 
-#' fa_output <- factanal(tidyr::drop_na(psych::bfi), factors = 6)
+#' fa_output <- stats::factanal(
+#'   tidyr::drop_na(psych::bfi),
+#'   factors = 6
+#'   )
 #' extract_fa_loads(fa_output)
 #' @export
 extract_fa_loads <-function(fa_object){
+
   loadings_object <- as.matrix(fa_object$loadings)
 
   # Find max and return column header

diff --git a/R/maxmin.R b/R/maxmin.R
@@ -1,7 +1,9 @@
-#' Max-Min Scaling Function
+#' @title Max-Min Scaling Function
 #'
+#' @description 
 #' This function allows you to scale vectors or an entire data frame using the max-min scaling method
 #' A numeric vector is always returned.
+#' 
 #' @param x Pass a vector or the required columns of a data frame through this argument.
 #' @keywords max-min
 #' @export
@@ -15,6 +17,7 @@
 #' iris %>% mutate(Petal.Length2 = maxmin(Petal.Length))
 #' 
 #' maxmin(iris$Petal.Length)
+#' 
 #' @export
 maxmin <- function(x){
     if(any(is.na(x))){
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,3 +2,5 @@ @@
     .Rhistory
     .RData
     .Ruserdata
+    /doc/
+    /Meta/