update: preparation scripts

vusaverse · Jun 14, 2024 · 90dbcab · 90dbcab
1 parent 06d59b0
commit 90dbcab
Show file tree

Hide file tree

Showing 9 changed files with 917 additions and 16 deletions.
diff --git a/.Rprofile b/.Rprofile
@@ -1,2 +1,2 @@
 source("renv/activate.R")
-source("99_utils/00_load_packages.R")")
+source("99_utils/00_preparation.R")
diff --git a/99_utils/00_preparation.R b/99_utils/00_preparation.R
@@ -0,0 +1,76 @@
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## R code for Student Analytics Vrije Universiteit Amsterdam
+## Copyright 2023 VU
+## Web Page: http://www.vu.nl
+## Contact: vu-analytics@vu.nl
+##
+##' *INFO*:
+## 1) Project start-up script: syncs packages with renv and sources the 99_utils helper scripts
+##
+## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+## Store the name of the current git branch in the BRANCH environment variable
+Sys.setenv("BRANCH" = system("git branch --show-current", intern = TRUE))
+
+## Install the latest vusa version and record it in the renv lockfile
+renv::install("vusaverse/vusa", rebuild = TRUE, prompt = FALSE)
+renv::record("vusaverse/vusa")
+
+## Restore packages from the renv lockfile without asking for confirmation
+renv::restore(prompt = FALSE)
+
+
+## Load packages
+source("99_utils/load_packages.R")
+
+## Set system variables
+source("99_utils/load_system_variables.R")
+
+## Update the R snippets
+source("99_utils/import_snippets.R")
+
+## Create directory structure and adjust OUTPUT_DIR system variable
+source("99_utils/create_dir_setup.R")
+
+## TEMP: register the read/write settings file and source the accompanying functions script
+Sys.setenv("load_and_save_config" = "99_utils/read_and_write_settings.csv")
+source("99_utils/read_and_write_settings_functions.R")
+
+# ## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# ## slackr_setup()
+# ## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# ## Slackr setup sa-bot
+slackr_setup(
+  channel = "#evalytics",
+  username = Sys.getenv("SLACK_BOT"),
+  icon_emoji = "",
+  incoming_webhook_url = Sys.getenv("SLACK_WEBHOOK"),
+  token = Sys.getenv("SLACK_TOKEN"),
+  config_file = "~/.slackr",
+  echo = F
+)
+
+
+##' *INFO* clear_global_proj: snapshot the names of all objects currently in the global environment
+object_names <- ls(envir = .GlobalEnv)
+
+# Concatenate the object names into a space-separated string
+default_keep_list <- paste(object_names, collapse = " ")
+
+# Store that string in DEFAULT_KEEP_LIST (presumably read by vusa::clear_global_proj() - confirm)
+Sys.setenv(DEFAULT_KEEP_LIST = default_keep_list)
+
+vusa::clear_global_proj()
+
+##' *INFO*:
+##' All possible settings:
+##' https://docs.posit.co/ide/server-pro/reference/session_user_settings.html
+##' https://docs.posit.co/ide/server-pro/rstudio_pro_sessions/session_startup_scripts.html
+##' Enforce margin of 100; use the rstudio.sessionInit hook because RStudio must be initialised first.
+setHook("rstudio.sessionInit", function(newSession) {
+  if (newSession) {
+    vusa::use_rstudio_prefs_silent(
+      "margin_column" = as.integer(100)
+    )
+  }
+}, action = "append")
diff --git a/99_utils/create_dir_setup.R b/99_utils/create_dir_setup.R
@@ -0,0 +1,68 @@
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## R code for Education Analytics Vrije Universiteit Amsterdam
+## Copyright 2024 VU
+## Web Page: http://www.vu.nl
+## Contact: vu-analytics@vu.nl
+## Distribution outside of the VU: yes.
+##
+##' *INFO*:
+## 1) Creates the project/branch output directories and points OUTPUT_DIR at the project folder
+##
+## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+# Project name: basename of the path returned by this.path::sys.proj()
+project_name <- this.path::sys.proj() %>%
+  basename()
+
+# Read the CSV file containing read and write settings
+read_write_settings <- read_delim("99_utils/read_and_write_settings.csv")
+
+# Extract unique read directories, sort them, and store in sorted_read_dirs
+sorted_read_dirs <- read_write_settings %>%
+  pull(read_data_dir) %>%
+  unique() %>%
+  sort()
+
+# Extract unique write directories, sort them, and store in sorted_write_dirs
+sorted_write_dirs <- read_write_settings %>%
+  pull(write_data_dir) %>%
+  unique() %>%
+  sort()
+
+# Combine read and write directories, remove duplicates, and sort
+combined_dirs <- c(sorted_read_dirs, sorted_write_dirs) %>%
+  unique() %>%
+  sort()
+
+# Keep only directories starting with a digit (e.g. "1. Ingelezen data"), dropping e.g. "Datasets"
+filtered_dirs <- combined_dirs[str_detect(combined_dirs, "^[0-9]")]
+
+# Create a data frame with the basename of each directory
+dfDirectories <- data.frame(dir_basename = filtered_dirs)
+
+# Build <OUTPUT_DIR>_REPOSITORIES/<project>/<BRANCH>/<dir>/ per directory and flag whether it exists
+dfDirectories <- dfDirectories %>%
+  mutate(
+    full_dir_path = paste0(Sys.getenv("OUTPUT_DIR"), "_REPOSITORIES/", project_name, "/", Sys.getenv("BRANCH"), "/", dir_basename, "/"),
+    path_exists = dir.exists(full_dir_path)
+  )
+
+# Identify new paths that do not exist yet
+new_paths_to_create <- dfDirectories %>%
+  filter(!path_exists) %>%
+  pull(full_dir_path)
+
+# Create new directories (including missing parent directories) for paths that do not exist
+if (!is_empty(new_paths_to_create)) {
+  map(new_paths_to_create, ~ dir.create(.x, recursive = TRUE))
+}
+
+# Point OUTPUT_DIR at the project folder; done last because the paths above use the original value
+project_specific_output_dir <- paste0(Sys.getenv("OUTPUT_DIR"), "_REPOSITORIES/", project_name, "/")
+Sys.setenv(OUTPUT_DIR = project_specific_output_dir)
+
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## CLEAR ####
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+vusa::clear_script_objects()
diff --git a/99_utils/import_snippets.R b/99_utils/import_snippets.R
@@ -0,0 +1,62 @@
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## R code for Education Analytics Vrije Universiteit Amsterdam
+## Copyright 2024 VU
+## Web Page: http://www.vu.nl
+## Contact: vu-analytics@vu.nl
+##
+##' *INFO*:
+## 1) Get snippets file from GitHub and append the chunks missing from the local snippets file
+## 2) TODO: create a single function to handle all of this
+##
+## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+# URL of the GitHub file, taken from the SNIPPETS environment variable
+github_file_url <- Sys.getenv("SNIPPETS")
+
+# Path to the temporary file where we will save the downloaded content
+temp_file_path <- fs::path_temp("r.snippets")
+
+# Download the file from GitHub
+downloaded_file_path <- vusa::download_github_file(github_file_url, temp_file_path)
+
+# Read the contents of the downloaded file
+downloaded_contents <- readLines(downloaded_file_path)
+
+snippets_SA <- downloaded_contents %>%
+  ## Trim trailing spaces/line breaks but keep tabs; NOTE(review): snippets_SA is unused below
+  str_replace("[ \r\n]+$", "")
+
+# Split the downloaded snippets file (the temporary file) into chunks
+file2_chunks <- split_file(temp_file_path)
+
+# Split the local snippets file returned by vusa::get_snippets_file() into chunks
+file1_chunks <- split_file(vusa::get_snippets_file(type = "r"))
+
+
+# Chunks that are in the downloaded file (file2) but not in the local file (file1)
+diff_chunks <- setdiff(lapply(file2_chunks, paste, collapse="\n"), lapply(file1_chunks, paste, collapse="\n"))
+
+# find the chunks in file1 that are not in file2
+# diff_chunks <- setdiff(lapply(file1_chunks, paste, collapse="\n"), lapply(file2_chunks, paste, collapse="\n"))
+
+# Append the missing chunks to the local snippets file, preceded by an empty line
+if (length(diff_chunks) > 0) {
+  file1 <- readLines(get_snippets_file(type = "r"))
+  file1[length(file1) + 1] <- ""
+  file1[length(file1) + 1] <- paste(diff_chunks, collapse="\n\n")
+  writeLines(file1, get_snippets_file(type = "r"))
+  message("Local snippet file has been edited.")
+  rm(file1)
+}
+
+# Clean up the temporary file
+file.remove(temp_file_path)
+
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## CLEAR ####
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+clear_script_objects()
+
+
+
diff --git a/99_utils/load_packages.R b/99_utils/load_packages.R
@@ -0,0 +1,49 @@
+## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## R code for Student Analytics Vrije Universiteit Amsterdam
+## Copyright 2023 VU
+## Web Page: http://www.vu.nl
+## Contact: vu-analytics@vu.nl
+##
+##' *INFO*:
+## 1) Loads packages for the project using library()
+##
+## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# Define the basic packages that are attached in every session
+basic_packages <- c(
+  "dataMaid",       # Used for export analysis sets.
+  "MASS",           # Provides a lot of basic statistical functions
+  "ggplot2",        # For basic plots
+  "rvest",          # Used to get data from the web.
+  "readxl",         # Used to read Excel (.xls and .xlsx) files
+  "checkmate",      # Used for assertion tests
+  "cli",            # Used to add color to console messages
+  "digest",         # Used for hashing variables
+  "gridExtra",      # Used to place multiple graphical objects in a table
+  "haven",          # Used for importing SPSS, STATA, and SAS files
+  "httr",           # Used to work with HTTP
+  "janitor",        # Used to clean up variable names from special characters
+  "lubridate",      # Used to work with dates and times
+  "purrr",          # Used to work with functions and vectors
+  "readr",          # Used to read data (csv, tsv, and fwf)
+  "vroom",          # Used to quickly read CSV data
+  "slackr",         # Used to send messages in Slack
+  "stats",          # Used for statistical functions and calculations
+  "stringr",        # Used for functions to work with strings
+  "tibble",         # Used for editing and creating tibbles
+  "tidyr",          # Used to clean data in the tidyverse environment
+  "utils",          # Used for utility functions
+  "fst",            # Used for operations on large data files
+  "styler",         # Used for improving the style of script
+  "vusa",           # Mainly to always have the addins
+  "vvmover",           # Mainly to always have the addins (NOTE(review): same text as vusa - confirm)
+  "dplyr"           # Used for the dplyr environment
+)
+
+# Attach every package by name, suppressing start-up messages and conflict warnings
+suppressMessages(purrr::walk(basic_packages, ~library(.x, character.only = TRUE, warn.conflicts = FALSE)))
+
+if (interactive()) {
+  library(tidylog)
+}
+
+vusa::clear_script_objects()
diff --git a/99_utils/load_system_variables.R b/99_utils/load_system_variables.R
@@ -5,31 +5,26 @@
 ##
 ##
 ## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-renv::restore()
-library(dplyr)
-library(purrr)
-library(vvmover)
-library(vusa)
-library(tidylog)
 
+## Set one environment variable; do.call is used because the name is only known at runtime
 set_all_envs <- function(var.name, var.value) {
   args = list(var.value)
   names(args) = var.name
   do.call(Sys.setenv, args)
 }
 
-## Lees in systeemvariabelen excel bestand
-##' *INFO*: Dit is momenteel enkel beschikbaar op "main", vandaar volledig bestandspad
+##' *INFO*: RENVIRON_PATH is set in the .Renviron file and points to the Excel file read below.
+##' To add it there, run the following code:
+##' usethis::edit_r_environ()
+##' and add the following line:
+##' RENVIRON_PATH = "path/to/your/file.xlsx"
 to_set <- readxl::read_xlsx(Sys.getenv("RENVIRON_PATH"))
 
-## zet variabelen in R system variables
-pmap(list(to_set$variable, to_set$value), set_all_envs)
-
-## Set SHAREPOINT_DIR
-Sys.setenv(SHAREPOINt_DIR = paste0("C:/Users/", Sys.getenv("USERNAME"), "/Vrije Universiteit Amsterdam/"))
+## Set each variable/value pair from the file as an R environment variable
+purrr::pmap(list(to_set$variable, to_set$value), set_all_envs)
 
 ## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-## RUIM OP ####
+## CLEAR ####
 ## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 clear_script_objects()
diff --git a/99_utils/read_and_write_settings.csv b/99_utils/read_and_write_settings.csv
@@ -0,0 +1,10 @@
+script_dir;type;read_data_dir;read_base_data_dir;read_add_branch;write_data_dir;write_base_data_dir;write_add_branch;write_rds;write_csv;write_fst;read_extension;read_message;write_message;notes
+01_read;;Datasets;NETWORK_DIR;FALSE;1. Ingelezen data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;csv;Be aware! In this directory very diverse, raw, data is loaded, so this function might not be appropriate.;;
+02_manipulate;;1. Ingelezen data;OUTPUT_DIR;TRUE;2. Geprepareerde data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;;;
+03_aggregate;;2. Geprepareerde data;OUTPUT_DIR;TRUE;3. Analyseset;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;;;
+04. Analyseren;;3. Analyseset;OUTPUT_DIR;TRUE;4. Analyses;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;;;
+05. Rapporten;default;3. Analyseset;OUTPUT_DIR;TRUE;TABLEAU_DATA_PATH;NETWORK_DIR;FALSE;TRUE;TRUE;FALSE;rds;Be aware! This directory has multiple types, the default is used for data assets;Be aware! This directory has multiple types, the default is used for Tableau;
+05. Rapporten;exception;2. Geprepareerde data;OUTPUT_DIR;TRUE;5. Rapportages;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;;;
+20_test;;2. Geprepareerde data;OUTPUT_DIR;TRUE;20. Test;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;
+30_helperscripts;;2. Geprepareerde data;OUTPUT_DIR;TRUE;30. Ondersteunende data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;
+40_check;;2. Geprepareerde data;OUTPUT_DIR;TRUE;40. Gecontroleerde data;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;