Skip to content

Commit

Permalink
update: preparation scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
tin900 committed Jun 14, 2024
1 parent 06d59b0 commit 90dbcab
Show file tree
Hide file tree
Showing 9 changed files with 917 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .Rprofile
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
source("renv/activate.R")
source("99_utils/00_load_packages.R")")
source("99_utils/00_preparation.R")
76 changes: 76 additions & 0 deletions 99_utils/00_preparation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## R code voor Student Analytics Vrije Universiteit Amsterdam
## Copyright 2023 VU
## Web Page: http://www.vu.nl
## Contact: vu-analytics@vu.nl
##
##' *INFO*:
## 1) ___
##
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

## Store the name of the currently checked-out git branch in the BRANCH environment variable.
## BRANCH is read again in 99_utils/create_dir_setup.R to build branch-specific output paths.
## NOTE(review): `git branch --show-current` prints nothing in detached HEAD state, so the value
## would be zero-length there - confirm how Sys.setenv() should behave in that case.
Sys.setenv("BRANCH" = system("git branch --show-current", intern = TRUE))

## Install the latest vusa version from GitHub (forcing a rebuild, without prompting) and
## record it in the renv lockfile
renv::install("vusaverse/vusa", rebuild = TRUE, prompt = FALSE)
renv::record("vusaverse/vusa")

## Restore the remaining packages from the renv lockfile without prompting
renv::restore(prompt = FALSE)


## Load packages
source("99_utils/load_packages.R")

## Set system variables
source("99_utils/load_system_variables.R")

## Update the R snippets
source("99_utils/import_snippets.R")

## Create directory structure and adjust OUTPUT_DIR system variable
## (must run after the system variables are set, because it reads OUTPUT_DIR and BRANCH)
source("99_utils/create_dir_setup.R")

## TEMP: point the load_and_save_config environment variable at the read/write settings file
## and load the read/write helper functions
Sys.setenv("load_and_save_config" = "99_utils/read_and_write_settings.csv")
source("99_utils/read_and_write_settings_functions.R")

## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## SLACKR SETUP ####
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

## Configure slackr for the sa-bot. The bot name, webhook URL and token are read from the
## SLACK_BOT, SLACK_WEBHOOK and SLACK_TOKEN environment variables, so no credentials are
## stored in this script.
slackr_setup(
  channel = "#evalytics",
  username = Sys.getenv("SLACK_BOT"),
  icon_emoji = "",
  incoming_webhook_url = Sys.getenv("SLACK_WEBHOOK"),
  token = Sys.getenv("SLACK_TOKEN"),
  config_file = "~/.slackr",
  ## Spelled out as FALSE: the shorthand F is an ordinary variable that can be reassigned
  echo = FALSE
)


##' *INFO* clear_global_proj
## Snapshot the names of all objects that exist in the global environment at this point and
## publish them, space-separated, through the DEFAULT_KEEP_LIST environment variable.
## The snapshot is taken before the helper variable below exists, so it is not part of the list.
## NOTE(review): presumably vusa::clear_global_proj() removes every global object that is not
## named in DEFAULT_KEEP_LIST - confirm against the vusa documentation.
keep_names <- ls(envir = .GlobalEnv)
Sys.setenv(DEFAULT_KEEP_LIST = paste(keep_names, collapse = " "))

vusa::clear_global_proj()

##' *INFO*
##' Overview of all available settings:
##' https://docs.posit.co/ide/server-pro/reference/session_user_settings.html
##' https://docs.posit.co/ide/server-pro/rstudio_pro_sessions/session_startup_scripts.html
##' Enforce a margin column of 100. This is registered on the rstudio.sessionInit hook because
##' RStudio has to be initialised before its preferences can be changed.
setHook(
  "rstudio.sessionInit",
  function(newSession) {
    ## Only touch the preferences when the hook reports a new session
    if (!newSession) {
      return(invisible(NULL))
    }
    vusa::use_rstudio_prefs_silent(margin_column = 100L)
  },
  action = "append"
)
68 changes: 68 additions & 0 deletions 99_utils/create_dir_setup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## R code for Education Analytics Vrije Universiteit Amsterdam
## Copyright 2024 VU
## Web Page: http://www.vu.nl
## Contact: vu-analytics@vu.nl
## Distribution outside of the VU: yes.
##
##' *INFO*:
## 1) ___
##
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Retrieve the project name: the base name of the project root reported by this.path
project_name <- basename(this.path::sys.proj())

# Read the read/write settings. The file is semicolon-separated and its message columns contain
# commas, so the delimiter is stated explicitly instead of being guessed. The column
# specification message is suppressed to keep session start-up quiet.
read_write_settings <- read_delim(
  "99_utils/read_and_write_settings.csv",
  delim = ";",
  show_col_types = FALSE
)

# Collect every directory that is read from or written to, without duplicates, sorted
combined_dirs <- c(
  read_write_settings$read_data_dir,
  read_write_settings$write_data_dir
) %>%
  unique() %>%
  sort()

# Keep only the directories whose name starts with a number
filtered_dirs <- combined_dirs[str_detect(combined_dirs, "^[0-9]")]

# Project-specific output root; built once and used both for the directories created below and
# for the updated OUTPUT_DIR, so the two can never drift apart
project_specific_output_dir <- paste0(
  Sys.getenv("OUTPUT_DIR"), "_REPOSITORIES/", project_name, "/"
)

# Build the full, branch-specific path of each directory and check whether it already exists
dfDirectories <- data.frame(dir_basename = filtered_dirs) %>%
  mutate(
    full_dir_path = paste0(
      project_specific_output_dir, Sys.getenv("BRANCH"), "/", dir_basename, "/"
    ),
    path_exists = dir.exists(full_dir_path)
  )

# Identify the paths that do not exist yet
new_paths_to_create <- dfDirectories %>%
  filter(!path_exists) %>%
  pull(full_dir_path)

# Create the missing directories. walk() is used because only the side effect matters;
# it does nothing when there are no paths to create.
walk(new_paths_to_create, ~ dir.create(.x, recursive = TRUE))

# Update the OUTPUT_DIR environment variable to include the project name
Sys.setenv(OUTPUT_DIR = project_specific_output_dir)

## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## CLEAR ####
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

vusa::clear_script_objects()
62 changes: 62 additions & 0 deletions 99_utils/import_snippets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## R code for Education Analytics Vrije Universiteit Amsterdam
## Copyright 2024 VU
## Web Page: http://www.vu.nl
## Contact: vu-analytics@vu.nl
##
##' *INFO*:
## 1) Get snippets file from GitHub
## 2) TODO: create a single function to handle all of this
##
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# URL of the snippets file on GitHub, taken from the SNIPPETS environment variable
github_file_url <- Sys.getenv("SNIPPETS")

# Path to the temporary file where the downloaded content is saved
temp_file_path <- fs::path_temp("r.snippets")

# Download the file from GitHub into the temporary file
vusa::download_github_file(github_file_url, temp_file_path)

# Path of the local R snippets file; looked up once and reused for reading and writing
local_snippets_path <- vusa::get_snippets_file(type = "r")

# Split the downloaded file and the local file into chunks
remote_chunks <- split_file(temp_file_path)
local_chunks <- split_file(local_snippets_path)

# Find the chunks of the downloaded file that are not in the local file.
# Each chunk is collapsed into a single string so that whole chunks are compared.
missing_chunks <- setdiff(
  lapply(remote_chunks, paste, collapse = "\n"),
  lapply(local_chunks, paste, collapse = "\n")
)

# Reverse comparison (chunks of the local file that are not in the downloaded file):
# setdiff(lapply(local_chunks, paste, collapse = "\n"), lapply(remote_chunks, paste, collapse = "\n"))

# Append the missing chunks to the local file: one empty separator line, followed by the
# missing chunks separated from each other by an empty line
if (length(missing_chunks) > 0) {
  local_lines <- readLines(local_snippets_path)
  writeLines(
    c(local_lines, "", paste(missing_chunks, collapse = "\n\n")),
    local_snippets_path
  )
  message("Local snippet file has been edited.")
  rm(local_lines)
}

# Clean up the temporary file
file.remove(temp_file_path)

## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## CLEAR ####
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

vusa::clear_script_objects()



49 changes: 49 additions & 0 deletions 99_utils/load_packages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## R code voor Student Analytics Vrije Universiteit Amsterdam
## Copyright 2023 VU
## Web Page: http://www.vu.nl
## Contact: vu-analytics@vu.nl
##
##' *INFO*:
## 1) Loads packages for the project using library()
##
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Packages that are attached for every session of this project.
# They are attached in the order listed here, and a package attached later masks functions
# with the same name from packages attached earlier.
# NOTE(review): presumably dplyr is listed last on purpose so that its verbs (e.g. select())
# mask those of MASS - confirm before reordering.
basic_packages <- c(
  "dataMaid",   # Export of analysis sets
  "MASS",       # Many basic statistical functions
  "ggplot2",    # Basic plots
  "rvest",      # Getting data from the web
  "readxl",     # Reading Excel (.xls and .xlsx) files
  "checkmate",  # Assertion tests
  "cli",        # Colour in console messages
  "digest",     # Hashing variables
  "gridExtra",  # Placing multiple graphical objects in a table
  "haven",      # Importing SPSS, STATA, and SAS files
  "httr",       # Working with HTTP
  "janitor",    # Cleaning special characters out of variable names
  "lubridate",  # Working with dates and times
  "purrr",      # Working with functions and vectors
  "readr",      # Reading data (csv, tsv, and fwf)
  "vroom",      # Quickly reading CSV data
  "slackr",     # Sending messages in Slack
  "stats",      # Statistical functions and calculations
  "stringr",    # Working with strings
  "tibble",     # Editing and creating tibbles
  "tidyr",      # Cleaning data in the tidyverse environment
  "utils",      # Utility functions
  "fst",        # Operations on large data files
  "styler",     # Improving the style of scripts
  "vusa",       # Mainly to always have the addins
  "vvmover",    # Mainly to always have the addins
  "dplyr"       # The dplyr environment
)

# Attach every package, silencing start-up messages and conflict warnings
suppressMessages(
  invisible(
    lapply(basic_packages, library, character.only = TRUE, warn.conflicts = FALSE)
  )
)

# tidylog is only attached in interactive sessions
if (interactive()) {
  library(tidylog)
}

vusa::clear_script_objects()
23 changes: 9 additions & 14 deletions 99_utils/load_system_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,26 @@
##
##
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
renv::restore()
library(dplyr)
library(purrr)
library(vvmover)
library(vusa)
library(tidylog)

## Set one environment variable whose name is only known at run time.
##
## Sys.setenv() takes the variable name as an argument name, so the name/value pair is first
## turned into a one-element named list and then passed on with do.call().
##
## @param var.name  Character string with the name of the environment variable.
## @param var.value Value to assign to the variable.
## @return The result of Sys.setenv(): a logical that is TRUE when setting the variable succeeded.
set_all_envs <- function(var.name, var.value) {
  do.call(Sys.setenv, stats::setNames(list(var.value), var.name))
}

## Read the Excel file that contains the system variables
##' *INFO*: This is currently only available on "main", hence the full file path
##' *INFO*: The RENVIRON_PATH is set in the .Renviron file
##' To add it there, run the following code:
##' usethis::edit_r_environ()
##' and add the following line:
##' RENVIRON_PATH = "path/to/your/file.xlsx"
to_set <- readxl::read_xlsx(Sys.getenv("RENVIRON_PATH"))

## Set the variables as R system variables: one set_all_envs() call per row, using the
## `variable` column as the name and the `value` column as the value
pmap(list(to_set$variable, to_set$value), set_all_envs)

## Set SHAREPOINT_DIR
## NOTE(review): the variable name below is spelled "SHAREPOINt_DIR" (lowercase "t"), which
## differs from the SHAREPOINT_DIR named in the comment above - confirm which name is intended.
Sys.setenv(SHAREPOINt_DIR = paste0("C:/Users/", Sys.getenv("USERNAME"), "/Vrije Universiteit Amsterdam/"))
## Set system variables in R
## NOTE(review): this repeats the pmap() call a few lines above with an explicit purrr:: prefix -
## confirm whether both calls are needed.
purrr::pmap(list(to_set$variable, to_set$value), set_all_envs)

## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## CLEAN UP ####
## CLEAR ####
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

clear_script_objects()
10 changes: 10 additions & 0 deletions 99_utils/read_and_write_settings.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
script_dir;type;read_data_dir;read_base_data_dir;read_add_branch;write_data_dir;write_base_data_dir;write_add_branch;write_rds;write_csv;write_fst;read_extension;read_message;write_message;notes
01_read;;Datasets;NETWORK_DIR;FALSE;1. Ingelezen data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;csv;Be aware! In this directory very diverse, raw, data is loaded, so this function might not be appropriate.;;
02_manipulate;;1. Ingelezen data;OUTPUT_DIR;TRUE;2. Geprepareerde data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;;;
03_aggregate;;2. Geprepareerde data;OUTPUT_DIR;TRUE;3. Analyseset;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;;;
04. Analyseren;;3. Analyseset;OUTPUT_DIR;TRUE;4. Analyses;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;;;
05. Rapporten;default;3. Analyseset;OUTPUT_DIR;TRUE;TABLEAU_DATA_PATH;NETWORK_DIR;FALSE;TRUE;TRUE;FALSE;rds;Be aware! This directory has multiple types, the default is used for data assets;Be aware! This directory has multiple types, the default is used for Tableau;
05. Rapporten;exception;2. Geprepareerde data;OUTPUT_DIR;TRUE;5. Rapportages;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;;;
20_test;;2. Geprepareerde data;OUTPUT_DIR;TRUE;20. Test;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;
30_helperscripts;;2. Geprepareerde data;OUTPUT_DIR;TRUE;30. Ondersteunende data;OUTPUT_DIR;TRUE;TRUE;FALSE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;
40_check;;2. Geprepareerde data;OUTPUT_DIR;TRUE;40. Gecontroleerde data;OUTPUT_DIR;TRUE;TRUE;TRUE;FALSE;rds;Be aware! In this directory very diverse, potentially raw, data is loaded, so this function might not be appropriate.;;
Loading

0 comments on commit 90dbcab

Please sign in to comment.