-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_tms_data.R
69 lines (59 loc) · 2.82 KB
/
process_tms_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(tidyverse)
library(readxl)
# Load the raw stability results from the "Sheet1" worksheet of the workbook.
tms.results <- readxl::read_xlsx(
  path = "./data/TMS_saliva_stability_final.xlsx",
  sheet = "Sheet1"
)
# Preview the first rows (printed when the script is run interactively).
tms.results %>% head()
# Relabel the measurement columns: columns 2, 10, 18, 26 and 34 each lend
# their header to the five columns immediately to their right (3:7, 11:15,
# 19:23, 27:31 and 35:39), so all six columns of a group share one name.
# NOTE(review): presumably each group is one biomarker whose header cell
# spans six timepoint columns in the workbook -- confirm against the sheet.
colnames(tms.results)[rep(c(2, 10, 18, 26, 34), each = 5) + 1:5] <-
  colnames(tms.results)[rep(c(2, 10, 18, 26, 34), each = 5)]
# Reshape the wide sheet into one long table and clean it.
#
# Layout read from `tms.results` (as used by the code below):
#   * column 1, rows 2:17 -> sample identifiers
#   * row 1 of each measurement column -> timepoint label followed by the
#     analysis date in parentheses, separated by a space
#     (NOTE(review): e.g. "T0 (21/02/20)" -- confirm against the workbook)
#   * rows 2:17 of each measurement column -> measured values
#   * column header -> biomarker name followed by "(units)"
#
# Resulting columns, in order: sample_id, value, biomarker, timepoint,
# analysis.date, days, years, units.
results.df <- seq_len(ncol(tms.results))[-1] %>%
  # One tibble per measurement column; the pieces are bound once at the end
  # instead of growing a table with rbind() inside a loop (which also needed
  # a placeholder first row). seq_len() keeps this safe for a 1-column sheet.
  lapply(function(column) {
    column_values <- tms.results[[column]]
    # Columns whose first sample row is empty are skipped entirely.
    # NOTE(review): this also drops a real column if its first sample is
    # missing -- confirm that only spacer columns are affected.
    if (is.na(column_values[2])) {
      return(NULL)
    }
    tibble(
      sample_id = tms.results[[1]][2:17],
      # Values are kept as text here because they may hold free-text flags
      # ("Ins...", "Mucinous"); they are converted to numbers further down.
      value = as.character(column_values[2:17]),
      biomarker = colnames(tms.results)[column],
      timepoint = as.character(column_values[1])
    )
  }) %>%
  bind_rows() %>%
  mutate(
    # Split analysis dates: second space-separated token, parentheses removed
    analysis.date = str_split(timepoint, " ", simplify = TRUE)[, 2],
    analysis.date = gsub("\\(|\\)", "", analysis.date),
    # Stored as Date (not POSIXlt, which does not belong in a data frame)
    analysis.date = as.Date(analysis.date, tryFormats = "%d/%m/%y"),
    # Create days since baseline variable (baseline date: 2020-02-21)
    days = as.numeric(
      difftime(analysis.date, as.Date("2020-02-21"), units = "days")
    ),
    # years
    years = days / 365.25,
    # Keep only the timepoint label (first token) in `timepoint`
    timepoint = str_split(timepoint, " ", simplify = TRUE)[, 1],
    # Split "name (units)" headers into `biomarker` and `units`
    units = str_split(biomarker, "\\(", simplify = TRUE)[, 2],
    units = gsub("\\(|\\)", "", units),
    biomarker = str_split(biomarker, "\\(", simplify = TRUE)[, 1]
  ) %>%
  mutate(across(where(is.character), str_trim)) %>%
  # change insufficient and mucinous to missing
  mutate(value = replace(value, grepl("Ins|Mucinous", value), NA)) %>%
  # REMOVE NAs from value
  filter(!is.na(value)) %>%
  mutate(value = as.numeric(value))
# Distinct biomarker names present after cleaning.
biomarker_names <- unique(results.df$biomarker)
# Express every measurement as a fraction of the "T0" value of the same
# sample and biomarker; the result is stored in a new `fraction` column.
# A series without a "T0" row (e.g. because that measurement was removed as
# missing) gets NA fractions instead of aborting the script with a
# zero-length replacement error. If a series has more than one "T0" row,
# the first one is used as the baseline.
results.df <- results.df %>%
  group_by(sample_id, biomarker) %>%
  # which() ignores rows whose timepoint is NA; [1] yields NA when no "T0"
  # row exists in the group.
  mutate(fraction = value / value[which(timepoint == "T0")][1]) %>%
  ungroup()
# Persist the processed long-format table.
write.csv(results.df, "./data/tms_processed.csv", row.names = FALSE)
# Drop only the intermediates created by this script. A blanket
# rm(list = ls()) would also wipe unrelated objects from the workspace of
# whoever sources this file. intersect() skips names that do not exist, so
# no "object not found" warnings are raised.
rm(list = intersect(
  ls(),
  c(
    "tms.results", "results.df", "x", "column", "biomarker_names",
    "subject", "subject.df", "biomarker", "biomarker_by_subject", "baseline"
  )
))
# Reload the processed data from disk as `tms_processed`.
tms_processed <- read.csv("./data/tms_processed.csv")