Skip to content

Commit

Permalink
Added repeatability stats
Browse files Browse the repository at this point in the history
-Added calculation and reporting of repeatability stats
-Repeatability stats are available in Table 4
-Fixed a bug that prevented calculation of p-value for multiple conditions
  • Loading branch information
JoachimGoedhart committed Jan 31, 2023
1 parent 145982a commit 9d81b4e
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 9 deletions.
69 changes: 60 additions & 9 deletions app.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Improvements:
# display of p-values (use of scientific notation if smaller than 0.001
# Needs fixing, since doesn't work for multiple conditions)
# Implement measures of reproducibility/repeatability


library(shiny)
library(tidyverse)
Expand All @@ -39,6 +44,7 @@ library(broom)
source("geom_flat_violin.R")
source("themes.R")
source("function_tidy_df.R")
source("repeatability.R")

###### Functions ##########

Expand Down Expand Up @@ -450,6 +456,7 @@ conditionalPanel(condition = "input.show_table == true", h3("Difference with the
h3("Table 1: Statistics for individual replicates"),dataTableOutput('data_summary'),
h3("Table 2: Statistics for conditions"),dataTableOutput('data_summary_condition') ,
h3("Table 3: Statistics for comparison of means between conditions"),dataTableOutput('data_difference'),
h3("Table 4: Statistics for repeatability"),dataTableOutput('data_repeats'),
NULL
),
tabPanel("About", includeHTML("about.html")
Expand Down Expand Up @@ -480,9 +487,9 @@ df_upload <- reactive({

if (input$data_input == 2) {
data <- df_tidy_example2
x_var.selected <<- "Method"
x_var.selected <<- "Condition"
y_var.selected <<- "BP"
g_var.selected <<- "Replicate"
g_var.selected <<- "N"
} else if (input$data_input == 1) {
data <- df_tidy_example
x_var.selected <<- "Treatment"
Expand Down Expand Up @@ -1494,12 +1501,13 @@ df_difference <- reactive({
df_difference <- df_difference %>% mutate_at(c(2:4), round, input$digits) %>% mutate_at(c(5), round, 8)

#Use scientific notation if smaller than 0.001
if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) {
df_difference$p.value <- formatC(df_difference$p.value, format = "e", digits = 2)
#Any p-value lower than 1e-10 is noted as <1e-10
} else if (df_difference$p.value<1e-10) {
df_difference$p.value <- "<1e-10"
}
# Needs fixing, since doesn't work for multiple conditions
# if (df_difference$p.value<0.001 && df_difference$p.value>=1e-10) {
# df_difference$p.value <- formatC(df_difference$p.value, format = "e", digits = 2)
# #Any p-value lower than 1e-10 is noted as <1e-10
# } else if (df_difference$p.value<1e-10) {
# df_difference$p.value <- "<1e-10"
# }

return(df_difference)

Expand All @@ -1525,13 +1533,37 @@ df_summary_condition <- reactive({
`95%CI_hi` = mean - qt((1-Confidence_level)/2, n - 1) * sem,
NULL)


# ## Need to add the repeats for each subject
# df_id <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup()
# ## Calculate measures of repeatability
# df_repeatability <- df_id %>% repeatability(values=Value, replicates=replicates , groups=Condition)
# df_repeatability <- df_repeatability %>%
# select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb))
#
# observe({print(head(df_repeatability))})


return(df)
})

df_summary_condition_rounded <- reactive({
df <- df_summary_condition() %>% mutate_at(c(3:7), round, input$digits)
})

df_repeats_rounded <- reactive({

## Need to add the repeats for each subject
df <- df_selected() %>% group_by(Condition, Replica) %>% mutate(replicates=row_number()) %>% ungroup()

## Calculate paramaters
df <- df %>% repeatability(values=Value, replicates=replicates , groups=Condition) %>%
select(-c(Replica, replicates, TSS, SSw, SSb, MSw, MSb))


df <- df %>% mutate_at(c(4:9), round, input$digits)
})

#### A predefined selection of stats for the table ###########

observeEvent(input$summary_replicate, {
Expand Down Expand Up @@ -1599,6 +1631,21 @@ output$data_difference <- renderDataTable(
)
)

#### Render the data summary as a table ###########

output$data_repeats <- renderDataTable(
datatable(
df_repeats_rounded(),
# colnames = c(ID = 1),
selection = 'none',
extensions = c('Buttons', 'ColReorder'),
options = list(dom = 'Bfrtip', pageLength = 100,
buttons = c('copy', 'csv','excel', 'pdf'),
editable=FALSE, colReorder = list(realtime = FALSE), columnDefs = list(list(className = 'dt-center', targets = '_all'))
)
)
)


############## Render the data summary as a table ###########

Expand All @@ -1624,15 +1671,19 @@ output$legend <- renderText({

HTML_Legend <- append(HTML_Legend, paste('<p><u>Table 3</u>: Statistics for the comparison of the conditions to "',input$zero,'" based on the <b>mean</b> of the replicates.</br>', sep=""))

HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the and 95% confidence interval is an interval estimate. ', sep=""))

HTML_Legend <- append(HTML_Legend, paste('The difference is a point estimate of the size of the effect and the 95% confidence interval is an interval estimate. ', sep=""))


if (input$connect!='blank') {
HTML_Legend <- append(HTML_Legend, paste('The replicates are paired between conditions and a paired t-test is used to calculate the p-value. ', sep=""))
} else if (input$connect=='blank') {
HTML_Legend <- append(HTML_Legend, paste("The replicates are <b>not</b> paired and Welch's t-test is performed to calculate the p-value. ", sep=""))
}

HTML_Legend <- append(HTML_Legend, paste('<p><u>Table 4</u>: Statistics for the repeatability for each of the conditions. For explanation of the parameters see <a href="https://doi.org/10.1593/tlo.09268">Barnhart & Barboriak, 2009</a></br></p>', sep=""))


if (length(df$Condition)>1) {
HTML_Legend <- append(HTML_Legend, paste("</br>The p-values are <b>not corrected</b> for multiple comparisons. Consider alternative statistical analyses.", sep=""))

Expand Down
96 changes: 96 additions & 0 deletions repeatability.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
## Function to calculate measures of repeatability
## Code for testing the function at the bottom

library(tidyverse)

# Function to calculate the 'sum of squares'
sos <- function(x) {
sum((mean(x)-x)^2)
}

# Function that takes a dataframe with at least two columns.
# One column which has (measurement) values and another column that indicates the repeats
# Note, these repeats or the 'observational units' or replicates (Not the subject or 'experimental unit')
# Optional: a third column when multiple conditions/groups are present.
# The result is a dataframe with the repeatability coefficient (RC) and IntraClass Correlation (ICC)
# When groups are defined, the result will be shown for each group

repeatability <- function(df, values, replicates, groups) {

#This is necessary to deal with arguments in tidyverse functions
replicate <- enquo(replicates)
group <- enquo(groups)
value <- enquo(values)

if (rlang::quo_is_missing(replicate)) {
print("Warning: Replicates not identifed")
}

#Add IDs for the subjects
df_id <- df %>% group_by(!! group, !! replicate) %>% mutate(Subject=row_number()) %>% ungroup()

#Calculate n and k
# n is the number of Experimental Units
# k is the number of repeated measurements, or Observational Units
df_id <- df_id %>% group_by(!! replicate, !! group) %>% mutate(n=n()) %>% ungroup()
df_id <- df_id %>% group_by(Subject, !! group) %>% mutate(k=n()) %>% ungroup()

#Calculate 'total sum of squares' for each condition TSS
df_id <- df_id %>% group_by(!! group) %>% mutate(TSS = sos(!! value)) %>% ungroup()

#Simplify the dataframe, will depend on presence of 'groups' argument
if (rlang::quo_is_missing(group)) {
print("Single group")
df_result <- df_id %>% distinct(n, k, .keep_all = TRUE)
} else {
print("Multiple groups")
df_result <- df_id %>% distinct(!! group, .keep_all = TRUE)
}


#Calculate 'within groups sum of squares' SSw for each condition
df_result$SSw <- df_id %>%
group_by(!! group, Subject) %>%
summarize(sos = sos(!! value), .groups = 'drop') %>%
group_by(!! group) %>% summarize(SSw=sum(sos)) %>% pull(SSw)

#Simplify the dataframe by removing irrelevant columns
df_result <- df_result %>% select(-c(Subject, quo_name(value)))

#Calculate the ICC and RC
df_result <- df_result %>%
mutate(SSb = TSS-SSw) %>%
mutate(MSw = SSw / (n*(k-1))) %>%
mutate(MSb = SSb / (n-1)) %>%
mutate(ICC = (MSb - MSw) / (MSb + ((k-1)*MSw))) %>%
# mutate(VR = (MSw * k) / (MSb + ((k-1)*MSw))) %>%
mutate(`total SD` = sqrt(MSb + ((k-1)*MSw)/k)) %>%
#### Add Effective Sample Size = n*k/(1+(k-1)*ICC)
#### DOI: 10.1093/cvr/cvx151
mutate(`Effective N` = (n*k)/(1+(k-1)*ICC)) %>%
mutate(RC = 1.96*sqrt(2) * sqrt(MSw)) %>%
mutate(`RC (95%CI_lo)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.975, (n*(k-1)))),
`RC (95%CI_hi)` = 1.96*sqrt(2)*sqrt(SSw/qchisq(0.025, (n*(k-1))))
)

return(df_result)
}


##########################################
## For testing, load this dataframe:
## df <- read.csv("Table_5_physiotherapy_tidy.csv")
## Run test: df %>% repeatability(values=Value, replicates=Measurement)

## For testing with multiple groups/conditions, load this dataframe:
## df <- read.csv("SystBloodPressure_tidy.csv")
## Run the function like this:
## repeatability(df, values=BP, replicates=Replicate , groups=Method)
## or:
## df %>% repeatability(values=BP, replicates=n , groups=Condition)

## synthetic <- read.csv("synthetic.csv")
## synthetic %>% repeatability(values=Values, replicates=expUnit , groups=Condition)



0 comments on commit 9d81b4e

Please sign in to comment.