-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkidney-stone-treatment-analysis-r.Rmd
75 lines (63 loc) · 1.8 KB
/
kidney-stone-treatment-analysis-r.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
---
title: "R Notebook"
output: html_notebook
---
```{r}
# Load the readr and dplyr packages
library(readr)
library(dplyr)
# Read datasets kidney_stone_data.csv into data
data <- read_csv("C:/Users/User/Documents/Sandbox/simpsons-paradox/datasets/kidney_stone_data.csv")
# Take a look at the first few rows of the dataset
head(data)
```
```{r}
# Calculate the number and frequency of success and failure of each treatment
data %>%
group_by(treatment, success) %>%
summarise(N = n()) %>%
mutate(Freq = round(N/sum(N), 3))
```
```{r}
# Calculate number and frequency of success and failure by stone size for each treatment
sum_data <-
data %>%
group_by(treatment, stone_size, success ) %>%
summarise(N = n()) %>%
mutate(Freq = round(N/sum(N),3))
# Print out the data frame we just created
print(sum_data)
```
```{r}
# Load ggplot2
library(ggplot2)
# Create a bar plot to show stone size count within each treatment
sum_data %>%
# x-axis as "treatment" and the y-axis as "N" (the count)
ggplot(aes(x = treatment, y = N)) +
geom_bar(aes(fill = stone_size), stat="identity")
```
```{r}
# Load the broom package
library(broom)
# Run a Chi-squared test
trt_ss <- chisq.test(data$treatment, data$stone_size)
# Print out the result in tidy format
tidy(trt_ss)
```
```{r}
# Run a multiple logistic regression
m <- glm(data = data, data$success ~ data$treatment + data$stone_size, family = "binomial")
# Print out model coefficient table in tidy format
tidy(m)
```
```{r}
# Save the tidy model output into an object
tidy_m <- tidy(m)
# Plot the coefficient estimates with 95% CI for each term in the model
tidy_m %>%
ggplot(aes(x = term, y = estimate)) +
geom_pointrange(aes(ymin = estimate - 1.96 * std.error,
ymax = estimate + 1.96 * std.error)) +
geom_hline(yintercept = 0)
```