What They Forgot to Teach You About R
- Nominare i file evitando spazi, accenti e puntuazione. Usa “-” per la human readability e “_” per isolare i metadata dal nome (es. "2020-02-10_sample1_WB") e identificarli facilmente con regex:
# Split each file name in x (e.g. "2020-02-10_sample1_WB.csv") into its
# metadata fields: "_" separates the fields and "." separates the extension.
# simplify = TRUE returns a character matrix with one row per file name, so
# the number of pieces does not have to be fixed in advance.
metadata <- stringr::str_split(x, "[_\\.]", simplify = TRUE)
-
Crea Rprojects: avviando un notebook da progetto sei sicuro che la wd sia settata nella cartella di appartenenza e che l’ambiente delle variabili sia quello giusto. Non usare mai path assoluti e, soprattutto, non usare mai setwd(). Lascia che sia l’Rproj a richiamare la WD.
-
Riavviare R prima di ogni esecuzione perché:
-
potresti star usando funzioni omonime di librerie precedentemente caricate
-
un altro progetto potrebbe averti portato nella WD sbagliata
-
-
lasciare settato options(stringsAsFactors = FALSE)
-
Carica la libreria tidyverse per ultima, in modo da evitare che altre librerie mascherino le sue funzioni
# Function template: replace the placeholder comment with the real body and
# make the value to hand back the last expression that is evaluated.
myfunction <- function(arg1, arg2, ...) {
  # statements
  object
}
# for each element of x, test whether it matches one of the listed values
x %in% c("value1", "value2", "value3")
# Negated %in%: TRUE for every element of x that has no match in y.
`%!in%` <- function(x, y) {
  !(x %in% y)
}
# pipe operator: x %>% f passes x as the first argument of f
f(x) # is equivalent to
x %>% f
f(x, parameters) # is equivalent to
x %>% f(parameters)
h(g(f(x))) # is equivalent to
x %>% f %>% g %>% h
# use the "." placeholder to pipe x into an argument other than the first
f(y, x) # is equivalent to
x %>% f(y, .)
f(y, data = x) # is equivalent to
x %>% f(y, data = .)
# wrap the right-hand side in braces to use "." inside a nested call
f(g(x)) # is equivalent to
x %>% {f(g(.))}
# %<>% pipes the left-hand side and assigns the result back to it
data$some_variable <- data$some_variable %>% transform
# is equivalent to
data$some_variable %<>% transform
# base R overview of the iris data set
summary(iris)
# alternative overview from the skimr package
skimr::skim(iris)
# %in% tests whether the left-hand value occurs in the right-hand vector;
# the expected console output is kept as a comment so the snippet still parses
3 %in% c(1, 2, 3, 4, 5, 6, 7, 8)
#> [1] TRUE
# keep only the rows of flights where both month and day are 1
flights[flights$month == 1 & flights$day == 1, ]
# is equivalent to
# NOTE(review): the two forms presumably differ when month or day is NA
# (filter() drops those rows) - confirm against the dplyr::filter documentation
flights %>% filter(month == 1, day == 1)
# multiple arguments are equivalent to &
flights %>% filter(month == 1 & day == 1)
# select a single column
flights %>% select(tailnum)
# select multiple columns
flights %>% select(tailnum, day, month)
# select a column and rename it
flights %>% select(tail_num = tailnum)
# select all but some columns
flights %>% select(-(year:day)) # or
flights %>% select(-c(year, month))
# select columns based on their name
flights %>% select(starts_with(c("one", "th")))
# rename a column
flights %>% rename(tail_num = tailnum)
# move the listed columns to the front; everything() selects all the
# remaining columns after them
df <- df %>% select(first_column, second_column, everything())
# add new columns computed from existing ones; gain is created first and then
# reused to compute gain_per_hour in the same call
mutate(flights,
gain = arr_delay - dep_delay,
gain_per_hour = gain / (air_time / 60)
)
# use transmute() instead of mutate() if you want to keep only the new columns
# set height to NA only in the rows where name is "Mike"
# (plain ASCII quotes: typographic quotes are not valid R string delimiters)
df <- df %>% mutate(height = replace(height, name == "Mike", NA))
# mean and standard deviation of the departure delay, ignoring missing values
flights %>%
  summarise(
    delay.mean = mean(dep_delay, na.rm = TRUE),
    delay.sd = sd(dep_delay, na.rm = TRUE)
  )
# transpose a data frame: the original row names become the column names and
# the original column names end up in the "var" column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread() and
# keep rows and columns in order of first appearance instead of sorting them.
# NOTE: all the columns being transposed must share a common type, because
# pivot_longer() does not silently coerce mixed types.
df <- df %>%
  rownames_to_column() %>%
  pivot_longer(-rowname, names_to = "var", values_to = "value") %>%
  pivot_wider(names_from = rowname, values_from = value)
# NOTE(review): data.frame is presumably a placeholder for your own data
# frame object - confirm, since it is also the name of a base R function
# rename the second column
colnames(data.frame)[2] <- "newname2"
# select all the rows where the column col contains the literal text "match"
data.frame %>% filter(str_detect(col, fixed("match")))
# will split a string like "CTRL-2day LowGlucose" into "CTRL", "2day" and
# "LowGlucose"
data.frame %>% separate(
  col, # the column to split
  into = c("col1", "col2", "col3"), # names of the new columns
  # split on any punctuation or blank character; the POSIX classes are written
  # inside a bracket expression so that every regex engine accepts them
  sep = "[[:punct:][:blank:]]",
  remove = TRUE # remove the original column
)
# convert col to a factor with an explicit order of the levels
data.frame$col <- factor(
  data.frame$col,
  levels = c("first", "second", "third")
)
# Standard error of the mean of x, ignoring missing values: the standard
# deviation of the non-NA values divided by the square root of their count.
st.err <- function(x) {
  sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
}
# label the y axis with each value multiplied by 100 and followed by "%"
scale_y_continuous(labels = function(x) paste0(x*100, "%"))
# set the title of the fill legend
# NOTE(review): scale_fill_x is presumably a placeholder for the fill scale in
# use - confirm
+ scale_fill_x(name = "Title")
# hide the fill legend ("none" replaces the deprecated FALSE)
+ guides(fill = "none")
# or
+ scale_fill_x(guide = "none")
# named vector that maps each level name to a colour
myColors <- c("level1" = "#000000", "level2" = "#003300")
# or
# take one "Set1" colour per level of df$factors and name it after the level
library(RColorBrewer)
myColors <- brewer.pal(length(levels(df$factors)), "Set1")
names(myColors) <- levels(df$factors)
# use the named colours for the colour scale
+ scale_colour_manual(values = myColors)
# reproducible toy data: the seed is fixed so the random draws are repeatable
set.seed(999)
n <- 1000
df <- data.frame(
  factors = sample(letters[1:8], n, replace = TRUE),
  x = rnorm(n),
  y = runif(n)
)
# noisy linear relationship y = 5 * x + 3; the sample size is n, because the
# upper-case N used before was never defined
x <- runif(n)
y <- 5 * x + 3 + rnorm(n)