-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_sex_age.R
67 lines (48 loc) · 2.52 KB
/
add_sex_age.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# ____________________________________________________________________________
# Script information ####
# title: Add sample-level metadata
# author: Jose Alquicira Hernandez
# date: 2021-11-10
# description: Add sample-level metadata (sex, age, ancestry pirlier)
# ____________________________________________________________________________
# HPC details ####
# screen -S add_meta
# qrsh -N add_meta -l mem_requested=250G -pe smp 1 -q short.q
# conda activate r-4.1.1
# ____________________________________________________________________________
# Import libraries ####
library("dsLib")
library("Seurat")
library("SeuratDisk")
library("data.table")
# ____________________________________________________________________________
# Set output ####
output <- here("results", "2021-11-10_add_metadata")
dir.create(output)
# ____________________________________________________________________________
# Import data ####
data <- readRDS("results/2021-10-30_aligned_data/integrated.RDS")
sample_md <- fread("data/age_sex_info.tsv")
fam <- fread("../onek1k_genotypes/10-final/10-final.fam")
fam[, individual := paste(V1, V2, sep = "_")]
setnames(sample_md, "sampleid", "individual")
# ____________________________________________________________________________
# Add sex and age sample-level metadata ####
md <- as.data.table(data[[]], keep.rownames = "barcode")
md <- merge(md, sample_md, by = "individual")
md <- md[, .(barcode, sex, age)]
md <- as.data.frame(md)
rownames(md) <- md$barcode
md$barcode <- NULL
data <- AddMetaData(data, md)
i <- fifelse(data$individual %chin% fam$individual, FALSE, TRUE)
data$ethnic_outlier <- i
# Removing this individual because of low number of cells (57)
data <- data[, data$individual != "966_967"]
# ____________________________________________________________________________
# Export data ####
SaveH5Seurat(data, filename = here(output, "onek1k.h5seurat"), overwrite = TRUE)
saveRDS(data, file = here(output, "onek1k.RDS"))
# ____________________________________________________________________________
# Session info ####
print_session(here(output))