-
Notifications
You must be signed in to change notification settings - Fork 1
/
init_age.R
147 lines (103 loc) · 5.04 KB
/
init_age.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
library(tidyverse)
library(SummarizedExperiment)
library(data.table)
library(GenomicRanges)
library(DBI)
library(dplyr)
library(here)
library(doParallel)
library(Biostrings)
library(tidyverse)
library(protr)
library(optparse)
# source("/home/sruiz/PROJECTS/recount3-database-project/init_age.R")
base_folder <- here::here()
dependencies_folder <- paste0(base_folder, "/dependencies/")
#####################################
## LOAD SOURCE SCRIPTS
#####################################
scripts_folder <- paste0(base_folder, "/scripts/")
setwd(file.path(scripts_folder))
files.sources = list.files()
sapply(files.sources, source)
setwd(file.path(base_folder))
#####################################
## SET MAIN VARIABLES
## AND PATHS
#####################################
gtf_version <- 111
supportive_reads <- 1
replace <- T
data_source <- "data_sources/gtex"
project_name <- paste0("GTEX_", supportive_reads, "read_subsampleFALSE")
database_name <- paste0(project_name, "_age")
levelqc1_folder <- paste0(base_folder, "/database/", project_name, "/", gtf_version, "/")
database_folder <- paste0(base_folder, "/database/", database_name, "/", gtf_version, "/")
results_folder <- paste0(base_folder, "/results/", project_name, "/", gtf_version, "/")
tpm_folder <- paste0(base_folder, "/results/tpm/")
age_projects <- readRDS(file = paste0(results_folder, "/all_final_projects_used.rds"))
##################################
## CALLS - SQL GENERATION
##################################
if ( !file.exists(paste0(results_folder, "/age_init_projects.rds")) ) {
project_init <- age_stratification_init_data(projects.id = age_projects,
gtf.version = gtf_version,
project.name = project_name,
results.folder = results_folder) %>%
as_tibble()
saveRDS(object = project_init,
file = paste0(results_folder, "/age_init_projects.rds"))
} else {
project_init <- readRDS(file = paste0(results_folder, "/age_init_projects.rds"))
}
age_projects <- project_init$project %>% unique()
for (project_id in age_projects) {
# project_id <- age_projects[1]
# project_id <- "BRAIN"
print(paste0(Sys.time(), " --> ", project_id))
project_init_local <- project_init %>% filter(project == project_id)
project_results_folder <- file.path(results_folder, project_id)
age_stratification_annotate(age.groups = project_init_local$age_group %>% unique(),
project.id = project_id,
gtf.version = gtf_version,
project.name = project_name,
age.samples.clusters = project_init_local,
results.folder = project_results_folder)
age_stratification_junction_pairing(age.groups = project_init_local$age_group %>% unique(),
project.id = project_id,
gtf.version = gtf_version,
project.name = project_name,
age.samples.clusters = project_init_local,
results.folder = project_results_folder,
replace)
}
GetAllAnnotatedSplitReads(recount3.project.IDs = age_projects,
all.clusters = project_init$age_group %>% unique(),
database.folder = database_folder,
results.folder = results_folder,
num.cores = 8)
GetAllRawJxnPairings(recount3.project.IDs = age_projects,
all.clusters = project_init$age_group %>% unique(),
database.folder = database_folder,
results.folder = results_folder,
num.cores = 8)
age_projects <- readRDS(file = paste0(results_folder, "/all_final_projects_used.rds"))
GenerateRecount3TPM(recount3.project.IDs = age_projects,
data.source = data_source,
tpm.folder = tpm_folder,
results.folder = results_folder)
TidyDataPiorSQL(recount3.project.IDs = age_projects,
levelqc1.folder = levelqc1_folder,
all.clusters = project_init$age_group %>% unique(),
database.folder = database_folder,
results.folder = results_folder)
GenerateTranscriptBiotypePercentage(gtf.version = gtf_version,
database.folder = file.path(base_folder, "database", project_name, gtf_version),
results.folder = database_folder)
database_path <- paste0(database_folder, "/", database_name, ".sqlite")
SqlDatabaseGeneration(database.path = database_path,
recount3.project.IDs = age_projects,
remove.all = T,
database.folder = database_folder,
results.folder = results_folder,
gtf.version = gtf_version)