-
Notifications
You must be signed in to change notification settings - Fork 4
/
global.R
168 lines (133 loc) · 7.52 KB
/
global.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
library(shiny)
library(shinythemes)
library(shinyWidgets)
library(shinycssloaders)
library(feather, quietly = T)
library(dplyr, quietly = T)
library(tidyr, quietly = T)
library(data.table, quietly = T)
library(tidyverse, quietly = T)
library(ggplot2, quietly = T)
library(cowplot, quietly = T)
library(lattice, quietly = T)
library(stringr, quietly = T)
library(viridis, quietly = T)
library(scales, quietly = T)
library(stringi, quietly = T) # use function stri_dup to add white space
library(DT, quietly = T)
library(magrittr, quietly = T)
library(karyoploteR, quietly = T) #Need R > 3.5
library(TxDb.Hsapiens.UCSC.hg38.knownGene, quietly = T)
source("utilities/sql_queries.R")
source("utilities/meta.R")
source("utilities/comb_pval.R")
#source("utilities/name_convert.R")
source("utilities/forestplots.R")
source("utilities/karyoplot.R")
#source("utilities/gviz_gene_track.R")
# ---------------------------------------------------------------------------
# Selector options: tissue, asthma phenotype, treatment/exposure, experiment
#
# Every vector below maps a human-readable label (the element name) to a short
# internal code (the element value). Each *_selected vector is a subset of the
# matching *_choices vector.
# ---------------------------------------------------------------------------

# Tissue / cell types
tissue_choices <- c(
  "Airway smooth muscle" = "ASM",
  "Bronchial epithelium" = "BE",
  "Large airway epithelium" = "LAE",
  "Lens epithelium" = "LEC",
  "BEAS-2B" = "BEAS-2B",
  "Nasal epithelium" = "NE",
  "Small airway epithelium" = "SAE",
  "Trachea" = "trachea",
  "Buccal mucosa" = "Buccal",
  "Whole lung" = "Lung",
  "Skeletal muscle myotubes" = "myotubes",
  "CD4" = "CD4",
  "CD8" = "CD8",
  "MCF10A-Myc" = "MCF10A-Myc",
  "CD3" = "CD3",
  "A549" = "A549",
  "Lymphoblastoid cell" = "LCL",
  "Macrophage" = "MACRO",
  "Alveolar macrophages" = "AM",
  "Peripheral blood mononuclear cell" = "PBMC",
  "White blood cell" = "WBC",
  "Whole blood" = "Blood",
  "Lymphoblastic leukemia cell" = "chALL",
  "Osteosarcoma U2OS cell" = "U2OS",
  "WI38 fibroblast" = "WI38"
)
tissue_selected <- tissue_choices[
  c("Airway smooth muscle", "Bronchial epithelium")
]

# Asthma phenotypes
asthma_choices <- c(
  "Allergic asthma" = "allergic_asthma",
  "Asthma" = "asthma",
  "Fatal asthma" = "fatal_asthma",
  "Mild to moderate asthma" = "mild_to_moderate_asthma",
  "Severe asthma" = "severe_asthma",
  "Mild asthma with rhinitis" = "rhinitis_mild_asthma",
  "Severe asthma with rhinitis" = "rhinitis_severe_asthma",
  "Non-allergic asthma" = "non_allergic_asthma"
)
asthma_selected <- asthma_choices[c("Asthma", "Severe asthma")]

# Smoking and pollutant exposures are kept as their own vectors and are also
# appended, in that order, to the drug treatments in treatment_choices.
smoking_choices <- c(
  "Cigarette" = "cig",
  "E-cigarette" = "ecig"
)
pollutant_choices <- c(
  "Polycyclic aromatic hydrocarbons" = "PAH",
  "Particulate matter" = "pollutant"
)
treatment_choices <- c(
  "β2-agonist" = "BA",
  "Glucocorticoid" = "GC",
  "Phosphodiesterase inhibitor" = "PDE",
  "Vitamin D" = "vitD",
  smoking_choices,
  pollutant_choices
)
treatment_selected <- treatment_choices[c("Glucocorticoid", "Cigarette")]

# Experiment types
experiment_choices <- c(
  "Cell-based assay" = "invitro",
  "Human response study" = "invivo"
)
experiment_selected <- experiment_choices["Cell-based assay"]
# GWAS study options: label (name) -> internal code (value).
gwas_choices <- c(
  "Ferreira" = "snp_fer_subs",
  "GABRIEL" = "snp_gabriel_subs",
  "GRASP" = "snp_subs",
  "EVE all subjects" = "snp_eve_all_subs",
  "EVE African Americans" = "snp_eve_aa_subs",
  "EVE European Americans" = "snp_eve_ea_subs",
  "EVE Latinos" = "snp_eve_la_subs",
  "TAGC Multiancestry" = "snp_TAGC_multi_subs",
  "TAGC European ancestry" = "snp_TAGC_euro_subs",
  "UKBiobank Asthma" = "snp_UKBB_asthma_subs",
  "UKBiobank COPD" = "snp_UKBB_copd_subs",
  "UKBiobank ACO" = "snp_UKBB_aco_subs"
)
gwas_selected <- gwas_choices[c("GRASP", "EVE all subjects")]
# Disabled alternative selection:
# gwas_selected <- gwas_choices["UKBiobank Asthma"]
# P-value threshold options.
# Plain-text labels are right-padded with 18 non-breaking spaces (U+00A0)
# built by stri_dup(); NOTE(review): presumably this widens the rendered
# selector - confirm against the UI code.

# Selected threshold: padded "1x10-5" label -> "nominal".
# Disabled alternative (0.05 / "normal"):
# pval_select <- setNames("normal", paste0("0.05", stri_dup(intToUtf8(160), 18)))
pval_select <- setNames(
  "nominal",
  paste0("1x10-5", stri_dup(intToUtf8(160), 18))
)

# One row per threshold: `value` is the internal key, `label` the padded plain
# text and `html` the superscript markup. The 0.05 row has no superscript, so
# its `html` cell carries the same padded text as its `label` cell.
pval_for_select <- tibble(
  value = c("normal", "nominal", "genomewide"),
  label = paste0(
    c("0.05", "1x10-5", "5x10-8"),
    stri_dup(intToUtf8(160), 18)
  ),
  html = c(
    paste0("0.05", stri_dup(intToUtf8(160), 18)),
    "1x10<sup>-5</sup>",
    "5x10<sup>-8</sup>"
  )
)
# Gene list ----
# `all_genes` is the table read from the hg38 gene-symbol/coordinate RDS file;
# `gene_list` is its `symbol` column as a plain vector.
#
# Sources that were used previously and are now disabled:
#   realgar_data/gene_list.feather (column V1)
#   /mnt/volume_nyc3_01/realgar_data/gene_symbol_POS.RDS
#   /mnt/volume_nyc3_01/realgar_files/gene_symbol_coords_hg19.RDS
all_genes <- readRDS(file = "realgar_data/gene_symbol_coords_hg38.RDS")
gene_list <- all_genes$symbol %>% as.vector()

# Gene choices (not in use):
#   genec <- read_feather("realgar_data/Sig_gene_list.feather")
#   gene_choices <- as.vector(genec$V1)
#   rm(genec)
# ---------------------------------------------------------------------------
# Reference tables read from feather files
# ---------------------------------------------------------------------------

# Infosheet describing all gene expression and GWAS datasets.
# (Disabled alternative: read.csv() on
# realgar_data/Microarray_data_infosheet_latest_R.csv.)
Alldata_Info <- read_feather(
  "realgar_data/Microarray_data_infosheet_latest_R.feather"
)

# GWAS rows and gene expression rows go into separate tables; per the original
# note, the forest plot text columns get messed up otherwise. Rows whose App
# is NA end up in neither table.
GWAS_Dataset_Info <- Alldata_Info %>%
  dplyr::filter(App == "GWAS")

# For the expression table, PMID is coerced to character so that missing
# values can be blanked out, and every row gets Report = "QC".
Dataset_Info <- Alldata_Info %>%
  dplyr::filter(App != "GWAS") %>%
  dplyr::mutate(
    PMID = tidyr::replace_na(as.character(PMID), ""),
    Report = "QC"
  )

# The combined infosheet is no longer needed once it has been split.
rm(Alldata_Info)

# Rows of the expression infosheet whose Asthma column is "BA_PDE".
BA_PDE_Info <- dplyr::filter(Dataset_Info, Asthma == "BA_PDE")

# ChIP-Seq dataset descriptions.
chipseq_dataset <- read_feather("realgar_data/realgar_ChIPSeq_datasets.feather")

# ---------------------------------------------------------------------------
# Gene track annotation (gene locations, TFBS, SNPs, etc.)
# ---------------------------------------------------------------------------

# Disabled: chromosome band info for the ideogram (original note: makes the
# ideogram load 25 seconds faster).
# chrom_bands <- read_feather("realgar_data/chrom_bands.feather")

# GR-binding sites.
GRbinding <- read_feather("realgar_data/GR_binding_sites_sig.feather")
# ---------------------------------------------------------------------------
# Transcriptomic data
# ---------------------------------------------------------------------------
# Dataset info and gene-name tables, each converted to a tibble after reading.
# The "lcte" filename prefix stands for "lung cell transcriptome explorer".
sras <- tibble::as_tibble(
  read_feather("transcriptomics/asthmagenes_deseq2/lcte_dataset_info_asm.feather")
)
all_genes_te <- tibble::as_tibble(
  read_feather("transcriptomics/lcte_gene_names.feather")
)
unfiltered_genes <- tibble::as_tibble(
  read_feather("transcriptomics/lcte_sleuth_unfiltered_genes.feather")
)
# Create rnaseq_choices from sras: a vector of SRA IDs whose names read
# "<SRA_ID> (<tissue label>)".
#
# The tissue label is looked up with match(), which yields exactly one index
# per study, so tissue_long is always a character vector aligned with sras.
# The earlier sapply() lookup returned a list whenever a tissue code was
# missing from tissue_choices, which made paste0() emit labels such as
# "SRPxxx (character(0))". Codes without an entry in tissue_choices now fall
# back to the raw code. local() keeps the helper variables out of the global
# environment.
tissue_long <- local({
  codes <- as.character(sras$Tissue)
  labels <- names(tissue_choices)[match(codes, tissue_choices)]
  unmapped <- is.na(labels)
  labels[unmapped] <- codes[unmapped]
  labels
})
studies <- paste0(sras$SRA_ID, " (", tissue_long, ")")
rnaseq_choices <- setNames(sras$SRA_ID, studies)
# Gene symbols present in the filtered datasets; per the original note, used
# later on to check whether an entered gene symbol is valid.
# Disabled earlier derivation, which collected symbols per study from `de`:
#   unlist(lapply(unname(rnaseq_choices),
#                 function(study) de[[study]][, "gene_symbol"])) %>% unique()
deseq2_filtered_genes_tb <- read_feather(
  "transcriptomics/lcte_sleuth_filtered_genes.feather"
)
deseq2_filtered_genes <- deseq2_filtered_genes_tb[["gene_symbol"]]