-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdownstream_scenic_analysis.wdl
156 lines (133 loc) · 5.85 KB
/
downstream_scenic_analysis.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
version 1.0
workflow filter_scenicplus {
call run_filter_scplus
output {
File downstream_analysis_output = run_filter_scplus.filtered_scplus_object
}
}
task run_filter_scplus {
input {
String output_dir # gbucket (add / at end)
File scplus_obj_file # scplus_obj2.pkl
File region_ranking_file # region_ranking.pkl
File gene_ranking_file # gene_ranking.pkl
Int cpu = 24
Int memory = 256
String docker = "dyeramosu/scenic_plus_terra:1.0.0"
Int preemptible = 0
Int disk_space = 128
}
command <<<
set -e
mkdir tmpdir
mkdir downstream_scenicplus_output_wdl
python << CODE
# imports
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import pickle
import dill
from pycisTopic.cistopic_class import *
import scipy.stats as st
# load scenic plus object
scplus_obj = dill.load(open('~{scplus_obj_file}', 'rb'))
# filter scenicplus output
from scenicplus.preprocessing.filtering import apply_std_filtering_to_eRegulons
apply_std_filtering_to_eRegulons(scplus_obj)
# score the enrichment of eRegulons using the AUCell function
from scenicplus.eregulon_enrichment import score_eRegulons
region_ranking = dill.load(open('~{region_ranking_file}', 'rb'))
gene_ranking = dill.load(open('~{gene_ranking_file}', 'rb'))
score_eRegulons(scplus_obj,
ranking = region_ranking,
eRegulon_signatures_key = 'eRegulon_signatures_filtered',
key_added = 'eRegulon_AUC_filtered',
enrichment_type= 'region',
auc_threshold = 0.05,
normalize = False,
n_cpu = 5)
score_eRegulons(scplus_obj,
gene_ranking,
eRegulon_signatures_key = 'eRegulon_signatures_filtered',
key_added = 'eRegulon_AUC_filtered',
enrichment_type = 'gene',
auc_threshold = 0.05,
normalize= False,
n_cpu = 5)
# eRegulon dimensionality reduction
from scenicplus.dimensionality_reduction import run_eRegulons_tsne, run_eRegulons_umap
run_eRegulons_umap(
scplus_obj = scplus_obj,
auc_key = 'eRegulon_AUC',
reduction_name = 'eRegulons_UMAP', #overwrite previously calculated UMAP
)
run_eRegulons_tsne(
scplus_obj = scplus_obj,
auc_key = 'eRegulon_AUC',
reduction_name = 'eRegulons_tSNE', #overwrite previously calculated tSNE
)
# compute correlation between TF expression and target region enrichment scores (AUC values)
from scenicplus.cistromes import TF_cistrome_correlation, generate_pseudobulks
generate_pseudobulks(
scplus_obj = scplus_obj,
variable = 'GEX_states',
auc_key = 'eRegulon_AUC_filtered',
signature_key = 'Gene_based')
generate_pseudobulks(
scplus_obj = scplus_obj,
variable = 'GEX_states',
auc_key = 'eRegulon_AUC_filtered',
signature_key = 'Region_based')
TF_cistrome_correlation(
scplus_obj,
use_pseudobulk = True,
variable = 'GEX_states',
auc_key = 'eRegulon_AUC_filtered',
signature_key = 'Gene_based',
out_key = 'filtered_gene_based')
TF_cistrome_correlation(
scplus_obj,
use_pseudobulk = True,
variable = 'GEX_states',
auc_key = 'eRegulon_AUC_filtered',
signature_key = 'Region_based',
out_key = 'filtered_region_based')
# select for eRegulons
thresholds = {
'rho': [-0.65, 0.60],
'n_targets': 0
}
selected_cistromes = scplus_obj.uns['TF_cistrome_correlation']['filtered_region_based'].loc[
np.logical_or(
scplus_obj.uns['TF_cistrome_correlation']['filtered_region_based']['Rho'] > thresholds['rho'][1],
scplus_obj.uns['TF_cistrome_correlation']['filtered_region_based']['Rho'] < thresholds['rho'][0]
)]['Cistrome'].to_list()
selected_eRegulons = [x.split('_(')[0] for x in selected_cistromes]
selected_eRegulons_gene_sig = [
x for x in scplus_obj.uns['eRegulon_signatures_filtered']['Gene_based'].keys()
if x.split('_(')[0] in selected_eRegulons]
selected_eRegulons_region_sig = [
x for x in scplus_obj.uns['eRegulon_signatures_filtered']['Region_based'].keys()
if x.split('_(')[0] in selected_eRegulons]
# save the results in the scenicplus object
scplus_obj.uns['selected_eRegulon'] = {'Gene_based': selected_eRegulons_gene_sig, 'Region_based': selected_eRegulons_region_sig}
# save new scplus object
dill.dump(scplus_obj, open(os.path.join('downstream_scenicplus_output_wdl', 'filtered_scplus_obj.pkl'), 'wb'), protocol=-1)
CODE
gsutil -m cp downstream_scenicplus_output_wdl/filtered_scplus_obj.pkl ~{output_dir}
>>>
output {
File filtered_scplus_object = 'downstream_scenicplus_output_wdl/filtered_scplus_obj.pkl'
}
runtime {
docker: docker
memory: memory + "G"
bootDiskSizeGb: 12
disks: "local-disk " + disk_space + " HDD"
cpu: cpu
preemptible: preemptible
}
}