-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpycistarget.wdl
113 lines (92 loc) · 3.72 KB
/
pycistarget.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
version 1.0
# Single-step workflow wrapping the run_pycistarget task defined in this file.
workflow cisTarget {
    # The call passes no inputs explicitly; the task's inputs are therefore
    # left to be supplied from outside this workflow body.
    call run_pycistarget

    output {
        # Re-export the task's tarball output under a workflow-level name.
        File pycistarget_output = run_pycistarget.pycistarget_object
    }
}
# Runs the scenicplus `run_pycistarget` wrapper on topic and DAR region sets,
# copies the results to a bucket location, and returns a tarball of the
# output directory.
task run_pycistarget {
    input {
        # Destination for results, passed to gsutil (e.g. a gs:// bucket path).
        # Trailing slashes are tolerated; they are stripped below.
        String output_dir
        File binarized_topic_region_file # binarized_topic_region.pkl
        File DARs_file # DARs.pkl
        File rankings_db_file # hg38_screen_v10_clust.regions_vs_motifs.rankings.feather
        File scores_db_file # hg38_screen_v10_clust.regions_vs_motifs.scores.feather
        File motif_annotation_file # motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl

        # Value forwarded as `n_cpu` to run_pycistarget. Defaults to 1, the
        # value that was previously hard-coded, independent of `cpu` below.
        Int n_cpu = 1

        # Runtime resources.
        Int cpu = 24
        Int memory = 256
        String docker = "dyeramosu/scenic_plus_terra:1.0.0"
        Int preemptible = 0
        Int disk_space = 128
    }

    # Normalise the destination so "<dir>" and "<dir>/" behave the same.
    String output_dir_stripped = sub(output_dir, "/+$", "")

    command <<<
        set -euo pipefail

        mkdir -p tmpdir
        mkdir -p pycistarget_output_wdl

        # Quoted delimiter: bash must not expand anything inside the Python source.
        python << 'CODE'
        # imports
        import os
        import matplotlib.pyplot as plt
        import numpy as np
        import pandas as pd
        import scanpy as sc
        import pickle
        from pycisTopic.cistopic_class import *

        # load candidate enhancer regions
        # NOTE(review): pickle executes arbitrary code on load; these files are
        # task inputs, so only run this on pickles from a trusted source.
        with open('~{binarized_topic_region_file}', 'rb') as handle:
            region_bin_topics = pickle.load(handle)
        with open('~{DARs_file}', 'rb') as handle:
            markers_dict = pickle.load(handle)
        #markers_dict.pop('Unknown')
        #markers_dict['DNA_repair_TNFa'] = markers_dict.pop('DNA repair / TNFa')
        #markers_dict['Oxidative_Phosphorylation'] = markers_dict.pop('Oxidative phosphorylation')

        # convert to dictionary of pyranges objects
        import pyranges as pr
        from pycistarget.utils import region_names_to_coordinates


        def _to_pyranges(frames_by_name):
            """Map each name to a PyRanges built from its 'chr'-prefixed index entries."""
            converted = {}
            for name, frame in frames_by_name.items():
                # only keep region names that start with 'chr'
                regions = frame.index[frame.index.str.startswith('chr')]
                converted[name] = pr.PyRanges(region_names_to_coordinates(regions))
            return converted


        region_sets = {
            'topics': _to_pyranges(region_bin_topics),
            'DARs': _to_pyranges(markers_dict),
        }

        # define rankings, score and motif annotation database
        rankings_db = '~{rankings_db_file}'
        scores_db = '~{scores_db_file}'
        motif_annotation = '~{motif_annotation_file}'

        # run pycistarget using the run_pycistarget wrapper function
        from scenicplus.wrappers.run_pycistarget import run_pycistarget
        run_pycistarget(
            region_sets = region_sets,
            species = 'homo_sapiens',
            save_path = 'pycistarget_output_wdl/',
            ctx_db_path = rankings_db,
            dem_db_path = scores_db,
            path_to_motif_annotations = motif_annotation,
            run_without_promoters = True,
            n_cpu = ~{n_cpu},
            _temp_dir = 'tmpdir/',
            annotation_version = 'v10nr_clust',
        )

        # try to open menr file: fails the task here if menr.pkl is missing
        # or cannot be deserialised
        import dill
        with open(os.path.join('pycistarget_output_wdl', 'menr.pkl'), 'rb') as handle:
            menr = dill.load(handle)
        CODE

        # Trailing slash makes gsutil treat the destination as a directory, so
        # the object is written as <output_dir>/menr.pkl rather than as an
        # object named <output_dir>.
        gsutil -m cp pycistarget_output_wdl/menr.pkl "~{output_dir_stripped}/"
        tar -czvf pycistarget_output.tar.gz pycistarget_output_wdl
        gsutil rsync -r pycistarget_output_wdl "~{output_dir_stripped}"
    >>>

    output {
        # Gzipped tarball of the whole pycistarget_output_wdl directory.
        File pycistarget_object = "pycistarget_output.tar.gz"
    }

    runtime {
        docker: docker
        memory: memory + "G"
        bootDiskSizeGb: 12
        disks: "local-disk " + disk_space + " HDD"
        cpu: cpu
        preemptible: preemptible
    }
}