-
Notifications
You must be signed in to change notification settings - Fork 5
/
main.nf
137 lines (111 loc) · 3.18 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
/**
 * Log the pipeline usage text through the Nextflow logger.
 *
 * This function only prints; the caller is responsible for terminating the
 * run afterwards (see the `params.help` check that follows).
 *
 * The argument list includes `--genes`, which the workflow reads via
 * `params.genes` and hands to METHOD1.
 */
def helpMessage() {
    log.info """
    =========================================
    COSMO => COrrection of Sample Mislabeling by Omics
    =========================================
    Usage:
    nextflow run main.nf [arguments]
    Arguments:
      --d1_file               Dataset with quantification data at gene level.
      --d2_file               Dataset with quantification data at gene level.
      --cli_file              Sample annotation data.
      --cli_attribute         Sample attribute(s) for prediction. Multiple attributes must be separated by ",".
      --genes                 Gene file passed to method one (cosmo one --genes).
      --outdir                Output folder.
      --help                  Print help message.
    """.stripIndent()
}
// Show the help message and stop when --help is given.
if (params.help) {
    helpMessage()
    exit 0
}

// Fail early when a supplied input path does not exist on disk.
checkPathParamList = [params.d1_file, params.d2_file, params.cli_file]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// All three inputs are mandatory. Each guard tests its OWN parameter so that a
// missing --d2_file / --cli_file produces the matching error message instead of
// falling through to file() with an unset value.
// The variables are assigned without `def` on purpose: the `workflow` block
// below refers to d1_file, d2_file and sample_file by name.
if (params.d1_file)  { d1_file     = file(params.d1_file)  } else { exit 1, 'No file specified with --d1_file' }
if (params.d2_file)  { d2_file     = file(params.d2_file)  } else { exit 1, 'No file specified with --d2_file' }
if (params.cli_file) { sample_file = file(params.cli_file) } else { exit 1, 'No file specified with --cli_file' }

log.info "Sample attribute will be used: $params.cli_attribute \n"
/*
 * PREPROCESS
 *
 * Runs `format_input_data` on the two datasets and the sample annotation
 * file, writing into the task-local directory `out`.
 *
 * Input : the three files as separate path inputs.
 * Output: one tuple of three paths, `out/<name>` for each input, where
 *         <name> is the file name of the corresponding input.
 */
process PREPROCESS {
    label 'process_low'

    input:
    path d1_file
    path d2_file
    path sample_file

    output:
    tuple path("out/${d1_file.name}"), path("out/${d2_file.name}"), path("out/${sample_file.name}")

    script:
    """
    format_input_data \\
        --d1 ${d1_file} \\
        --d2 ${d2_file} \\
        --samples ${sample_file} \\
        --out out
    """
}
/*
 * METHOD1
 *
 * Runs `cosmo one` on the formatted inputs and writes its results to the
 * directory `method1_out`, which is the process output.
 *
 * Input : a (d1, d2, samples) path tuple and a gene file.
 * Params: `params.cli_attribute` is forwarded as --attributes; `task.cpus`
 *         is forwarded as --cpus.
 */
process METHOD1 {
    label 'process_medium'

    input:
    tuple path(d1_file), path(d2_file), path(samplefile)
    path gene_tsv

    output:
    path "method1_out"

    script:
    // The attribute list is a user-supplied string, so it is quoted to reach
    // the tool as a single argument even if it contains whitespace.
    // Path inputs are intentionally left exactly as Nextflow renders them.
    """
    cosmo \\
        one \\
        --d1 $d1_file \\
        --d2 $d2_file \\
        --samples $samplefile \\
        --out method1_out \\
        --genes $gene_tsv \\
        --attributes "${params.cli_attribute}" \\
        --cpus ${task.cpus}
    """
}
/*
 * METHOD2
 *
 * Runs `method2_function.py` on the formatted inputs and writes its results
 * to `method2_out`, which is the process output.
 *
 * Input : a (d1, d2, samples) path tuple.
 * Params: `params.cli_attribute` is forwarded through the -l option.
 */
process METHOD2 {
    label 'process_medium'

    input:
    tuple path(d1_file), path(d2_file), path(samplefile)

    output:
    path "method2_out"

    script:
    // The attribute list is a user-supplied string, so it is quoted to reach
    // the script as a single argument even if it contains whitespace.
    // Path inputs are intentionally left exactly as Nextflow renders them.
    """
    method2_function.py \\
        -d1 ${d1_file} \\
        -d2 ${d2_file} \\
        -s ${samplefile} \\
        -l "${params.cli_attribute}" \\
        -o method2_out
    """
}
/*
 * COMBINE
 *
 * Runs `cosmo combine` over the output folders of METHOD1 and METHOD2 and
 * writes into the task directory (`--out .`) using the prefix `cosmo`.
 * Every path matching `cosmo*` is emitted as output.
 *
 * Input : method-one output folder, method-two output folder, sample file.
 * Params: `params.cli_attribute` is forwarded as --attributes; `task.cpus`
 *         is forwarded as --cpus.
 */
process COMBINE {
    label 'process_medium'

    input:
    path method1_out_folder
    path method2_out_folder
    path sample_file

    output:
    path "cosmo*"

    script:
    // The attribute list is a user-supplied string, so it is quoted to reach
    // the tool as a single argument even if it contains whitespace.
    // Path inputs are intentionally left exactly as Nextflow renders them.
    """
    cosmo \\
        combine \\
        --method-one-out $method1_out_folder \\
        --method-two-out $method2_out_folder \\
        --samples $sample_file \\
        --attributes "${params.cli_attribute}" \\
        --prefix cosmo \\
        --cpus ${task.cpus} \\
        --out .
    """
}
/*
 * Entry workflow: format the inputs once, run both methods on the formatted
 * files, then combine the two result folders.
 */
workflow {
    // Only the first file matched by --genes is used.
    gene_file = Channel.fromPath(params.genes).first()

    PREPROCESS(d1_file, d2_file, sample_file)
    formatted_inputs = PREPROCESS.out

    METHOD1(formatted_inputs, gene_file)
    METHOD2(formatted_inputs)

    // NOTE(review): COMBINE is given the original sample_file, not the
    // formatted sample file emitted by PREPROCESS — confirm this is intended.
    COMBINE(METHOD1.out, METHOD2.out, sample_file)
}