Skip to content

Commit d6584a2

Browse files
committedFeb 9, 2023
Rapid works now
1 parent 0ee65fc commit d6584a2

File tree

4 files changed

+80
-37
lines changed

4 files changed

+80
-37
lines changed
 

‎find

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
2+
3+
pwd
4+
docker run --rm -it -v `pwd`/workdir:/media -v /etc/localtime:/etc/localtime:ro genx_relatives:latest launcher.py find "$@"

‎launcher.py

+58-3
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,57 @@ def get_parser_args():
201201
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
202202
"""
203203
)
204-
204+
205+
parser.add_argument(
206+
'--rapid-error-rate',
207+
default=0.0025,
208+
type=float,
209+
help="""
210+
Minimum number of SNPs in IBD segment.
211+
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
212+
"""
213+
)
214+
215+
parser.add_argument(
216+
'--rapid-min-snp',
217+
default=500,
218+
type=int,
219+
help="""
220+
Minimum number of SNPs in IBD segment.
221+
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
222+
"""
223+
)
224+
225+
parser.add_argument(
226+
'--rapid-num-runs',
227+
default=10,
228+
type=int,
229+
help="""
230+
Minimum number of SNPs in IBD segment.
231+
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
232+
"""
233+
)
234+
235+
parser.add_argument(
236+
'--rapid-num-success',
237+
default=2,
238+
type=int,
239+
help="""
240+
Minimum number of SNPs in IBD segment.
241+
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
242+
"""
243+
)
244+
245+
parser.add_argument(
246+
'--rapid-seg-len',
247+
default=5.0,
248+
type=float,
249+
help="""
250+
Minimum number of SNPs in IBD segment.
251+
Smaller values of it tend to give more distant matches than default 500 and more false-positives.
252+
"""
253+
)
254+
205255
parser.add_argument(
206256
'--stat-file',
207257
default='stat_file.txt',
@@ -407,8 +457,8 @@ def copy_input(input_dir, working_dir, samples_file):
407457
config_dict['ref_dir'] = args.ref_directory
408458
if args.chip:
409459
config_dict['chip'] = args.chip
410-
if args.flow not in ['ibis', 'ibis-king', 'germline-king']:
411-
raise ValueError(f'--flow can be one of the ["ibis", "ibis-king", "germline-king"] and not {args.flow}')
460+
if args.flow not in ['ibis', 'ibis-king', 'germline-king', 'rapid']:
461+
raise ValueError(f'--flow can be one of the ["ibis", "ibis-king", "germline-king", "rapid"] and not {args.flow}')
412462
config_dict['flow'] = args.flow
413463
if args.command in ['preprocess', 'simulate', 'reference', 'bundle', 'simbig', 'remove_relatives']:
414464
config_dict['remove_imputation'] = args.remove_imputation
@@ -421,6 +471,11 @@ def copy_input(input_dir, working_dir, samples_file):
421471
config_dict['ibis_seg_len'] = args.ibis_seg_len
422472
config_dict['ibis_min_snp'] = args.ibis_min_snp
423473

474+
config_dict['rapid_error_rate'] = args.rapid_error_rate
475+
config_dict['rapid_min_snp'] = args.rapid_min_snp
476+
config_dict['rapid_num_runs'] = args.rapid_num_runs
477+
config_dict['rapid_num_success'] = args.rapid_num_success
478+
config_dict['rapid_seg_len'] = args.rapid_seg_len
424479
config_dict['missing_samples'] = args.missing_samples
425480
config_dict['alt_hom_samples'] = args.alt_hom_samples
426481
config_dict['het_samples'] = args.het_samples

‎rules/relatives_rapid.smk

+16-34
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ rule index_and_split:
66
input:
77
vcf = "preprocessed/data.vcf.gz"
88
output:
9-
"vcf/for_rapid_{chrom}.vcf.gz"
9+
vcf = "vcf/for_rapid_{chrom}.vcf.gz"
1010
conda:
11-
"../envs/bcftools.yaml"
11+
"bcftools"
1212
log:
1313
"logs/vcf/index_and_split-{chrom}.log"
1414
benchmark:
@@ -25,7 +25,7 @@ rule estimate_rapid_params:
2525
conda:
2626
"rapid_params"
2727
log:
28-
"logs/rapid/estimate_rapid_params.log"
28+
"logs/rapid/estimate_rapid_params-{chrom}.log"
2929
output:
3030
rapid_params = "rapid/params_{chrom}"
3131
params:
@@ -40,10 +40,10 @@ rule estimate_rapid_params:
4040
rule interpolate:
4141
input:
4242
vcf = rules.index_and_split.output['vcf'],
43-
cmmap=cmmap
43+
cmmap=CMMAP
4444
output: "cm/chr{chrom}.cm.g"
4545
conda:
46-
"../envs/interpolation.yaml"
46+
"interpolation"
4747
log:
4848
"logs/cm/interpolate-{chrom}.log"
4949
script:
@@ -58,7 +58,7 @@ rule erase_dosages:
5858
params:
5959
vcf='vcf/erased_chr{chrom}'
6060
conda:
61-
'../envs/bcftools.yaml'
61+
'bcftools'
6262
shell:
6363
"bcftools annotate -x 'fmt' {input.vcf} -O z -o {output.vcf}"
6464
'''
@@ -74,50 +74,32 @@ rule rapid:
7474
window_size=3,
7575
output_folder='rapid'
7676
output:
77-
"rapid/results_{chrom}.max.gz"
77+
seg = "rapid/results_{chrom}.max.gz"
78+
conda:
79+
"rapid"
7880
shell:
7981
"""
80-
rapid -i {input.vcf} -g {input.g} -d {params.min_cm_len} -o {params.output_folder} \
81-
-w {params.window_size} -r {params.num_runs} -s {params.num_success}"
82+
rapidibd -i {input.vcf} -g {input.g} -d {params.min_cm_len} -o {params.output_folder}/chr{wildcards.chrom} \
83+
-w {params.window_size} -r {params.num_runs} -s {params.num_success}
8284
"""
8385

8486

8587
rule merge_rapid_segments:
8688
input:
87-
seg = expand('rapid/results_{chrom}.max.gz', chrom=CHROMOSOMES)
89+
seg = expand('rapid/chr{chrom}/results.max.gz', chrom=CHROMOSOMES)
8890
output:
8991
seg = "rapid/results.max"
9092
shell:
9193
"""
9294
for file in {input}
9395
do
94-
cat ${{file}} >> {output}
96+
zcat ${{file}} >> {output}
9597
done
9698
"""
9799

98-
checkpoint transform_ibis_segments:
99-
input:
100-
ibd = rules.merge_rapid_segments.output.seg,
101-
fam = "preprocessed/data.fam"
102-
output:
103-
bucket_dir = directory("ibd")
104-
log:
105-
"logs/ibis/transform_ibis_segments.log"
106-
conda:
107-
"evaluation"
108-
script:
109-
"../scripts/transform_ibis_segments.py"
110-
111-
112-
def aggregate_input(wildcards):
113-
checkpoints.transform_ibis_segments.get()
114-
ids = glob_wildcards(f"ibd/{{id}}.tsv").id
115-
return expand(f"ibd/{{id}}.tsv", id=ids)
116-
117-
118100
rule ersa:
119101
input:
120-
ibd = aggregate_input
102+
ibd = rules.merge_rapid_segments.output['seg']
121103
output:
122104
"ersa/relatives.tsv"
123105
conda:
@@ -155,8 +137,8 @@ rule ersa:
155137

156138
rule postprocess_ersa:
157139
input:
158-
ibd=rules.ibis.output['ibd'],
159-
ersa=rules.ersa.output[0]
140+
ibd = rules.merge_rapid_segments.output['seg'],
141+
ersa = rules.ersa.output[0]
160142
output: "results/relatives.tsv"
161143
conda: "evaluation"
162144
log: "logs/merge/postprocess-ersa.log"

‎workflows/find/Snakefile

+2
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,5 @@ elif flow == 'ibis-king':
4343
include: "../../rules/relatives_ibis_king.smk"
4444
elif flow == 'germline-king':
4545
include: "../../rules/relatives.smk"
46+
elif flow == 'rapid':
47+
include: "../../rules/relatives_rapid.smk"

0 commit comments

Comments
 (0)
Please sign in to comment.