-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathreproduce.py
66 lines (53 loc) · 2.41 KB
/
reproduce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import argparse
import pipeline
def untangle_synthetic():
    """Run the pretrained chr19 model on every synthetic chromosome.

    Prepares the data folders, fetches the reference, simulates reads and
    builds graphs for chr1-chr22 plus chrX, then runs prediction with the
    pretrained model on the resulting test split. Results are written under
    the ``assembly_synth`` output name.

    Goal: show that a model trained only on chr19 generalizes to the other
    synthetic chromosomes.
    """
    out = 'assembly_synth'
    data_path = 'data'
    ref_path = 'data/references'
    model_path = 'pretrained_models/model_15xchr19.pt'

    # Nothing is trained or validated here; every chromosome is test-only.
    train_dict, valid_dict = {}, {}
    # Each chromosome maps to 1 -- presumably the number of samples to
    # generate per chromosome; confirm against the pipeline module.
    test_dict = {f'chr{name}': 1 for name in (*range(1, 23), 'X')}
    all_chr = pipeline.merge_dicts(train_dict, valid_dict, test_dict)

    # Data preparation steps, in the order the pipeline expects them.
    pipeline.file_structure_setup(data_path, ref_path)
    pipeline.download_reference(ref_path)
    pipeline.simulate_reads(data_path, ref_path, all_chr)
    pipeline.generate_graphs(data_path, all_chr)

    # Only the test split is consumed; the train/valid paths are discarded.
    _, _, test_path = pipeline.train_valid_split(
        data_path, train_dict, valid_dict, test_dict, out
    )
    pipeline.predict(test_path, out=out, model_path=model_path)
def untangle_real():
    """Run the pretrained chr19 model on every real chromosome.

    Mirrors the synthetic workflow but targets the ``_r``-suffixed
    chromosome names (chr1_r-chr22_r plus chrX_r) and writes results under
    the ``assembly_real`` output name. The same pipeline stages are invoked,
    including ``simulate_reads``; how that stage treats ``_r`` entries is
    decided inside the pipeline module.

    Goal: show that a model trained only on synthetic chr19 also
    generalizes to real chromosomes.
    """
    out = 'assembly_real'
    data_path = 'data'
    ref_path = 'data/references'
    model_path = 'pretrained_models/model_15xchr19.pt'

    # Nothing is trained or validated here; every chromosome is test-only.
    train_dict, valid_dict = {}, {}
    # Each chromosome maps to 1 -- presumably the number of samples per
    # chromosome; confirm against the pipeline module.
    test_dict = {f'chr{name}_r': 1 for name in (*range(1, 23), 'X')}
    all_chr = pipeline.merge_dicts(train_dict, valid_dict, test_dict)

    # Data preparation steps, in the order the pipeline expects them.
    pipeline.file_structure_setup(data_path, ref_path)
    pipeline.download_reference(ref_path)
    pipeline.simulate_reads(data_path, ref_path, all_chr)
    pipeline.generate_graphs(data_path, all_chr)

    # Only the test split is consumed; the train/valid paths are discarded.
    _, _, test_path = pipeline.train_valid_split(
        data_path, train_dict, valid_dict, test_dict, out
    )
    pipeline.predict(test_path, out=out, model_path=model_path)
if __name__ == '__main__':
    # Command-line entry point: pick which experiment to reproduce.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--mode',
        type=str,
        default=None,
        help='Assemble synthetic or real chromosome',
    )
    selected = cli.parse_args().mode

    # Map each accepted --mode value to the function that runs it.
    runners = {'synth': untangle_synthetic, 'real': untangle_real}
    runner = runners.get(selected)
    if runner is None:
        # Reached for a missing --mode (default None) or any unknown value.
        print('Run with either "--mode synth" or "--mode real"!')
    else:
        runner()