forked from kimmo1019/DeepCDR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathend_to_end_csa.sh
95 lines (76 loc) · 2.59 KB
/
end_to_end_csa.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# Below are two examples of end-to-end CSA runs, each for a single [source, target, split] combo:
# 1. Within-study analysis
# 2. Cross-study analysis
# Note! The outputs from preprocess, train, and infer are saved into different dirs.
# ======================================================================
# To set up the improve env vars, run this script first:
# source ./setup_improve.sh
# ======================================================================
# Download CSA data (if needed).
# The presence check is made against the current working directory ($PWD),
# not against the directory this script lives in.
data_dir="csa_data"
# The path is quoted on purpose: with an unquoted $PWD that contains
# whitespace, `[` aborts with "too many arguments" and the download would be
# skipped without any visible reason.
if [ ! -d "$PWD/$data_dir/" ]; then
  echo "Download CSA data"
  # Sourced rather than executed, so it runs inside this shell.
  source download_csa.sh
fi

# Split index used to build split-file names and output dirs further down.
SPLIT=0

# Absolute path of the directory that contains this script.
# The plain form below would keep a relative path if $0 is relative:
# script_dir="$(dirname "$0")"
script_dir="$(cd "$(dirname "$0")" && pwd)"
echo "Script full path directory: $script_dir"
# ----------------------------------------
# 1. Within-study
# ---------------
# SOURCE and TARGET name the same dataset here; the train, val and test split
# files all come from that dataset's split number $SPLIT.
SOURCE=CCLE
# SOURCE=gCSI
# SOURCE=GDSCv1
TARGET=$SOURCE

# Separate dirs: preprocess, train and infer each write to their own subtree
# of $gout. $script_dir and $SPLIT are expected to be set earlier in the file.
gout="${script_dir}/res.end_to_end2"
ML_DATA_DIR="$gout/ml_data/${SOURCE}-${TARGET}/split_${SPLIT}"
MODEL_DIR="$gout/models/${SOURCE}/split_${SPLIT}"
INFER_DIR="$gout/infer/${SOURCE}-${TARGET}/split_${SPLIT}"

# Preprocess (improvelib)
# Every expansion is quoted so a path with whitespace stays a single argument.
python deepcdr_preprocess_improve.py \
  --train_split_file "${SOURCE}_split_${SPLIT}_train.txt" \
  --val_split_file "${SOURCE}_split_${SPLIT}_val.txt" \
  --test_split_file "${TARGET}_split_${SPLIT}_test.txt" \
  --input_dir ./csa_data/raw_data \
  --output_dir "$ML_DATA_DIR"

# Train (improvelib)
# Reads the preprocess output dir and writes the model dir.
python deepcdr_train_improve.py \
  --input_dir "$ML_DATA_DIR" \
  --output_dir "$MODEL_DIR"

# Infer (improvelib)
# Each continuation backslash is preceded by a space. When the backslash is
# glued to the value, the next flag is separated from it only by the next
# line's leading whitespace, and with none the two fuse into one word.
python deepcdr_infer_improve.py \
  --input_data_dir "$ML_DATA_DIR" \
  --input_model_dir "$MODEL_DIR" \
  --output_dir "$INFER_DIR" \
  --calc_infer_score true
# ----------------------------------------
# 2. Cross-study
# --------------
# SOURCE and TARGET are different datasets here: the train and val split files
# come from SOURCE, while the test file is TARGET's "<TARGET>_all.txt".
SOURCE=GDSCv1
TARGET=CCLE

# Separate dirs: same layout as the within-study run, keyed by the new
# SOURCE/TARGET pair. $script_dir and $SPLIT are expected to be set earlier
# in the file.
gout="${script_dir}/res.end_to_end2"
ML_DATA_DIR="$gout/ml_data/${SOURCE}-${TARGET}/split_${SPLIT}"
MODEL_DIR="$gout/models/${SOURCE}/split_${SPLIT}"
INFER_DIR="$gout/infer/${SOURCE}-${TARGET}/split_${SPLIT}"

# Preprocess (improvelib)
# Every expansion is quoted so a path with whitespace stays a single argument.
python deepcdr_preprocess_improve.py \
  --train_split_file "${SOURCE}_split_${SPLIT}_train.txt" \
  --val_split_file "${SOURCE}_split_${SPLIT}_val.txt" \
  --test_split_file "${TARGET}_all.txt" \
  --input_dir ./csa_data/raw_data \
  --output_dir "$ML_DATA_DIR"

# Train (improvelib)
# Reads the preprocess output dir and writes the model dir.
python deepcdr_train_improve.py \
  --input_dir "$ML_DATA_DIR" \
  --output_dir "$MODEL_DIR"

# Infer (improvelib)
# Each continuation backslash is preceded by a space. When the backslash is
# glued to the value, the next flag is separated from it only by the next
# line's leading whitespace, and with none the two fuse into one word.
python deepcdr_infer_improve.py \
  --input_data_dir "$ML_DATA_DIR" \
  --input_model_dir "$MODEL_DIR" \
  --output_dir "$INFER_DIR" \
  --calc_infer_score true