-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit_openfold.tmpl
114 lines (93 loc) · 3.24 KB
/
submit_openfold.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
# Slurm batch script for an OpenFold multimer run on one FASTA input:
# alignments, folding, analysis, then packaging of the results.
#
# NOTE(review): the dollar-prefixed upper-case names used in the SBATCH lines
# and on the right-hand side of the exports below (FOLD_NAME,
# OPENFOLD_SCRIPTS, OUT_DIR, ACCOUNT, AF_ANALYSIS_SCRIPTS, IN_DIR,
# OPENFOLD_DB) are never assigned in this file. Presumably a templating step
# fills them in before submission -- confirm with the code that renders this
# file.
#
# Job name, working directory (-D), log file (-o; %A and %a are the Slurm
# array job id and array task index) and billing account:
#SBATCH --job-name=$FOLD_NAME
#SBATCH -D $OPENFOLD_SCRIPTS
#SBATCH -o $OUT_DIR/slurm-%A_%a.out
#SBATCH --account=$ACCOUNT
# Resource request: 4 hours of wall time, 8 CPUs per task, 40G of memory.
#SBATCH --time=4:00:00
#SBATCH --cpus-per-task=8
#SBATCH --mem=40G
# -e: stop at the first failing command; -x: trace every command to the log.
set -ex
# Only the message is printed: the activation command itself is commented out.
echo "activating env"
#mamba activate openfold_env
# Short aliases used by the rest of the script. They are exported, so child
# processes see them as well.
export OF_SCRIPTS=$OPENFOLD_SCRIPTS
export AFM_SCRIPTS=$AF_ANALYSIS_SCRIPTS
# WORK is not referenced anywhere else in the visible script.
# NOTE(review): presumably read by one of the child programs -- confirm.
export WORK=/home/def-marechal/programs/Marechal_pipelines/environment
export FA_DIR=$IN_DIR
export OUT=$OUT_DIR
export DB=$OPENFOLD_DB
export NAME=$FOLD_NAME
# ---- Alignment stage --------------------------------------------------------
# Runs the two alignment precompute scripts and records how long they took.
# Reads:  OF_SCRIPTS, FA_DIR, NAME, DB, OUT
# Writes: align_start, align_end, align_time (whole minutes)
#
# The worker count follows the CPUs Slurm actually granted to the task instead
# of a fixed 72, which oversubscribed the 8 CPUs requested by the job header.
# Outside of Slurm the variable is unset, so fall back to that same 8.
n_threads="${SLURM_CPUS_PER_TASK:-8}"

align_start=$(date +%s)

echo "running mmseqs alignment on ${FA_DIR}"
# The mmseqs script is launched from the parent of the scripts directory.
cd "${OF_SCRIPTS}/.."
python3 "${OF_SCRIPTS}/precompute_alignments_mmseqs.py" \
  --threads "${n_threads}" \
  --hhsearch_binary_path hhsearch \
  --pdb70 "${DB}/pdb100" \
  --env_db colabfold_envdb_202108_db \
  "${FA_DIR}/${NAME}.fa" \
  "${DB}" \
  uniref30_2302_db \
  "${OUT}" \
  mmseqs

echo "running uniprot alignment on ${FA_DIR}"
python "${OF_SCRIPTS}/precompute_alignments.py" \
  "${FA_DIR}" \
  "${OUT}" \
  --uniprot_database_path "${DB}/uniprot/uniprot.fasta" \
  --jackhmmer_binary_path jackhmmer \
  --cpus_per_task "${n_threads}"

align_end=$(date +%s)
# Integer division: elapsed alignment time in whole minutes.
align_time=$(((align_end-align_start)/60))
# ---- Folding stage ----------------------------------------------------------
# Runs the multimer_v3 presets 1 to 3, converts each prediction pickle to JSON,
# then draws the coverage plot.
# Reads:  OF_SCRIPTS, FA_DIR, DB, OUT, NAME
# Writes: fold_start, fold_end, fold_time (whole minutes)
fold_start=$(date +%s)

for model in 1 2 3; do
  echo "folding using model $model"
  python3 "${OF_SCRIPTS}/../run_pretrained_openfold.py" \
    "${FA_DIR}" \
    "${DB}/pdb_mmcif/mmcif_files" \
    --use_precomputed_alignments "${OUT}" \
    --config_preset "model_${model}_multimer_v3" \
    --model_device "cuda:0" \
    --output_dir "${OUT}" \
    --save_outputs

  echo "generate JSON for model $model"
  python "${OF_SCRIPTS}/pickle_to_json.py" \
    --model_pkl "${OUT}/predictions/${NAME}_model_${model}_multimer_v3_output_dict.pkl" \
    --output_dir "${OUT}/predictions/" \
    --basename "${NAME}" \
    --model_nbr "${model}"
done

#echo "merging pae plddt plots"
#convert ${OUT}/predictions/*_pLDDT.png +append ${OUT}/predictions/${NAME}_all_PLDDT.png
#convert ${OUT}/predictions/*_PAE.png +append ${OUT}/predictions/${NAME}_all_PAE.png

# The coverage plot reads a single feature dict. The previous version picked it
# through the loop variable, which only worked because that variable still
# held 3 once the loop was over. Spell the choice out so that editing the loop
# cannot silently change which file is plotted.
coverage_model=3

echo "generating coverage plots"
python "${OF_SCRIPTS}/generate_coverage_plot.py" \
  --input_pkl "${OUT}/${NAME}_model_${coverage_model}_multimer_v3_feature_dict.pickle" \
  --output_dir "${OUT}/predictions/" \
  --basename "${NAME}_multimer_v3_relaxed"

fold_end=$(date +%s)
# Integer division: elapsed folding time (coverage plot included) in minutes.
fold_time=$(((fold_end-fold_start)/60))
# ---- Analysis stage ---------------------------------------------------------
# Prepares the predictions folder, runs the interface analysis on it, then
# renders the PAE / pLDDT plots from the three model pickles.
# Reads: OUT, NAME, FA_DIR, OF_SCRIPTS, AFM_SCRIPTS
# All path expansions are quoted so directories containing whitespace or glob
# characters are passed through as single arguments.
echo "running AF2multimer-analysis on ${OUT}/predictions/"

# Marker file created before the analysis starts.
# NOTE(review): presumably colabfold_analysis.py looks for it -- confirm.
touch "${OUT}/predictions/${NAME}.done.txt"

# Move the unrelaxed structures into their own sub-folder before the analysis.
# The glob stays outside the quotes so it still expands; an unmatched glob
# makes mv fail, which stops the job while errexit is active.
mkdir -p "${OUT}/predictions/unrelaxed"
mv -- "${OUT}"/predictions/*unrelaxed.pdb "${OUT}/predictions/unrelaxed/"

python "${AFM_SCRIPTS}/colabfold_analysis.py" --pred_folder "${OUT}/predictions"

echo "generating PAE, plDDT plots and JSON files"
# The interface table is read from the predictions_analysis sub-folder.
python "${OF_SCRIPTS}/generate_pae_plddt_plot.py" \
  --fasta "${FA_DIR}/${NAME}.fa" \
  --model1_pkl "${OUT}/predictions/${NAME}_model_1_multimer_v3_output_dict.pkl" \
  --model2_pkl "${OUT}/predictions/${NAME}_model_2_multimer_v3_output_dict.pkl" \
  --model3_pkl "${OUT}/predictions/${NAME}_model_3_multimer_v3_output_dict.pkl" \
  --output_dir "${OUT}/predictions/" \
  --basename "${NAME}" \
  --interface "${OUT}/predictions/predictions_analysis/interfaces.csv"
# ---- Packaging and run statistics -------------------------------------------
# Archives the output directory next to itself (pickles excluded) and appends
# one timing record to the shared statistics file.
# Reads: OUT, NAME, align_time, fold_time
echo "zipping results"
# zip runs from inside the output directory so archive entries are relative to
# it. The subshell keeps the directory change local, so no cd-back is needed,
# and a failed cd can never zip the wrong directory.
(
  cd "${OUT}" || exit 1
  zip -r "../${NAME}.zip" * -x "*.pkl" "*.pickle"
)

# Record layout, tab separated: fold name, align time, fold time (minutes).
stats_file=/tank/jflucier/run_stats.tsv
echo "write time stats in ${stats_file}"
# Uses NAME, the variable exported at the top of the script and used by every
# other step, rather than FOLD_NAME, which is never assigned as a shell
# variable here. printf writes the fields verbatim, whereas echo -e would also
# interpret backslash sequences inside the name.
printf '%s\t%s\t%s\n' "${NAME}" "${align_time}" "${fold_time}" >> "${stats_file}"
#mamba deactivate