-
Notifications
You must be signed in to change notification settings - Fork 4
/
prepare.py
209 lines (149 loc) · 8.29 KB
/
prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
""" Prepare raw PDB files for use with Rosetta """
import argparse
import os
from os.path import join, isdir, basename, abspath
import shutil
import subprocess
import platform
import time
import pandas as pd
from utils import save_argparse_args
def prep_working_dir(template_dir, working_dir, pdb_fn, overwrite_wd=False):
""" prep the working directory by copying over files from the template directory"""
# delete the current working directory if one exists
if overwrite_wd:
try:
shutil.rmtree(working_dir)
except FileNotFoundError:
pass
# copy over the full template directory as there are no files that need to be modified if it exists
try:
shutil.copytree(template_dir, working_dir)
except FileExistsError:
print("The working directory '{}' already exists. Please delete this directory or specify "
"a different working directory.".format(working_dir))
raise
# copy over PDB file into and rename it structure.pdb
shutil.copyfile(pdb_fn, join(working_dir, "structure.pdb"))
# def make_executable(fn):
# st = os.stat(fn)
# os.chmod(fn, st.st_mode | stat.S_IEXEC)
def clean_pdb_wrapper(keep_ligand, rosetta_main_dir, working_dir, chain):
""" run Rosetta's clean_pdb_keep_ligand.py script. assumes there is a structure.pdb file in the working_dir """
if keep_ligand:
cleaned_fn = run_clean_pdb_keep_ligand(rosetta_main_dir, working_dir)
else:
cleaned_fn = run_clean_pdb(rosetta_main_dir, working_dir, chain)
return cleaned_fn
def run_clean_pdb(rosetta_main_dir, working_dir, chain):
clean_pdb_script_fn = abspath(join(rosetta_main_dir, "tools/protein_tools/scripts/clean_pdb.py"))
clean_pdb_cmd = ['conda', 'run', '-n', 'clean_pdb', clean_pdb_script_fn, 'structure.pdb', chain]
# run the clean pdb script
clean_out_fn = join(working_dir, "clean_pdb.out")
with open(clean_out_fn, "w") as f:
return_code = subprocess.call(clean_pdb_cmd, cwd=working_dir, stdout=f, stderr=f)
if return_code != 0:
raise RuntimeError("Clean PDB did not execute successfully, return code: {}".format(return_code))
# this script saves the output file in the same directory as the input file
cleaned_pdb_fn = "structure_{}.pdb".format(chain)
# rename the cleaned pdb to structure_cleaned.pdb
# os.rename(join(working_dir, cleaned_pdb_fn), join(working_dir, "structure_cleaned.pdb"))
return cleaned_pdb_fn
def run_clean_pdb_keep_ligand(rosetta_main_dir, working_dir):
clean_pdb_keep_ligand_fn = "source/src/apps/public/relax_w_allatom_cst/clean_pdb_keep_ligand.py"
clean_pdb_script_fn = abspath(join(rosetta_main_dir, clean_pdb_keep_ligand_fn))
clean_pdb_cmd = ['conda', 'run', '-n', 'clean_pdb', clean_pdb_script_fn, 'structure.pdb', '-ignorechain']
# run the clean pdb script
clean_out_fn = join(working_dir, "clean_pdb.out")
with open(clean_out_fn, "w") as f:
return_code = subprocess.call(clean_pdb_cmd, cwd=working_dir, stdout=f, stderr=f)
if return_code != 0:
raise RuntimeError("Clean PDB did not execute successfully, return code: {}".format(return_code))
# this script saves the output file in the same directory as the input file
# the output filename is just the input filename with _00.pdb appended
cleaned_pdb_fn = "structure.pdb_00.pdb"
# rename the cleaned pdb to structure_cleaned.pdb
# os.rename(join(working_dir, cleaned_pdb_fn), join(working_dir, "structure_cleaned.pdb"))
return cleaned_pdb_fn
def run_relax(rosetta_main_dir, working_dir, cleaned_pdb_fn, nstruct=1):
""" run relax to prep the cleaned pdb for further use with Rosetta (as recommended by Rosetta docs) """
if platform.system() == "Linux":
relax_bin_fn = "relax.static.linuxgccrelease"
elif platform.system() == "Darwin":
relax_bin_fn = "relax.static.macosclangrelease"
else:
raise ValueError("unsupported platform: {}".format(platform.system()))
relax_bin_fn = abspath(join(rosetta_main_dir, "source", "bin", relax_bin_fn))
# path to the rosetta database
database_path = abspath(join(rosetta_main_dir, "database"))
relax_cmd = [relax_bin_fn, '-database', database_path, '-s', cleaned_pdb_fn,
'-nstruct', str(nstruct), '@flags_prepare_relax']
relax_out_fn = join(working_dir, "relax.out")
with open(relax_out_fn, "w") as f:
subprocess.call(relax_cmd, cwd=working_dir, stdout=f, stderr=f)
def parse_scores(working_dir):
""" get the filename of the lowest energy structure generated in the relax step """
scores_fn = join(working_dir, "score.sc")
# load the scores.sc file into a pandas dataframe
df = pd.read_csv(scores_fn, delim_whitespace=True, skiprows=1)
# get all structures with lowest energy (there may be multiple structures with same lowest energy)
lowest_energy_df = df[df.total_score == df.total_score.min()]
print("Found {} structures with lowest energy ({}).".format(len(lowest_energy_df),
lowest_energy_df.iloc[0]["total_score"]), flush=True)
# return the *first* structure with the lowest energy
return join(working_dir, lowest_energy_df.iloc[0]["description"] + ".pdb")
def get_output_dir(original_pdb_fn, out_dir_base):
# time zone is UTC (coordinated universal time)
output_dir = "{}_{}".format(basename(original_pdb_fn)[:-4],
time.strftime("%Y-%m-%d_%H-%M-%S", time.gmtime(time.time())))
return join(out_dir_base, output_dir)
def main(args):
output_dir = get_output_dir(args.pdb_fn, args.out_dir_base)
os.makedirs(output_dir)
print("output directory is: {}".format(output_dir), flush=True)
template_dir = "templates/prepare_wd_template"
working_dir = join(output_dir, "working_dir")
# set up the working directory
# this also copies over the PDB and renames it structure.pdb for consistency
prep_working_dir(template_dir, working_dir, args.pdb_fn, overwrite_wd=True)
# save the args to the working directory (which will be saved to the output dir)
args_dict = dict(vars(args))
save_argparse_args(args_dict, join(output_dir, "args.txt"))
# run the clean_pdb script
cleaned_pdb_fn = clean_pdb_wrapper(args.keep_ligand, args.rosetta_main_dir, working_dir, args.chain)
# relax with all-heavy-atom constraints
if args.relax_nstruct > 0:
run_relax(args.rosetta_main_dir, working_dir, cleaned_pdb_fn, nstruct=args.relax_nstruct)
# get the filename of the lowest scoring structure
lowest_energy_pdb_fn = parse_scores(working_dir)
# copy over the lowest energy PDB and rename it to match input filename
shutil.copyfile(lowest_energy_pdb_fn, join(output_dir, "{}_p.pdb".format(basename(args.pdb_fn)[:-4])))
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("--rosetta_main_dir",
help="The main directory of the rosetta distribution containing the binaries and "
"other files that are needed for this script (does not have to be full distribution)",
type=str,
default="rosetta_minimal")
parser.add_argument("--pdb_fn",
help="the PDB file to prepare",
type=str)
parser.add_argument("--keep_ligand",
help="whether to run clean_pdb.py or clean_pdb_keep_ligand.py",
action="store_true",
default=False)
parser.add_argument("--chain",
help="if using clean_pdb with keep_ligand==False, specify which chain to grab from "
"the raw PDB. ignorechain will grab all the chain",
type=str, default="ignorechain")
parser.add_argument("--relax_nstruct",
help="number of structures (restarts) in the relax step",
type=int,
default=10)
parser.add_argument("--out_dir_base",
help="base output directory",
type=str,
default="output/prepare_outputs")
main(parser.parse_args())