-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarbles.py
164 lines (131 loc) · 9.2 KB
/
marbles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
'''*****************************************************************************
Copyright (C) 2020 Niels Bohr Institute
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*****************************************************************************'''
import argparse
import os
has_plot = True
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
except ModuleNotFoundError:
has_plot = False
try:
import numpy as np
except ModuleNotFoundError:
has_plot = False
def plot( args ):
mpl.rcParams['figure.figsize'] = (7,8)
mpl.rcParams['axes.labelsize'] = 16
mpl.rcParams['xtick.labelsize'] = 14
mpl.rcParams['ytick.labelsize'] = 14
mpl.rcParams['legend.fontsize'] = 14
fit = np.loadtxt( f'{args.output}/best_fit.dat')
exp = np.loadtxt( args.input )
fig, (ax, axres) = plt.subplots(nrows = 2, sharex = False, gridspec_kw = {"height_ratios" : [5,1], "hspace" : 0} )
ax.errorbar( exp[:,0] * 10, exp[:,1], exp[:,2], zorder = 1, fmt = 's', markersize = 6,
label = 'Experiment', color = '#9400d3', alpha = 0.5 )
ax.plot( fit[:,0] * 10, fit[:,1], color = 'k', linewidth = 2, zorder = 10, label = 'Model Fit' )
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel(r'I(q) [cm$^{-1}$]')
axres.set_xscale('log')
axres.set_xlabel(r'q [nm$^{-1}]$')
axres.plot( exp[:,0] * 10, (exp[:,1] - fit[:,1])/exp[:,2], color = 'k', linewidth = 2, zorder = 1 )
axres.set_ylabel(r'$\Delta$I/$\sigma$')
plt.tight_layout()
plt.savefig( f'{args.output}/fit.pdf', format = 'pdf' )
def parse_inputs():
parser = argparse.ArgumentParser( description = 'Marbles v. 0.1. A software for the estimation of shapes of membrane proteins inserted in membrane nanodiscs' )
# Mandatory Inputs
parser.add_argument( '--sequence_file', '-s', type = str, required = True,
help = 'Path to the protein sequence file' )
parser.add_argument( '--input', '-i', type = str, required = True,
help = 'Path to the file containing the experimental SAXS signal. The file has to be formatted in three \
columns: the first one for q, the second one for I(q) and the last one for the error. q has to be expressed in A^-1, \
the intensity and the error in cm^-1. The file has to contain only numbers.' )
parser.add_argument( '--dmax', '-d', type = float, required = True,
help = 'Protein Dmax (in A)' )
parser.add_argument( '--output', '-o', type = str, required = True,
help = 'Path to directory where to store simulation results' )
parser.add_argument( '--fit', '-f', type = str, required = True,
help = 'Path to WillItFit output file for nanodisc best fit' )
parser.add_argument( '--inserted_residues', '-aa', type = int, required = True,
help = 'Number of residues to accomodate in the nanodisc' )
#IO options
parser.add_argument( '--intensity_stride', '-is', type = int, required = False, default = 1,
help = 'Stride used to print on file the calculated intensities (default: 1, at the end of every pass)' )
parser.add_argument( '--configuration_stride', '-cs', type = int, required = False, default = 1,
help = 'Stride used to print on file the calculated protein configurations (default: 1, once at each pass)' )
# Monte Carlo options
parser.add_argument( '--passes', '-p', type = int, required = False, default = 100,
help = 'Maximum number of Monte Carlo iterations (default: 100)' )
parser.add_argument( '--loops', '-l', type = int, required = False, default = 0,
help = 'Number of loops per Monte Carlo iteration (default: number of protein residues)' )
parser.add_argument( '--schedule', '-ss', type = float, required = False, default = 0.9,
help = 'Simulated annealing schedule (default: 0.9)' )
parser.add_argument( '--temperature_factor', '-t', type = float, required = False, default = 10,
help = 'Number by which the initial chi squared is divided to define the initial temperature (default: 10)' )
# Penalty function options
parser.add_argument( '--connect_strength', '-c', type = float, required = False, default = 300,
help = 'Strength of the connectivity penalty function (default: 300)' )
parser.add_argument( '--neighbours_strength', '-n', type = float, required = False, default = 1,
help = 'Strength of the neighbours distribution penalty function (default: 1)' )
parser.add_argument( '--insertion_strength', '-ii', type = float, required = False, default = 5,
help = 'Strength of the neighbours distribution penalty function (default: 5)' )
parser.add_argument( '--disordered_tail', '-dt', type = int, required = False, default = 0,
help = 'Number of residues composing a disordered tail protruding from the bottom leaflet of the bilayer (default: 0)' )
parser.add_argument( '--zshift', '-z', type = float, required = False, default = 40,
help = 'Intial shift, in A, of the protein with respect to nanodisc center along the direction normal to the nanodisc plane (default: 40A)' )
# Convergence options
parser.add_argument( '--convergence_temp', '-ct', type = float, required = False, default = 0.005,
help = 'Final temperature at which convergence is decleared (default: 0.005).' )
parser.add_argument( '--convergence_acceptance', '-ca', type = float, required = False,
help = 'Final acceptance ratio at which convergence is decleared (suggested: < 0.1). Overrides the --convergence_temp and --passes option.' )
# Advanced options
parser.add_argument('--sample_info', '-si', type = str, required = False, default = None,
help = 'Sample information file employed in WillItFit to set the chemical properties of the nanodisc. Necessary only if the nanodisc is composed by non POPC-lipids \
and a belt protein different from MSP1D1')
parser.add_argument( '--clash_distance', '-cd', type = float, required = False, default = 1.8,
help = 'Smaller possible distance between beads before move is rejected (default: 1.8A)' )
# parser.add_argument( '--maximum_distance', '-md', type = float, required = False, default = 5.1,
# help = 'Maximum distance between chosen beads allowed by Monte Carlo move (default: 5.1A)' )
parser.add_argument( '--connected', '-cc', type = float, required = False, default = 5.8,
help = 'Maximum distance within which two beads are considered connected (default: 4.5A)' )
parser.add_argument( '--qs_for_I0', '-qi', type = int, required = False, default = 5,
help = 'Number of low-q points to use to determine the value of I(0) (default: 5)' )
parser.add_argument( '--qs_for_b', '-qb', type = int, required = False, default = 4,
help = 'Number of low-q points to use to determine the value of background (default: 4)' )
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_inputs()
# the number of loops is set as default: read the sequence file and count the residues
if( args.loops == 0 ):
with open( args.sequence_file ) as f:
r = f.readlines()
args.loops = len(r[1])
maximum_distance = 5.1 #to be removed because it's not implemented
if( args.convergence_acceptance == None ):
args.convergence_acceptance = 0
if( args.sample_info == None ):
args.sample_info = 'None'
cmd = './runner ' + str(args.sequence_file) + ' ' + str(args.input) + ' ' + str(args.output) + ' ' + str(args.dmax) + \
' ' + str(args.passes) + ' ' + str(args.loops) + ' ' + str(args.connect_strength) + ' ' + str(args.neighbours_strength) + \
' ' + str(args.schedule) + ' ' + str(args.temperature_factor) + ' ' + str(args.clash_distance) + \
' ' + str(maximum_distance) + ' ' + str(args.connected) + ' ' + str(args.fit) + ' ' + str(args.inserted_residues) + \
' ' + str(args.insertion_strength) + ' ' + str(args.qs_for_I0) + ' ' + str(args.disordered_tail) + ' ' + str(args.zshift) + \
' ' + str(args.qs_for_b) + ' ' + str(args.convergence_temp) + ' ' + str(args.convergence_acceptance) + \
' ' + str(args.intensity_stride) + ' ' + str(args.configuration_stride) + ' ' + str(args.sample_info)
os.system( cmd )
if has_plot:
plot( args )