forked from IanAWatson/Lilly-Medchem-Rules
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lilly_Medchem_Rules.rb
executable file
·365 lines (272 loc) · 10.7 KB
/
Lilly_Medchem_Rules.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env ruby
# Run the rejection/demerit rules
ianhome=File.dirname($0) # location for supporting files
require "#{ianhome}/ruby/iwcmdline.rb"
require "#{ianhome}/ruby/bindir.rb"
$expert = false
$default_lower_atom_count_cutoff = 7
$default_soft_upper_atom_count_cutoff = 25
$default_hard_upper_atom_count_cutoff = 40
def usage (rc)
$stderr << "Runs the Lilly medchem rules\n"
$stderr << " -c <n> lower atom count cutoff (default #{$default_lower_atom_count_cutoff})\n" if $expert
$stderr << " -Cs <natoms> soft upper atom count cuttof (default #{$default_soft_upper_atom_count_cutoff})\n" if $expert
$stderr << " -Ch <natoms> hard upper atom count cuttof (default #{$default_hard_upper_atom_count_cutoff})\n" if $expert
$stderr << " -C <natoms> upper atom count limit - no atom count demerits\n"
$stderr << " -smarts <s> optional smarts to reject\n" if $expert
$stderr << " -rej <q> optional query file to reject\n" if $expert
$stderr << " -relaxed relaxed rules: 7-50 heavy atoms, 160 demerit cutoff\n"
$stderr << " -nodemerit hard rejections only, do not apply any demerits\n" if $expert
$stderr << " -S <fname> write output to <fname> rather than stdout\n" if $expert
$stderr << " -B <stem> output name stem for rejected molecules\n" if $expert
$stderr << " -log <stem> name stem for log files\n" if $expert
$stderr << " -bindir <dir> directory for executables\n" if $expert
$stderr << " -tp...-tp options passed directly to mc_first_pass\n" if $expert
$stderr << " -iwd...-iwd options passed directly to iwdemerit\n" if $expert
$stderr << " -odm <name> omit demerit with file name <name>\n" if $expert
$stderr << " -edm <fname> extra demerits to be applied - query file format only\n" if $expert
$stderr << " -dmrt <fname> extra demerits to be applied\n" if $expert
$stderr << " -dcf <fname> demerit control file (the -C option to iwdemerit)\n" if $expert
$stderr << " -q <dir> directory for queries\n" if $expert
$stderr << " -okiso allow isotopic atoms to pass through\n";
$stderr << " -label place isotopic labels on the matched atoms for rejected molecules (bad1.smi and bad2.smi)\n" if $expert
$stderr << " -symm <bonds> discard symmetric molecules where two symmetric atoms > <bonds> apart\n" if $expert
$stderr << " -nophosphorus reject all phosphorus containing molecules\n"
$stderr << " -noapdm do not append demerit reasons\n"
$stderr << " -i <type> input type\n" if $expert
$stderr << " -expert more options\n" unless $expert;
$stderr << " -v verbose output\n"
exit(rc)
end
cl = IWCmdline.new("-v-noapdm-i=s-expert-b=fraction-B=s-q=dir-log=s-tp=close-iwd=close-bindir=dir-smarts=s-rej=s-c=ipos-Cs=ipos-Ch=ipos-C=ipos-okiso-odm=s-edm=sfile-relaxed-nodemerit-S=s-dcf=sfile-nobadfiles-symm=ipos-nophosphorus-label")
if cl.unrecognised_options_encountered()
$stderr << "Unrecognised options encountered\n"
usage(1)
end
verbose = cl.option_present('v')
if cl.option_present('expert')
$expert = true
end
if ARGV.empty?
$stderr << "Insufficient arguments\n"
usage(2)
end
append_demerit_reason = ! cl.option_present('noapdm') # convoluted inverse logic
stop_afer_completing_step = 3 # no longer optional
input_type = ""
if cl.option_present('i')
input_type = '-i ' << cl.value('i')
end
# The ring bond ratio used by mc_first_pass
ring_bond_ratio = -1
if cl.option_present('b')
ring_bond_ratio = cl.value('b')
end
mc_first_pass_options = ""
if cl.option_present('tp')
mc_first_pass_options = cl.value('tp')
end
if cl.option_present('nophosphorus')
mc_first_pass_options << " -n P"
end
unless cl.option_present('okiso')
mc_first_pass_options << ' -I 0'
end
mc_first_pass_options << ' -A I -A ipp'
bindir = Bindir.new(ianhome)
cl.values('bindir').each do |d|
bindir.add_dir(d)
end
iwdemerit = bindir.find_executable('iwdemerit')
mc_first_pass = bindir.find_executable('mc_first_pass')
tsubstructure = bindir.find_executable('tsubstructure')
query_dir = "#{ianhome}/queries";
if cl.option_present('q')
query_dir = cl.value('q')
end
# The default query files
query_file = Array.new
query_file[1] = 'reject1'
query_file[2] = 'reject2'
query_file[3] = 'demerits'
# The default stem of the rejected files
bad_stem = false
if cl.option_present('B')
bad_stem = cl.value('B')
elsif cl.option_present('nobadfiles')
true
else
bad_stem = 'bad'
end
# The default stem for log files
logfilestem = 'ok';
if cl.option_present('log')
logfilestem = cl.value('log')
end
optional_queries = ""
cl.values('rej').each do |q|
optional_queries << " -q #{q}"
end
cl.values('smarts').each do |s|
optional_queries << " -s '#{s}'"
end
extra_iwdemerit_options = ""
if cl.option_present('relaxed')
$default_soft_upper_atom_count_cutoff = 26
$default_hard_upper_atom_count_cutoff = 50
extra_iwdemerit_options << " -f 160"
end
if cl.option_present('nodemerit')
extra_iwdemerit_options << " -r"
$default_soft_upper_atom_count_cutoff = $default_hard_upper_atom_count_cutoff - 1
end
if cl.option_present('iwd')
extra_iwdemerit_options << " " << cl.value('iwd')
end
if cl.option_present('symm')
extra_iwdemerit_options << " -s " << cl.value('symm').to_s
end
charge_assigner = "#{ianhome}/charge_assigner/queries"
unless FileTest.size?(charge_assigner)
$stderr << "Charge assigner not available, skipping\n"
else
extra_iwdemerit_options << " -N F:#{charge_assigner}"
end
lower_atom_count_cutoff = $default_lower_atom_count_cutoff
if cl.option_present('c')
lower_atom_count_cutoff = cl.value('c')
end
soft_upper_atom_count_cutoff = $default_soft_upper_atom_count_cutoff;
if cl.option_present('Cs')
soft_upper_atom_count_cutoff = cl.value('Cs')
end
hard_upper_atom_count_cutoff = $default_hard_upper_atom_count_cutoff;
if cl.option_present('Ch')
hard_upper_atom_count_cutoff = cl.value('Ch')
unless cl.option_present('Cs')
soft_upper_atom_count_cutoff = hard_upper_atom_count_cutoff - 1
end
end
if cl.option_present('C')
hard_upper_atom_count_cutoff = cl.value('C') + 1
soft_upper_atom_count_cutoff = hard_upper_atom_count_cutoff - 1
end
if hard_upper_atom_count_cutoff < soft_upper_atom_count_cutoff
hard_upper_atom_count_cutoff = soft_upper_atom_count_cutoff + 1
end
unless FileTest.directory?(query_dir)
$stderr << "Cannot continue, query dir '#{query_dir}' invalid\n"
exit(3)
end
raise "Query file '#{query_dir}/#{query_file[1]}' missing or inaccessible" unless (FileTest.size?("#{query_dir}/#{query_file[1]}") && FileTest.readable?("#{query_dir}/#{query_file[1]}"))
raise "Query file '#{query_dir}/#{query_file[2]}' missing or inaccessible" unless (FileTest.size?("#{query_dir}/#{query_file[2]}") && FileTest.readable?("#{query_dir}/#{query_file[2]}"))
raise "Query file '#{query_dir}/#{query_file[3]}' missing or inaccessible" unless (FileTest.size?("#{query_dir}/#{query_file[3]}") && FileTest.readable?("#{query_dir}/#{query_file[3]}"))
$stderr << "Queries from '#{query_dir}'\n" if (verbose)
files_to_be_deleted = Array.new
query_file3 = "#{query_dir}/#{query_file[3]}"
# I want to have the odm option behave as either a regular expression
# or as exact matches.
# Note that I don't check against multiple RX= directives
class Odm
def initialize(o)
@rx = false
@hash = Hash.new
o.each do |d|
if ('RX=' == d[0,3])
@rx = Regexp.new(d[3, d.size - 3], Regexp::IGNORECASE)
else
@hash[d] = true
end
end
end
def match(d)
if (@hash.has_key?(d))
return true
end
if (@rx)
return @rx.match(d)
end
return false
end
end
if cl.option_present('odm')
tmpdir = "."
if cl.option_present('tmpdir')
tmpdir = cl.value('tmpdir')
end
old_demerits = File.open("#{query_dir}/#{query_file[3]}", mode='r')
raise "Cannot open original demerit file '#{query_dir}/#{query_file[3]}'" unless (old_demerits)
temporary_demerit_file = "#{tmpdir}/demerits" << Process.pid.to_s
new_demerits = File.open(temporary_demerit_file, mode='w')
raise "Cannot open temporary demerit file '#{temporary_demerit_file}'" unless (new_demerits)
odm = Odm.new(cl.values('odm'))
demerit_rx = Regexp.new("^(\\S+)\.qry")
items_discarded = 0
old_demerits.each do |line|
m = demerit_rx.match(line)
next unless (m)
stem = m[1]
if odm.match(stem)
items_discarded += 1
else
new_demerits << "#{query_dir}/#{stem}.qry\n"
end
end
if 0 == items_discarded
$stderr << "Warning, no demerits discarded\n"
elsif verbose
$stderr << "Discarded #{items_discarded} demerits\n"
end
new_demerits.close
query_file3 = "#{temporary_demerit_file}"
files_to_be_deleted.push(query_file3)
end
iwdemerit_optional_control_file = false
if cl.option_present('dcf')
iwdemerit_optional_control_file = cl.value('dcf')
end
if cl.option_present('edm')
additional_demerits = cl.value('edm')
end
cmd = "#{mc_first_pass} ";
cmd << " -b #{ring_bond_ratio}" if ring_bond_ratio >= 0.0
cmd << " #{mc_first_pass_options}" if mc_first_pass_options.length > 0
cmd << " #{input_type} " if input_type.length > 0
cmd << " -c #{lower_atom_count_cutoff} -C #{hard_upper_atom_count_cutoff} -E autocreate -o smi -V -g all -g ltltr -i ICTE "
cmd << "-L #{bad_stem}0 -K TP1 " if bad_stem
cmd << "-a -S - #{ARGV.join(' ')} 2> #{logfilestem}0.log "
# Options passed to both tsubstructure invocations
tsub_common_opts = '-A D -E autocreate -b -u -i smi -o smi'
if cl.option_present('label')
tsub_common_opts << ' -j 1'
end
if stop_afer_completing_step >= 1
cmd << "| #{tsubstructure} #{tsub_common_opts} "
cmd << "-m #{bad_stem}1 -m QDT " if bad_stem
cmd << "-n - -q F:#{query_dir}/#{query_file[1]} "
cmd << optional_queries if optional_queries.length > 0
cmd << " - 2> #{logfilestem}1.log ";
if stop_afer_completing_step >= 2
cmd << "| #{tsubstructure} #{tsub_common_opts} "
cmd << "-m #{bad_stem}2 -m QDT " if bad_stem
cmd << "-n - -q F:#{query_dir}/#{query_file[2]} - 2> #{logfilestem}2.log ";
if stop_afer_completing_step >= 3
cmd << " | #{iwdemerit} -x #{extra_iwdemerit_options} -E autocreate -A D -i smi -o smi -q F:#{query_file3} "
cmd << "-R #{bad_stem}3 " if bad_stem
cmd << "-G - -c smax=#{soft_upper_atom_count_cutoff} -c hmax=#{hard_upper_atom_count_cutoff} "
cmd << "-q F:#{additional_demerits} " if additional_demerits
cmd << "-C #{iwdemerit_optional_control_file} " if iwdemerit_optional_control_file
cmd << "-t " if append_demerit_reason
cmd << "- 2> #{logfilestem}3.log "
end
end
end
if cl.option_present('S')
s = cl.value('S')
cmd << " > #{s}"
end
$stderr << "Command is '#{cmd}'\n" if verbose
system(cmd)
files_to_be_deleted.each do |f|
File.unlink(f) if FileTest.exists?(f)
end