-
Notifications
You must be signed in to change notification settings - Fork 2
/
myChEMBL_18_recipe.txt
225 lines (175 loc) · 6.06 KB
/
myChEMBL_18_recipe.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
## myChEMBL_18 VM Ubuntu 12.04
## Useful links
# https://code.google.com/p/rdkit/wiki/BuildingWithCmake
# https://code.google.com/p/rdkit/wiki/BuildingTheCartridge
# http://sourceforge.net/p/rdkit/code/2472/
## RDKit and system requirements
sudo apt-get install ipython
sudo apt-get install ipython-notebook
sudo apt-get install ipython-qtconsole
sudo apt-get install libboost-all-dev
sudo apt-get install postgresql
sudo apt-get install postgresql-server-dev-all
sudo apt-get install postgresql-doc
sudo apt-get install postgresql-contrib
sudo apt-get install flex
sudo apt-get install bison
sudo apt-get install g++
sudo apt-get install cmake
sudo apt-get install unzip
sudo apt-get install git
## New postgres user
sudo su postgres
createuser chembl
exit
## Cutting-edge RDKit source code from GitHub
# svn checkout svn://svn.code.sf.net/p/rdkit/code/trunk rdkit-code
# svn checkout svn://svn.code.sf.net/p/rdkit/code/tags/Release_2013_03_2 rdkit-code
git clone https://github.com/rdkit/rdkit
cd rdkit/
git checkout tags/Release_2014_03_1
cd
## Python modules
sudo apt-get install python-numpy
sudo apt-get install python-scipy
sudo apt-get install python-matplotlib
sudo apt-get install python-pip
sudo apt-get install python-psycopg2
sudo apt-get install python-imaging-tk
sudo apt-get install python-pandas
sudo apt-get install python-networkx
sudo apt-get install python-sklearn
sudo pip install -U ipython
sudo pip install -U jinja2
sudo pip install -U scikit-learn
sudo pip install -U tornado
sudo pip install -U pandas #will update numpy too
sudo pip install -U requests
## Download target prediction models
wget ftp://ftp.ebi.ac.uk/pub/databases/chembl/target_predictions/chembl_18_models.tar.gz
tar -zxf chembl_18_models.tar.gz
## RDKit ENV variables
export RDBASE=$HOME/rdkit
export LD_LIBRARY_PATH=$RDBASE/lib:$LD_LIBRARY_PATH
export PYTHONPATH=$RDBASE:$PYTHONPATH
## Download INCHI library
cd $RDBASE/External/INCHI-API
bash download-inchi.sh
## Build RDKit
cd $RDBASE
mkdir build
cd build
cmake -DRDK_BUILD_INCHI_SUPPORT=ON ..
make -j4 install
ctest
# 100% tests passed, 0 tests failed out of 77
# Total Test time (real) = 74.73 sec
## Build RDKit Cartridge
cd $RDBASE/Code/PgSQL/rdkit
make
sudo make install
make installcheck
============== dropping database "contrib_regression" ==============
NOTICE: database "contrib_regression" does not exist, skipping
DROP DATABASE
============== creating database "contrib_regression" ==============
CREATE DATABASE
ALTER DATABASE
============== running regression test queries ==============
test rdkit-91 ... ok
test props ... ok
test btree ... ok
test molgist ... ok
test bfpgist-91 ... ok
test sfpgist ... ok
test slfpgist ... ok
test fps ... ok
=====================
All 8 tests passed.
=====================
createdb test
psql test
test=# create extension "rdkit";
CREATE EXTENSION
test=# show rdkit.tanimoto_threshold;
rdkit.tanimoto_threshold
--------------------------
0.5
(1 row)
test=# select 'c1ccccc1O'::mol;
mol
-----------
Oc1ccccc1
(1 row)
## Install ChEMBLdb
wget ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_18/chembl_18_postgresql.tar.gz
tar zxf chembl_18_postgresql.tar.gz
createdb chembl_18
cd chembl_18_postgresql/
psql chembl_18 < chembl_18.pgdump.sql
sudo sysctl -w kernel.shmmax=2147483648
sudo /sbin/iptables -A INPUT -i eth0 -p tcp --destination-port 5432 -j ACCEPT
cd /home/chembl/src/mychembl
sudo git pull
sudo cp src/mychembl/configuration/mychembl_postgresql.conf /etc/postgresql/9.1/main/
sudo mv /etc/postgresql/9.1/main/mychembl_postgresql.conf /etc/postgresql/9.1/main/postgresql.conf
sudo cp src/mychembl/configuration/mychembl_pg_hba.conf /etc/postgresql/9.1/main/
sudo mv /etc/postgresql/9.1/main/mychembl_pg_hba.conf /etc/postgresql/9.1/main/pg_hba.conf
sudo cp src/mychembl/configuration/mychembl_sysctl.conf /etc/
sudo mv /etc/mychembl_sysctl.conf /etc/sysctl.conf
# sudo vim /etc/postgresql/9.1/main/postgresql.conf
# listen_addresses = '*'
# fsync = off # turns forced synchronization on or off
# synchronous_commit = off # immediate fsync at commit
# full_page_writes = off # recover from partial page writes
# shared_buffers = 1024MB
# work_mem = 128MB
# sudo vim /etc/postgresql/9.1/main/pg_hba.conf
# add line: host all all .ebi.ac.uk trust
# Change this: local all all peer
# to this: local all all trust
sudo service postgresql restart
# sudo vim /etc/sysctl.conf
# add this line kernel.shmmax = 2147483648
## New postgres read-only user
createuser -P mychembl
psql --username=chembl chembl_18
GRANT SELECT ON ALL TABLES IN SCHEMA public TO mychembl;
\q
## Create Notebook profile
cd
mkdir notebooks
ipython profile create mychembl
ipython notebook --profile=mychembl
exit
cp src/mychembl/configuration/mychembl_ipython_notebook_config.py ~/.ipython/profile_mychembl/
mv ~/.ipython/profile_mychembl/mychembl_ipython_notebook_config.py ~/.ipython/profile_mychembl/ipython_notebook_config.py
sudo cp src/mychembl/configuration/mychembl_interfaces /etc/network/
sudo mv /etc/network/interfaces /etc/network/interfaces.orig
sudo mv /etc/network/mychembl_interfaces /etc/network/interfaces
## Build RDKit Indices
psql chembl_18
create extension rdkit;
\q
psql -d chembl_18 -a -f indexes.sql
## Benchmarking
psql -c "select count(*) from mols_rdkit where m @>'O=C1OC2=CC=CC=C2C=C1';" chembl_18
# count
# -------
# 11886
# Time: 848.260 ms
psql -c "select count(*) from mols_rdkit where m@>'O=C(NC1=CC=CC=C1)C1=CC2=C(OC1=O)C=CC=C2';" chembl_18
# count
# -------
# 226
# Time: 61.439 ms
psql -c "select count(*) from mols_rdkit where m @>'c1ccc2c(c1)ccc1ccccc21';" chembl_18
# count
# -------
# 1546
# Time: 8532.081 ms
## PHP-APACHE stuff here: TODO for Mark
# Add the following line to /etc/php5/apache2/php.ini
;
extension=/usr/lib/php5/20090626/pgsql.so
;