ScaffComb: A Phenotype-Based Framework for Drug Combination Virtual Screening in Large-scale Chemical Datasets
This repository contains the source code, example data, and some analysis code.
Dependencies:
- python 3.6.8
- tensorflow-gpu 1.12.0
- tensorboardx 2.0
- pytorch 1.3.1
- numpy 1.17.0
- rdkit 2020.03.1b1.0
- scipy 1.2.1
- scikit-learn 0.21.2
- matplotlib 3.1.1
> import pickle
> from GSG import AttnDGenerator
> dt_L1000 = pickle.load(open('data/data_GSG_input_train_example.pkl','rb'))
> dt_chembl = pickle.load(open('data/data_GSG_input_pretrain_example.pkl', 'rb'))
> tokens = pickle.load(open('data/tokens.pkl','rb'))
> # Pre-train on ChEMBL scaffolds
> Trainer0 = AttnDGenerator(tokens, dt_chembl, batch_size=128, lr = 0.0001,
device=device, path='rst/GSG_pretrained/')
> Trainer0.train(1000)
> # Train with L1000 scaffolds
> Trainer1 = AttnDGenerator(tokens, dt_L1000, batch_size=128, lr = 0.0001,
device=device, path='rst/GSG_trained/')
> Trainer1.load_model('rst/GSG_pretrained/', 999)
> Trainer1.train(1000)
> scaf = Trainer1.G_sample(gene = gene)
> from DSP import trainer
> X, Y = pickle.load(open('data/data_DSP_input_example.pkl', 'rb'))
> X_ = X.copy()
> names = ['~'.join(sorted(x.split('~')[:2])) for x in X_.index.tolist()]
> X_.index = names
> X1 = X_.iloc[:, :1593]
> X2 = X_.iloc[:, 1593:3186]
> X3 = X_.iloc[:, 3186:]
> model = AugSSP(1593, 978, device=device)
> Trainer3 = trainer([X1, X2, X3, Y], model, batch_size=512,
path = 'rst/DSP_trained/model')
> Trainer3.load_model()
> Trainer3.one_fold_train(500)
> y_pred = Trainer3.predict([X1, X2, X3])
> from SDSP import trainer
> tokens = pickle.load(open('data/tokens.pkl','rb'))
> dt = pickle.load(open('data/data_SDSP_input_example.pkl', 'rb'))
> dt_train = [x[:40] for x in dt]
> dt_test = [x[40:] for x in dt]
> Trainer4 = trainer(tokens, dt_train, dt_test, latent_dim=512,
batch_size = 512, lr = 0.0001,
device=device, path='rst/SDSP_trained/model')
> Trainer4.train(500)
> from SDSP import SDSP_predict
> y_pred, y_pred_rev = SDSP_predict(Trainer4, dt)