From 0f801bb800a320a0f732b53d3ce9267e2ef01c09 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:23:25 -0400 Subject: [PATCH 01/17] add DRAGON outline --- R/DRAGON.R | 187 +++++++++++++++++++++++++++++++++++ tests/testthat/test-dragon.R | 26 +++++ 2 files changed, 213 insertions(+) create mode 100644 R/DRAGON.R create mode 100644 tests/testthat/test-dragon.R diff --git a/R/DRAGON.R b/R/DRAGON.R new file mode 100644 index 00000000..e19981a3 --- /dev/null +++ b/R/DRAGON.R @@ -0,0 +1,187 @@ +# Function to implement DRAGON: https://arxiv.org/abs/2104.01690 + +# helper functions as in the netzooPy impl +# def Scale(X): +# X_temp = X +# X_std = np.std(X_temp, axis=0) +# X_mean = np.mean(X_temp, axis=0) +# return (X_temp - X_mean) / X_std + +scale = function(x,bias=F) +{ + # sd does 1/(n-1), python does 1/n + # use the bias option for exact match with python + n = length(x) + if(bias) + { + n = length(x) + numer = (x-mean(x)) + denom = sqrt((n-1)/n)*sd(x) + return(numer/denom) + } + return((x-mean(x))/sd(x)) +} + +# def VarS(X): +# xbar = np.mean(X, 0) +# n = X.shape[0] +# x_minus_xbar = X - xbar +# a = x_minus_xbar*x_minus_xbar +# wbar = np.cov(X.T, bias=True)#x_minus_xbar.T@x_minus_xbar/n +# varS = a.T@a +# varS += - n*wbar**2 +# varS *= n/(n-1)**3 +# return(varS) + +# VarS calculates the unbiased estimate of the entries of +# S according to the formula in Appendix A of Schafer and Strimmer +# 2005 + +VarS = function(x) +{ + x = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) + # x is an n x p matrix of data + # xbar = np.mean(X, 0) + # n = X.shape[0] + n = nrow(x) + # x_minus_xbar = X - xbar + x_minus_xbar = apply(x,2,function(x){x-mean(x)}) # center + # sanity check + apply(x_minus_xbar,2,mean) + a = x_minus_xbar*x_minus_xbar + varHat_s = n/((n-1)^3)* apply(a,2,sum) + wbar = 1/n*t(x_minus_xbar) %*% x_minus_xbar # this should be an outer product + + # varS = a.T@a + # varS += - n*wbar**2 + # varS *= n/(n-1)**3 + + # try doing this with an array instead of matrix multiplication + xbar = apply(x,2,mean) + p = ncol(x) + w = array(dim=c(n,p,p)) + for(k in 1:n) + { + for(i in 1:p) + { + for(j in 1:p) # this will be symmetric, but leaving it now for clarity + { + w[k,i,j] = (x[k,i] - xbar[i])*(x[k,j]-xbar[j]) + } + } + } + + wbar = 1/n*apply(w,2:3,sum) + summand = 0 + for(k in 1:n) + summand = summand + (w[k,,]-wbar)^2 + + varhat_s = n/((n-1)^3)*summand + return(varhat_s) + # this code matches with the python output +} + +# def EsqS(X): +# #xbar = np.mean(X, 0) +# n = X.shape[0] +# #x_minus_xbar = X - xbar +# wbar = np.cov(X.T, bias=True) #x_minus_xbar.T@x_minus_xbar/n +# ES2 = wbar**2*n**2/(n-1)**2 +# return(ES2) + +EsqS = function(x) +{ + # following E[x^2] = Var(x) - E[x]^2 + return(VarS(x) - mean(x)^2) +} + +# def risk(lam): +#R = const + ((1.-lam[0]**2)*T1_1 + (1.-lam[1]**2)*T1_2 +# + (1.-lam[0]**2)**2*T2_1 + (1.-lam[1]**2)**2*T2_2 +# + (1.-lam[0]**2)*(1.-lam[1]**2)*T3 + lam[0]*lam[1]*T4) #reparametrize lamx = 1-lamx**2 for better optimization +# return(R) + +risk = function(lambda1, lambda2, t11, t12, t21, t22, t3, t4) +{ + return(lambda1*t11 + lambda2*t12 + + lambda1^2*t21 + lambda2^2*t22 + + lambda1*lambda2*t3 + + sqrt(1-lambda1)*sqrt(1-lambda2)*t4) +} + +# def estimate_penalty_parameters_dragon(X1, X2): +estimatePenaltyParameters = function(X1,X2) +{ + + # X1 is omics matrix 1, dimensions n x p1 + # X2 is omics matrix 2, dimensions n x p2 + # The matrices should have the same ordering by samples + + n = nrow(X1) + p1 = ncol(X1) + p2 = ncol(X2) + # n = X1.shape[0] + # p1 = X1.shape[1] + # p2 = X2.shape[1] + + X = cbind.data.frame(X1, X2) + # X = np.append(X1, X2, axis=1) + + # varS = VarS(X) + # eSqs = EsqS(X) + + varX = VarS(X) + esqS = EsqS(X) + + # IDs = np.cumsum([p1,p2]) + IDs = cumsum(c(1:p1,1:p2)) + + # varS1 = varS[0:IDs[0],0:IDs[0]] + # varS12 = varS[0:IDs[0],IDs[0]:IDs[1]] + # varS2 = varS[IDs[0]:IDs[1],IDs[0]:IDs[1]] + # + # eSqs1 = eSqs[0:IDs[0],0:IDs[0]] + # eSqs12 = eSqs[0:IDs[0],IDs[0]:IDs[1]] + # eSqs2 = eSqs[IDs[0]:IDs[1],IDs[0]:IDs[1]] + # + # const = (np.sum(varS1) + np.sum(varS2) - 2.*np.sum(varS12) + # + 4.*np.sum(eSqs12)) + # T1_1 = -2.*(np.sum(varS1) - np.trace(varS1) + np.sum(eSqs12)) + # T1_2 = -2.*(np.sum(varS2) - np.trace(varS2) + np.sum(eSqs12)) + # T2_1 = np.sum(eSqs1) - np.trace(eSqs1) + # T2_2 = np.sum(eSqs2) - np.trace(eSqs2) + # T3 = 2.*np.sum(eSqs12) + # T4 = 4.*(np.sum(varS12)-np.sum(eSqs12)) + # + # def risk(lam): + # R = const + ((1.-lam[0]**2)*T1_1 + (1.-lam[1]**2)*T1_2 + # + (1.-lam[0]**2)**2*T2_1 + (1.-lam[1]**2)**2*T2_2 + # + (1.-lam[0]**2)*(1.-lam[1]**2)*T3 + lam[0]*lam[1]*T4) #reparametrize lamx = 1-lamx**2 for better optimization + # return(R) + # + # x = np.arange(0., 1.01, 0.01) + # lamgrid = meshgrid(x, x) + # risk_grid = risk(lamgrid) + # indices = np.unravel_index(np.argmin(risk_grid.T, axis=None), risk_grid.shape) + # lams = [x[indices[0]],x[indices[1]]] + # + # res = minimize(risk, lams, method='L-BFGS-B',#'TNC',#'SLSQP', + # tol=1e-12, + # bounds = [[0.,1.],[0.,1.]]) + # + # penalty_parameters = (1.-res.x[0]**2), (1.-res.x[1]**2) + # + # def risk_orig(lam): + # R = const + (lam[0]*T1_1 + lam[1]*T1_2 + # + lam[0]**2*T2_1 + lam[1]**2*T2_2 + # + lam[0]*lam[1]*T3 + np.sqrt(1-lam[0])*np.sqrt(1-lam[1])*T4) #reparametrize lamx = 1-lamx**2 for better optimization + # return(R) + # risk_grid_orig = risk_orig(lamgrid) + # return(penalty_parameters, risk_grid_orig) + # +} + +dragon = function() +{ + +} diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R new file mode 100644 index 00000000..5e8a09d2 --- /dev/null +++ b/tests/testthat/test-dragon.R @@ -0,0 +1,26 @@ +# test dragon + +context("test DRAGON helper functions") + +test_that("[DRAGON] scale() function yields expected results", { + x = seq(1:100) + scale_unbiased = scale(x) # default is bias=F + expect_equal(sd(scale_unbiased),1,tolerance = 1e-15) + + xbar = mean(x) + xstd_bias = sqrt(1/length(x)*sum((x-xbar)^2)) + + # calculate the unbiased standard deviation of the vector scaled + # using the biased standard deviation. It will be slightly off from 1 + biased_sd_hand = sqrt(1/(length(x)-1)*sum(((x-xbar)/xstd_bias)^2)) + + scale_biased = scale(x,bias=T) + expect_equal(sd(scale_biased),biased_sd_hand, tolerance = 1e-15) +}) + +test_that("[DRAGON] VarS() function yields expected results", {} +) + +test_that("[DRAGON] EsqS() function yields expected results", {} +) + From 752502967a3f10fe099b3a08d56dba430eddb43f Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Wed, 9 Nov 2022 11:01:36 -0500 Subject: [PATCH 02/17] implemented vars and esqs, added tests --- R/DRAGON.R | 88 ++++++++++++++++++++++++++++-------- tests/testthat/test-dragon.R | 33 ++++++++++++-- 2 files changed, 98 insertions(+), 23 deletions(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index e19981a3..52f39018 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -1,5 +1,10 @@ # Function to implement DRAGON: https://arxiv.org/abs/2104.01690 +# to add to dependencies: + +library(matrixcalc) + + # helper functions as in the netzooPy impl # def Scale(X): # X_temp = X @@ -39,11 +44,11 @@ scale = function(x,bias=F) VarS = function(x) { - x = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) # x is an n x p matrix of data # xbar = np.mean(X, 0) # n = X.shape[0] n = nrow(x) + p = ncol(x) # x_minus_xbar = X - xbar x_minus_xbar = apply(x,2,function(x){x-mean(x)}) # center # sanity check @@ -86,33 +91,54 @@ VarS = function(x) # n = X.shape[0] # #x_minus_xbar = X - xbar # wbar = np.cov(X.T, bias=True) #x_minus_xbar.T@x_minus_xbar/n -# ES2 = wbar**2*n**2/(n-1)**2 +# ES2 = wbar**2*n**2/(n-1)**2 # # return(ES2) EsqS = function(x) { - # following E[x^2] = Var(x) - E[x]^2 - return(VarS(x) - mean(x)^2) + n = nrow(x) + wbar = (n-1)/n*cov(x) + ES2 = wbar^2*n^2/((n-1)^2) + return(ES2) + # this code matches with the python output } -# def risk(lam): -#R = const + ((1.-lam[0]**2)*T1_1 + (1.-lam[1]**2)*T1_2 -# + (1.-lam[0]**2)**2*T2_1 + (1.-lam[1]**2)**2*T2_2 -# + (1.-lam[0]**2)*(1.-lam[1]**2)*T3 + lam[0]*lam[1]*T4) #reparametrize lamx = 1-lamx**2 for better optimization +# def risk_orig(lam): +# R = const + (lam[0]*T1_1 + lam[1]*T1_2 +# + lam[0]**2*T2_1 + lam[1]**2*T2_2 +# + lam[0]*lam[1]*T3 + np.sqrt(1-lam[0])*np.sqrt(1-lam[1])*T4) #reparametrize lamx = 1-lamx**2 for better optimization # return(R) -risk = function(lambda1, lambda2, t11, t12, t21, t22, t3, t4) +risk_orig = function(lambda1, lambda2, t11, t12, t21, t22, t3, t4) { + print("[dragonR] risk_orig(): This is not the reparameterized version.") + return(lambda1*t11 + lambda2*t12 + lambda1^2*t21 + lambda2^2*t22 + lambda1*lambda2*t3 + sqrt(1-lambda1)*sqrt(1-lambda2)*t4) } +# def risk(lam): +# R = const + ((1.-lam[0]**2)*T1_1 + (1.-lam[1]**2)*T1_2 +# + (1.-lam[0]**2)**2*T2_1 + (1.-lam[1]**2)**2*T2_2 +# + (1.-lam[0]**2)*(1.-lam[1]**2)*T3 + lam[0]*lam[1]*T4) #reparametrize lamx = 1-lamx**2 for better optimization +# return(R) + +# reparameterize gamma1 = (1-lambda1^2), gamma2 = (1-lambda2^2) +risk = function(gamma1, gamma2, t11, t12, t21, t22, t3, t4) +{ + R = (1-gamma1^2)*t11 + (1-gamma2^2)*t12 + + (1-gamma1^2)^2*t21 + (1-gamma2^2)^2*t22 + + (1-gamma1^2)*(1-gamma2^2)*t3 + gamma1*gamma2*t4 + return(R) +} + # def estimate_penalty_parameters_dragon(X1, X2): estimatePenaltyParameters = function(X1,X2) { - + X1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) + X2 = matrix(c(9,7,8),nrow=3,byrow=T) # X1 is omics matrix 1, dimensions n x p1 # X2 is omics matrix 2, dimensions n x p2 # The matrices should have the same ordering by samples @@ -130,41 +156,63 @@ estimatePenaltyParameters = function(X1,X2) # varS = VarS(X) # eSqs = EsqS(X) - varX = VarS(X) + varS = VarS(X) esqS = EsqS(X) # IDs = np.cumsum([p1,p2]) - IDs = cumsum(c(1:p1,1:p2)) + IDs = cumsum(c(p1,p2)) # varS1 = varS[0:IDs[0],0:IDs[0]] + varS1 = varS[1:IDs[1],1:IDs[1]] # varS12 = varS[0:IDs[0],IDs[0]:IDs[1]] + varS12 = varS[1:IDs[1],(IDs[1]+1):IDs[2]] # varS2 = varS[IDs[0]:IDs[1],IDs[0]:IDs[1]] - # + varS2 = varS[(IDs[1]+1):IDs[2],(IDs[1]+1):IDs[2]] # eSqs1 = eSqs[0:IDs[0],0:IDs[0]] + esqS1 = esqS[1:IDs[1],1:IDs[1]] # eSqs12 = eSqs[0:IDs[0],IDs[0]:IDs[1]] + esqS12 = esqS[1:IDs[1],(IDs[1]+1):IDs[2]] # eSqs2 = eSqs[IDs[0]:IDs[1],IDs[0]:IDs[1]] + esqS2 = esqS[(IDs[1]+1):IDs[2],(IDs[1]+1):IDs[2]] # # const = (np.sum(varS1) + np.sum(varS2) - 2.*np.sum(varS12) # + 4.*np.sum(eSqs12)) + print("I did not include the constant because I think we don't need it for the optimization") + # T1_1 = -2.*(np.sum(varS1) - np.trace(varS1) + np.sum(eSqs12)) + T1_1 = -2*sum(varS1) - matrix.trace(as.matrix(varS1)) - sum(esqS12) # T1_2 = -2.*(np.sum(varS2) - np.trace(varS2) + np.sum(eSqs12)) + T1_2 = -2*sum(varS2) - matrix.trace(as.matrix(varS2)) # T2_1 = np.sum(eSqs1) - np.trace(eSqs1) + T2_1 = sum(esqS1) - matrix.trace(as.matrix(esqS1)) # T2_2 = np.sum(eSqs2) - np.trace(eSqs2) + T2_2 = sum(esqS2) - matrix.trace(as.matrix(esqS2)) # T3 = 2.*np.sum(eSqs12) + T3 = 2*sum(esqS12) # T4 = 4.*(np.sum(varS12)-np.sum(eSqs12)) - # - # def risk(lam): - # R = const + ((1.-lam[0]**2)*T1_1 + (1.-lam[1]**2)*T1_2 - # + (1.-lam[0]**2)**2*T2_1 + (1.-lam[1]**2)**2*T2_2 - # + (1.-lam[0]**2)*(1.-lam[1]**2)*T3 + lam[0]*lam[1]*T4) #reparametrize lamx = 1-lamx**2 for better optimization - # return(R) - # + T4 = 4*sum(varS12)-sum(esqS12) + # x = np.arange(0., 1.01, 0.01) + x = seq(0,1.01,by=0.01) + meshgrid = matrix(nrow = length(x),ncol=length(x)) + for(i in 1:length(x)) + { + for(j in 1:length(x)) + meshgrid[i,j] = risk(gamma1=x[i], + gamma2=x[j], + t11=T1_1, + t12=T1_2, + t21=T2_1, + t22=T2_2, + t3=T3, + t4=T4) + } # lamgrid = meshgrid(x, x) # risk_grid = risk(lamgrid) # indices = np.unravel_index(np.argmin(risk_grid.T, axis=None), risk_grid.shape) # lams = [x[indices[0]],x[indices[1]]] # + print("I think round 1 is seeding nad then we use optimization") # res = minimize(risk, lams, method='L-BFGS-B',#'TNC',#'SLSQP', # tol=1e-12, # bounds = [[0.,1.],[0.,1.]]) diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 5e8a09d2..50448a79 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -1,5 +1,4 @@ # test dragon - context("test DRAGON helper functions") test_that("[DRAGON] scale() function yields expected results", { @@ -18,9 +17,37 @@ test_that("[DRAGON] scale() function yields expected results", { expect_equal(sd(scale_biased),biased_sd_hand, tolerance = 1e-15) }) -test_that("[DRAGON] VarS() function yields expected results", {} +test_that("[DRAGON] VarS() function yields expected results", { + myX = matrix(c(1,2,9,3,1,7,5,12,8),byrow=T,ncol=3) + # gold standard calculated from DRAGON in netZooPy + pyVarS = matrix(c(4,37,1,37,342.25,9.25,1,9.25,0.25),byrow=T,ncol=3) + expect_equal(VarS(myX),pyVarS, tolerance=1e-15) +} ) -test_that("[DRAGON] EsqS() function yields expected results", {} +test_that("[DRAGON] EsqS() function yields expected results", { + myX = matrix(c(1,2,9,3,1,7,5,12,8),byrow=T,ncol=3) + # gold standard calculated from DRAGON in netZooPy + pyEsqS = matrix(c(16,100,1,100,1369,0.25,1,0.25,1),byrow=T,ncol=3) + expect_equal(EsqS(myX),pyEsqS, tolerance=1e-15) +} ) +test_that("[DRAGON] risk() function calculates correct risk",{ + Gamma1 = 1.1 + Gamma2 = 2 + T11 = 3 + T12 = 4 + T21 = 5 + T22 = 6 + T3 = 7 + T4 = 8 + # manual calc + R_hand = (1-Gamma1^2)*T11 + (1-Gamma2^2)*T12 + + (1-Gamma1^2)^2*T21 +(1-Gamma2^2)^2*T22 + + (1-Gamma1^2)*(1-Gamma2^2)*T3 + + Gamma1*Gamma2*T4 + R = risk(Gamma1,Gamma2,T11,T12,T21,T22,T3,T4) + expect_equal(R,R_hand, tolerance = 1e-15) +} +) From 6b0dc21966474041dd37d03577f1082e5659285b Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Sat, 12 Nov 2022 10:03:30 -0500 Subject: [PATCH 03/17] added optim --- R/DRAGON.R | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index 52f39018..62f199e9 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -126,11 +126,14 @@ risk_orig = function(lambda1, lambda2, t11, t12, t21, t22, t3, t4) # return(R) # reparameterize gamma1 = (1-lambda1^2), gamma2 = (1-lambda2^2) -risk = function(gamma1, gamma2, t11, t12, t21, t22, t3, t4) +risk = function(gamma, const, t11, t12, t21, t22, t3, t4) { - R = (1-gamma1^2)*t11 + (1-gamma2^2)*t12 + + gamma1 = gamma[1] + gamma2 = gamma[2] + + R = const + (1-gamma1^2)*t11 + (1-gamma2^2)*t12 + (1-gamma1^2)^2*t21 + (1-gamma2^2)^2*t22 + - (1-gamma1^2)*(1-gamma2^2)*t3 + gamma1*gamma2*t4 + (1-gamma1^2)*(1-gamma2^2)*t3 + gamma1*gamma2*t4 return(R) } @@ -180,9 +183,9 @@ estimatePenaltyParameters = function(X1,X2) print("I did not include the constant because I think we don't need it for the optimization") # T1_1 = -2.*(np.sum(varS1) - np.trace(varS1) + np.sum(eSqs12)) - T1_1 = -2*sum(varS1) - matrix.trace(as.matrix(varS1)) - sum(esqS12) + T1_1 = -2*(sum(varS1) - matrix.trace(as.matrix(varS1)) + sum(esqS12)) # T1_2 = -2.*(np.sum(varS2) - np.trace(varS2) + np.sum(eSqs12)) - T1_2 = -2*sum(varS2) - matrix.trace(as.matrix(varS2)) + T1_2 = -2*(sum(varS2) - matrix.trace(as.matrix(varS2)) + sum(esqS12)) # T2_1 = np.sum(eSqs1) - np.trace(eSqs1) T2_1 = sum(esqS1) - matrix.trace(as.matrix(esqS1)) # T2_2 = np.sum(eSqs2) - np.trace(eSqs2) @@ -192,14 +195,17 @@ estimatePenaltyParameters = function(X1,X2) # T4 = 4.*(np.sum(varS12)-np.sum(eSqs12)) T4 = 4*sum(varS12)-sum(esqS12) + const = (sum(varS1) + sum(varS2) - 2*sum(varS12) + + 4*sum(esqS12)) + # x = np.arange(0., 1.01, 0.01) - x = seq(0,1.01,by=0.01) - meshgrid = matrix(nrow = length(x),ncol=length(x)) + x = seq(0,1,by=0.01) + riskgrid = matrix(nrow = length(x),ncol=length(x)) for(i in 1:length(x)) { for(j in 1:length(x)) - meshgrid[i,j] = risk(gamma1=x[i], - gamma2=x[j], + riskgrid[i,j] = risk(gamma=c(x[i],x[j]), + const = const, t11=T1_1, t12=T1_2, t21=T2_1, @@ -207,16 +213,35 @@ estimatePenaltyParameters = function(X1,X2) t3=T3, t4=T4) } + + dim(riskgrid) # lamgrid = meshgrid(x, x) # risk_grid = risk(lamgrid) # indices = np.unravel_index(np.argmin(risk_grid.T, axis=None), risk_grid.shape) # lams = [x[indices[0]],x[indices[1]]] # - print("I think round 1 is seeding nad then we use optimization") + lams = x[arrayInd(which(riskgrid == min(riskgrid)),.dim=c(101,101))] + print("round 1 is seeding and then we use optimization") + lams + # res = minimize(risk, lams, method='L-BFGS-B',#'TNC',#'SLSQP', # tol=1e-12, # bounds = [[0.,1.],[0.,1.]]) - # + + # python result: array([0.79372539, 0. ]) + res = optim(lams,risk, + const=const, + t11=T1_1, + t12=T1_2, + t21=T2_1, + t22=T2_2, + t3=T3, + t4=T4, + method="L-BFGS-B", + lower=c(0,0), + upper=c(1,1), + control = list(pgtol = 1e-12)) + # penalty_parameters = (1.-res.x[0]**2), (1.-res.x[1]**2) # # def risk_orig(lam): From bd05fc2501073846abe88df05b370c48e166c717 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Wed, 30 Nov 2022 13:25:31 -0500 Subject: [PATCH 04/17] Added fn to get shrunken cov and unit tests --- R/DRAGON.R | 59 ++++++++++++++++++- .../dragon_test_get_shrunken_covariance.csv | 4 ++ tests/testthat/test-dragon.R | 24 +++++++- 3 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv diff --git a/R/DRAGON.R b/R/DRAGON.R index 62f199e9..4cabfae6 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -133,7 +133,7 @@ risk = function(gamma, const, t11, t12, t21, t22, t3, t4) R = const + (1-gamma1^2)*t11 + (1-gamma2^2)*t12 + (1-gamma1^2)^2*t21 + (1-gamma2^2)^2*t22 + - (1-gamma1^2)*(1-gamma2^2)*t3 + gamma1*gamma2*t4 + (1-gamma1^2)*(1-gamma2^2)*t3 + gamma1*gamma2*t4 return(R) } @@ -242,6 +242,7 @@ estimatePenaltyParameters = function(X1,X2) upper=c(1,1), control = list(pgtol = 1e-12)) + return(res) # penalty_parameters = (1.-res.x[0]**2), (1.-res.x[1]**2) # # def risk_orig(lam): @@ -254,6 +255,62 @@ estimatePenaltyParameters = function(X1,X2) # } +get_shrunken_covariance_dragon = function(X1,X2, lambdas) +{ + n = nrow(X1) + p1 = ncol(X1) + p2 = ncol(X2) + p = p1 + p2 + X = cbind.data.frame(X1,X2) + S = cov(X) # the R implementation of cov() uses the unbiased (1/(n-1)); we need the unbiased version for the lemma of Ledoit and Wolf + + # target matrix + Targ = diag(diag(S)) + Sigma = matrix(nrow=p, ncol=p) + + # Sigma = np.zeros((p,p)) + # IDs = np.cumsum([0,p1,p2]) + IDs = c(cumsum(c(p1,p2))) + + idx1 = 1:IDs[1] + idx2 = (IDs[1]+1):IDs[2] + + # Fill in Sigma_11 + Sigma[idx1,idx1] = (1-lambdas[1])*S[idx1,idx1] + lambdas[1]*Targ[idx1,idx1] + + # Fill in Sigma_22 + Sigma[idx2,idx2] = (1-lambdas[2])*S[idx2,idx2] + lambdas[2]*Targ[idx2,idx2] + + # Fill in Sigma_12 + Sigma[idx1,idx2] = sqrt((1-lambdas[1])*(1-lambdas[2]))*S[idx1,idx2] + sqrt(lambdas[1]*lambdas[2])*Targ[idx1,idx2] + + # Fill in Sigma_21 + Sigma[idx2,idx1] = sqrt((1-lambdas[1])*(1-lambdas[2]))*S[idx2,idx1] + sqrt(lambdas[1]*lambdas[2])*Targ[idx2,idx1] + + return(Sigma) +} + +get_partial_correlation_from_precision = function(Theta) +{ + +} + +get_precision_matrix_dragon = function(X1, X2, lambdas) +{ + +} + +get_partial_correlation_dragon = function(X1,X2,lambdas) +{ + +} + +estimate_kappa = function(n, p, lambda0, seed) +{ + +} + + dragon = function() { diff --git a/tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv b/tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv new file mode 100644 index 00000000..9b67b34e --- /dev/null +++ b/tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv @@ -0,0 +1,4 @@ +,0,1,2 +0,4.0,7.5,-0.6123724356957945 +1,7.5,37.0,0.30618621784789724 +2,-0.6123724356957945,0.30618621784789724,1.0 diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 50448a79..8fb92db6 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -1,4 +1,5 @@ -# test dragon +# unit-tests for DRAGON + context("test DRAGON helper functions") test_that("[DRAGON] scale() function yields expected results", { @@ -42,12 +43,29 @@ test_that("[DRAGON] risk() function calculates correct risk",{ T22 = 6 T3 = 7 T4 = 8 + const = 1 + # manual calc - R_hand = (1-Gamma1^2)*T11 + (1-Gamma2^2)*T12 + + R_hand = 1+(1-Gamma1^2)*T11 + (1-Gamma2^2)*T12 + (1-Gamma1^2)^2*T21 +(1-Gamma2^2)^2*T22 + (1-Gamma1^2)*(1-Gamma2^2)*T3 + Gamma1*Gamma2*T4 - R = risk(Gamma1,Gamma2,T11,T12,T21,T22,T3,T4) + R = risk(c(Gamma1,Gamma2),const,T11,T12,T21,T22,T3,T4) expect_equal(R,R_hand, tolerance = 1e-15) } ) + +test_that("[DRAGON] get_shrunken_covariance_dragon() function returns the right values",{ + # confirm that matches python results + myX = matrix(c(1,2,9,3,1,7,5,12,8),byrow=T,ncol=3) + X1 = as.matrix(myX[,1:2]) + X2 = as.matrix(myX[,3]) + lambdas = c(0.25,0.5) + res = get_shrunken_covariance_dragon(X1,X2,lambdas) + res_py = as.matrix(read.csv("tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) + expect_equal(as.vector(res),as.vector(res_py),tolerance = 1e-15) +} +) + +#testing format +#test_that(,{}) From 0d82cc38a61ca918611de4d4909f9f6e80e7e622 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Wed, 30 Nov 2022 13:32:37 -0500 Subject: [PATCH 05/17] Update DESCRIPTION --- DESCRIPTION | 2 -- 1 file changed, 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 47bf3e09..83ae821f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,7 @@ Package: netZooR Type: Package Title: Unified methods for the inference and analysis of gene regulatory networks - Version: 1.1.16 - Date: 2022-07-07 Authors@R: c(person("Marouen", "Ben Guebila", email = "benguebila@hsph.harvard.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0001-5934-966X")), From 9b4eb1f4d0ebecebb80528179efef2aff598b01f Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:31:39 -0500 Subject: [PATCH 06/17] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 83ae821f..a625d3fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -64,7 +64,8 @@ Imports: RCy3, tidyr, methods, dplyr, - graphics + graphics, + matrixcalc License: GPL-3 Encoding: UTF-8 LazyData: false From 046980ec1aef889bca2551944ddb621f417169a0 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Mon, 5 Dec 2022 07:01:42 -0500 Subject: [PATCH 07/17] mechanics for prec mat and parcor --- R/DRAGON.R | 40 +++++++++++++++++++++++++++++------- tests/testthat/test-dragon.R | 2 +- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index 4cabfae6..d7871e5a 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -290,26 +290,52 @@ get_shrunken_covariance_dragon = function(X1,X2, lambdas) return(Sigma) } -get_partial_correlation_from_precision = function(Theta) +get_precision_matrix_dragon = function(X1, X2, lambdas) { - + Sigma = get_shrunken_covariance_dragon(X1, X2, lambdas) + Theta = solve(Sigma) + return(Theta) + + # in the python implementation, mean is also returned. Omitting here + # X = np.hstack((X1, X2)) + # mu = np.mean(X, axis=0) } - -get_precision_matrix_dragon = function(X1, X2, lambdas) + +get_partial_correlation_from_precision = function(Theta,selfEdges=F) { - + # by default, does not return self edges (diagonal is set to zero) + ggm = -cov2cor(Theta) + if(!selfEdges) + ggm[diag(ggm)] = 0 + return(ggm) } get_partial_correlation_dragon = function(X1,X2,lambdas) { - + Theta = get_precision_matrix_dragon(X1, X2, lambdas) + ggm = get_partial_correlation_from_precision(Theta) + return(ggm) } -estimate_kappa = function(n, p, lambda0, seed) +# The functions below are for p-value estimation on the DRAGON results + +# The functions logli, estimate_kappa, and estimate_p_values are for benchmarking +# with standard GGM; omitting here +# logli = function(X, Theta, mu) +# estimate_kappa = function(n, p, lambda0, seed) +# estimate_p_values(r, n, lambda0, kappa='estimate', seed=1): + +# def estimate_kappa_dragon(n, p1, p2, lambdas, seed, simultaneous = False): +estimate_kappa_dragon = function(n, p1, n2, lambdas, seed, simultaneous = F) { } + +# def estimate_p_values_dragon(r, n, p1, p2, lambdas, kappa='estimate', seed=1, simultaneous = False): +estimate_p_values_dragon = function(r, n, p1, p2, lambdas, kappa="estimate",seed=1, simultaneous = F) +{ +} dragon = function() { diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 8fb92db6..cc801047 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -62,7 +62,7 @@ test_that("[DRAGON] get_shrunken_covariance_dragon() function returns the right X2 = as.matrix(myX[,3]) lambdas = c(0.25,0.5) res = get_shrunken_covariance_dragon(X1,X2,lambdas) - res_py = as.matrix(read.csv("tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) + res_py = as.matrix(read.csv("dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) expect_equal(as.vector(res),as.vector(res_py),tolerance = 1e-15) } ) From ce31f0fdcb89be358a734b5d2cfc0cb737103055 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:11:23 -0500 Subject: [PATCH 08/17] added testdata directory --- DESCRIPTION | 3 ++- R/DRAGON.R | 19 +++++++++++++++++++ .../dragon_test_get_shrunken_covariance.csv | 0 tests/testthat/test-dragon.R | 15 ++++++++++++++- 4 files changed, 35 insertions(+), 2 deletions(-) rename tests/{testthat => testdata}/dragon-test-files/dragon_test_get_shrunken_covariance.csv (100%) diff --git a/DESCRIPTION b/DESCRIPTION index 83ae821f..a625d3fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -64,7 +64,8 @@ Imports: RCy3, tidyr, methods, dplyr, - graphics + graphics, + matrixcalc License: GPL-3 Encoding: UTF-8 LazyData: false diff --git a/R/DRAGON.R b/R/DRAGON.R index d7871e5a..c6130ce3 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -325,6 +325,25 @@ get_partial_correlation_dragon = function(X1,X2,lambdas) # estimate_kappa = function(n, p, lambda0, seed) # estimate_p_values(r, n, lambda0, kappa='estimate', seed=1): +log_lik_shrunken = function(kappa, p, lambda, rhos) +{ + # kappa is to be optimized, so comes first in the arguments + # p is fixed (number of predictors) + # lambda is fixed (as estimated by DRAGON) + # rhos is fixed (observed partial correlations from the data) + mysum = 0 + + for(i in 1:p) + { + first_term = (kappa-3)/2*log((1-lambda)^2-rhos[i]^2) + second_term = lbeta(1/2, (kappa-1)/2) + third_term = (kappa-2)*log(1-lambda) + mysum = mysum + first_term - second_term - third_term + } + + return(mysum) +} + # def estimate_kappa_dragon(n, p1, p2, lambdas, seed, simultaneous = False): estimate_kappa_dragon = function(n, p1, n2, lambdas, seed, simultaneous = F) { diff --git a/tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv b/tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv similarity index 100% rename from tests/testthat/dragon-test-files/dragon_test_get_shrunken_covariance.csv rename to tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index cc801047..3828695a 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -62,10 +62,23 @@ test_that("[DRAGON] get_shrunken_covariance_dragon() function returns the right X2 = as.matrix(myX[,3]) lambdas = c(0.25,0.5) res = get_shrunken_covariance_dragon(X1,X2,lambdas) - res_py = as.matrix(read.csv("dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) + res_py = as.matrix(read.csv("./testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) expect_equal(as.vector(res),as.vector(res_py),tolerance = 1e-15) } ) +# test log likelihood function +test_that("[DRAGON] Log likelihood function for estimation of kappa is correct",{ + # log_lik_shrunken = function(kappa, p, lambda, rhos) + kappa = 10 + p = 100 + lambda = 0.1 + rhos = runif(n = 100, min = -0.9, max = 0.9) # equation is valid for [-(1-lambda),(1-lambda)] + log_lik_shrunken(kappa = kappa, + p = p, + lambda = lambda, + rhos = rhos) +}) + #testing format #test_that(,{}) From 4b586fc6d55f6de6dd1e5b92d09018a1b655c723 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Mon, 19 Dec 2022 19:18:37 -0500 Subject: [PATCH 09/17] fixed test paths --- R/DRAGON.R | 5 ++--- .../dragon_test_get_shrunken_covariance.csv | 4 ---- tests/testthat.R | 1 + tests/testthat/test-dragon.R | 5 +++-- 4 files changed, 6 insertions(+), 9 deletions(-) delete mode 100644 tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv diff --git a/R/DRAGON.R b/R/DRAGON.R index c6130ce3..226be485 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -180,8 +180,7 @@ estimatePenaltyParameters = function(X1,X2) # # const = (np.sum(varS1) + np.sum(varS2) - 2.*np.sum(varS12) # + 4.*np.sum(eSqs12)) - print("I did not include the constant because I think we don't need it for the optimization") - + # T1_1 = -2.*(np.sum(varS1) - np.trace(varS1) + np.sum(eSqs12)) T1_1 = -2*(sum(varS1) - matrix.trace(as.matrix(varS1)) + sum(esqS12)) # T1_2 = -2.*(np.sum(varS2) - np.trace(varS2) + np.sum(eSqs12)) @@ -221,7 +220,7 @@ estimatePenaltyParameters = function(X1,X2) # lams = [x[indices[0]],x[indices[1]]] # lams = x[arrayInd(which(riskgrid == min(riskgrid)),.dim=c(101,101))] - print("round 1 is seeding and then we use optimization") + # this is seeding with grid search and then we use optimization lams # res = minimize(risk, lams, method='L-BFGS-B',#'TNC',#'SLSQP', diff --git a/tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv b/tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv deleted file mode 100644 index 9b67b34e..00000000 --- a/tests/testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv +++ /dev/null @@ -1,4 +0,0 @@ -,0,1,2 -0,4.0,7.5,-0.6123724356957945 -1,7.5,37.0,0.30618621784789724 -2,-0.6123724356957945,0.30618621784789724,1.0 diff --git a/tests/testthat.R b/tests/testthat.R index 235ea9b2..19460642 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -4,5 +4,6 @@ library(netZooR) #download test data download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/ppi_medium.txt','testthat/ppi_medium.txt') download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/unittest_datasets/testDataset.RData','testthat/testDataset.RData') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon_test_get_shrunken_covariance.csv','testthat/dragon_test_get_shrunken_covariance.csv') test_check("netZooR") diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 3828695a..9d85d093 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -1,7 +1,8 @@ # unit-tests for DRAGON - context("test DRAGON helper functions") +source("../../R/DRAGON.R") + test_that("[DRAGON] scale() function yields expected results", { x = seq(1:100) scale_unbiased = scale(x) # default is bias=F @@ -62,7 +63,7 @@ test_that("[DRAGON] get_shrunken_covariance_dragon() function returns the right X2 = as.matrix(myX[,3]) lambdas = c(0.25,0.5) res = get_shrunken_covariance_dragon(X1,X2,lambdas) - res_py = as.matrix(read.csv("./testdata/dragon-test-files/dragon_test_get_shrunken_covariance.csv",row.names=1)) + res_py = as.matrix(read.csv("./dragon_test_get_shrunken_covariance.csv",row.names=1)) expect_equal(as.vector(res),as.vector(res_py),tolerance = 1e-15) } ) From c2babb7ea7116038bfc3cccc141f602d5ecfc8ad Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 20 Dec 2022 04:14:45 -0500 Subject: [PATCH 10/17] Update test-dragon.R --- tests/testthat/test-dragon.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 9d85d093..a0daf0ea 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -1,7 +1,7 @@ # unit-tests for DRAGON context("test DRAGON helper functions") -source("../../R/DRAGON.R") +# source("../../R/DRAGON.R") test_that("[DRAGON] scale() function yields expected results", { x = seq(1:100) From 9d9fb9ffe3ab5d177784079c9f1098edc3a12bf1 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 20 Dec 2022 21:10:09 -0500 Subject: [PATCH 11/17] added export function and tests --- DESCRIPTION | 8 +-- NAMESPACE | 1 + R/DRAGON.R | 114 +++++++++++++++++++++++------------ man/dragon.Rd | 27 +++++++++ tests/testthat/test-dragon.R | 70 +++++++++++++++++---- 5 files changed, 164 insertions(+), 56 deletions(-) create mode 100644 man/dragon.Rd diff --git a/DESCRIPTION b/DESCRIPTION index a625d3fd..ea47c148 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,8 @@ Depends: R (>= 4.1.0), igraph, reticulate, pandaR, - yarn + yarn, + matrixcalc biocViews: NetworkInference, Network, @@ -64,8 +65,7 @@ Imports: RCy3, tidyr, methods, dplyr, - graphics, - matrixcalc + graphics License: GPL-3 Encoding: UTF-8 LazyData: false @@ -76,6 +76,6 @@ Suggests: pkgdown VignetteEngine: knitr VignetteBuilder: knitr -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 BugReports: https://github.com/netZoo/netZooR/issues URL: https://github.com/netZoo/netZooR, https://netzoo.github.io/ diff --git a/NAMESPACE b/NAMESPACE index 29f527a0..604551ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(craneBipartite) export(craneUnipartite) export(createCondorObject) export(createPandaStyle) +export(dragon) export(elistToAdjMat) export(lioness) export(lionessPy) diff --git a/R/DRAGON.R b/R/DRAGON.R index 226be485..96b6abea 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -1,10 +1,3 @@ -# Function to implement DRAGON: https://arxiv.org/abs/2104.01690 - -# to add to dependencies: - -library(matrixcalc) - - # helper functions as in the netzooPy impl # def Scale(X): # X_temp = X @@ -62,27 +55,29 @@ VarS = function(x) # varS *= n/(n-1)**3 # try doing this with an array instead of matrix multiplication - xbar = apply(x,2,mean) - p = ncol(x) - w = array(dim=c(n,p,p)) - for(k in 1:n) - { - for(i in 1:p) - { - for(j in 1:p) # this will be symmetric, but leaving it now for clarity - { - w[k,i,j] = (x[k,i] - xbar[i])*(x[k,j]-xbar[j]) - } - } - } - - wbar = 1/n*apply(w,2:3,sum) - summand = 0 - for(k in 1:n) - summand = summand + (w[k,,]-wbar)^2 - - varhat_s = n/((n-1)^3)*summand - return(varhat_s) + # xbar = apply(x,2,mean) + # p = ncol(x) + # w = array(dim=c(n,p,p)) + # for(k in 1:n) + # { + # for(i in 1:p) + # { + # for(j in 1:p) # this will be symmetric, but leaving it now for clarity + # { + # w[k,i,j] = (x[k,i] - xbar[i])*(x[k,j]-xbar[j]) + # } + # } + # } + # + # wbar = 1/n*apply(w,2:3,sum) + # summand = 0 + # for(k in 1:n) + # summand = summand + (w[k,,]-wbar)^2 + # + # varhat_s = n/((n-1)^3)*summand + varS = t(a) %*% a + -n*wbar^2 + varHat_s = n/((n-1)^3)*varS + return(varHat_s) # this code matches with the python output } @@ -140,19 +135,17 @@ risk = function(gamma, const, t11, t12, t21, t22, t3, t4) # def estimate_penalty_parameters_dragon(X1, X2): estimatePenaltyParameters = function(X1,X2) { - X1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) - X2 = matrix(c(9,7,8),nrow=3,byrow=T) + # X1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) + # X2 = matrix(c(9,7,8),nrow=3,byrow=T) # X1 is omics matrix 1, dimensions n x p1 # X2 is omics matrix 2, dimensions n x p2 # The matrices should have the same ordering by samples - n = nrow(X1) p1 = ncol(X1) p2 = ncol(X2) # n = X1.shape[0] # p1 = X1.shape[1] # p2 = X2.shape[1] - X = cbind.data.frame(X1, X2) # X = np.append(X1, X2, axis=1) @@ -161,7 +154,6 @@ estimatePenaltyParameters = function(X1,X2) varS = VarS(X) esqS = EsqS(X) - # IDs = np.cumsum([p1,p2]) IDs = cumsum(c(p1,p2)) @@ -192,7 +184,7 @@ estimatePenaltyParameters = function(X1,X2) # T3 = 2.*np.sum(eSqs12) T3 = 2*sum(esqS12) # T4 = 4.*(np.sum(varS12)-np.sum(eSqs12)) - T4 = 4*sum(varS12)-sum(esqS12) + T4 = 4*(sum(varS12)-sum(esqS12)) const = (sum(varS1) + sum(varS2) - 2*sum(varS12) + 4*sum(esqS12)) @@ -203,6 +195,7 @@ estimatePenaltyParameters = function(X1,X2) for(i in 1:length(x)) { for(j in 1:length(x)) + { riskgrid[i,j] = risk(gamma=c(x[i],x[j]), const = const, t11=T1_1, @@ -211,6 +204,7 @@ estimatePenaltyParameters = function(X1,X2) t22=T2_2, t3=T3, t4=T4) + } } dim(riskgrid) @@ -221,7 +215,7 @@ estimatePenaltyParameters = function(X1,X2) # lams = x[arrayInd(which(riskgrid == min(riskgrid)),.dim=c(101,101))] # this is seeding with grid search and then we use optimization - lams + print(lams) # res = minimize(risk, lams, method='L-BFGS-B',#'TNC',#'SLSQP', # tol=1e-12, @@ -239,9 +233,11 @@ estimatePenaltyParameters = function(X1,X2) method="L-BFGS-B", lower=c(0,0), upper=c(1,1), - control = list(pgtol = 1e-12)) + control = list(trace=T,pgtol = 1e-15)) - return(res) + # reparameterize + lambdas = c(1-res$par[1]^2, 1-res$par[2]^2) + return(list("lambdas"=lambdas,"gammas"=res$par,"optim_result"=res,"risk_grid" = riskgrid)) # penalty_parameters = (1.-res.x[0]**2), (1.-res.x[1]**2) # # def risk_orig(lam): @@ -355,7 +351,47 @@ estimate_p_values_dragon = function(r, n, p1, p2, lambdas, kappa="estimate",seed } -dragon = function() +#' Run DRAGON in R. +#' +#' Description: Estimates a multi-omic Gaussian graphical model for two input layers of paired omic data. +#' +#' @param layer1 : first layer of omics data +#' @param layer2 : second layer of omics data +#' @param pval : calculate p-values for network edges +#' @param gradient : method for estimating parameters of p-value distribution. default = "finite_difference"; other option = "exact" +#' @return cov : the shrunken covariance matrix +#' @return prec: the shrunken precision matrix +#' @return ggm: the shrunken Gaussian graphical model (matrix of partial correlations). Self-edges are set to zero. +#' @return lambdas: vector of parameters (lambda1, lambda2) corresponding to omic-specific shrinkage +#' @export +dragon = function(layer1,layer2,pval = F,gradient = "finite_difference", verbose = T) { - + if(verbose) + print("[netZooR::dragon] Estimating penalty parameters...") + # estimate penalty parameters + myres = estimatePenaltyParameters(layer1, layer2) + lambdas = myres$lambdas + + if(verbose) + print(paste(c("[netZooR::dragon] Estimated parameters:",lambdas),collapse=" ")) + + if(verbose) + print("[netZooR::dragon] Calculating shrunken matrices...") + # apply penalty parameters to return shrunken covariance and ggm + shrunken_cov = get_shrunken_covariance_dragon(layer1, layer2,lambdas) + precmat = get_precision_matrix_dragon(layer1, layer2, lambdas) + ggm = get_partial_correlation_dragon(layer1, layer2, lambdas) + + # if pval, return pval approx with finite difference + if(pval) + { + print("[netZooR::dragon] p-value calculation not yet implemented in R; to estimate p-values, use netZooPy.") + } + + return(list("cov"=shrunken_cov, + "prec"=precmat, + "ggm"=ggm, + "lambdas"=lambdas, + "gammas"=myres$gammas, + "risk_grid"=myres$risk_grid)) } diff --git a/man/dragon.Rd b/man/dragon.Rd new file mode 100644 index 00000000..e15136a2 --- /dev/null +++ b/man/dragon.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DRAGON.R +\name{dragon} +\alias{dragon} +\title{Run DRAGON in R.} +\usage{ +dragon(layer1, layer2, pval = F, gradient = "finite_difference") +} +\arguments{ +\item{layer1}{: first layer of omics data} + +\item{layer2}{: second layer of omics data} + +\item{pval}{: calculate p-values for network edges} + +\item{gradient}{: method for estimating parameters of p-value distribution. default = "finite_difference"; other option = "exact"} +} +\value{ +cov + +prec + +ggm +} +\description{ +Description: Estimates a multi-omic Gaussian graphical model for two input layers of paired omic data. +} diff --git a/tests/testthat/test-dragon.R b/tests/testthat/test-dragon.R index 9d85d093..30247508 100644 --- a/tests/testthat/test-dragon.R +++ b/tests/testthat/test-dragon.R @@ -1,8 +1,6 @@ # unit-tests for DRAGON context("test DRAGON helper functions") -source("../../R/DRAGON.R") - test_that("[DRAGON] scale() function yields expected results", { x = seq(1:100) scale_unbiased = scale(x) # default is bias=F @@ -68,18 +66,64 @@ test_that("[DRAGON] get_shrunken_covariance_dragon() function returns the right } ) -# test log likelihood function -test_that("[DRAGON] Log likelihood function for estimation of kappa is correct",{ - # log_lik_shrunken = function(kappa, p, lambda, rhos) - kappa = 10 - p = 100 - lambda = 0.1 - rhos = runif(n = 100, min = -0.9, max = 0.9) # equation is valid for [-(1-lambda),(1-lambda)] - log_lik_shrunken(kappa = kappa, - p = p, - lambda = lambda, - rhos = rhos) +test_that("[DRAGON] Risk grid in netZooR impl. matches risk grid from netZooPy impl.", +{ + toy_layer1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) + toy_layer2 = matrix(c(9,7,8),nrow=3,byrow=T) + + res = dragon(layer1 = toy_layer1, layer2 = toy_layer2, pval = F) + # assert risk results are equal on toy data + risk_py = read.csv("risk_grid_netzoopy.csv",header=F) + expect_equal(as.vector(t(res$risk_grid)), as.vector(as.matrix(risk_py)),tolerance = 1e-10) }) +# test dragon() exported function +test_that("[DRAGON] dragon() exported function works on simulated data", +{ + # load dragon data simulated from python + toy_layer1 = read.csv("dragon_layer1.csv",header=F) + toy_layer2 = read.csv("dragon_layer2.csv",header=F) + res = dragon(layer1 = toy_layer1, layer2 = toy_layer2, pval = F) + + # assert tuning parameter results are equal to that found with python + python_lambda1 = 0.9074582855371163 + python_lambda2 = 0.9132919090041549 + + # These are the same 1e-5 + expect_equal(res$lambdas[1],python_lambda1,tol=1e-5) + expect_equal(res$lambdas[2],python_lambda2,tol=1e-5) + + # assert that output matrices are the same within this tolerance + py_cov = read.csv("dragon_python_cov.csv",header=F) + expect_equal(as.vector(res$cov),as.vector(as.matrix(py_cov)),tol=1e-5) + + # confirm the inversion matches to machine zero in R + expect_equal(as.vector(solve(res$cov)),as.vector(res$prec),tol=1e-15) + + # because of the propagation of differences that comes along with + # matrix inversion, we see bigger differences in the precision and ggm matrices + # tolerance of 1e-2 + + py_prec = read.csv("dragon_python_prec.csv",header=F) + expect_equal(as.vector(res$prec),as.vector(as.matrix(py_prec)),tol=1e-2) + + py_parcor = read.csv("dragon_python_parcor.csv",header=F) + expect_equal(as.vector(res$ggm),as.vector(as.matrix(py_parcor)),tol=1e-2) + +}) + +# test log likelihood function +# test_that("[DRAGON] Log likelihood function for estimation of kappa is correct",{ +# # log_lik_shrunken = function(kappa, p, lambda, rhos) +# kappa = 10 +# p = 100 +# lambda = 0.1 +# rhos = runif(n = 100, min = -0.9, max = 0.9) # equation is valid for [-(1-lambda),(1-lambda)] +# log_lik_shrunken(kappa = kappa, +# p = p, +# lambda = lambda, +# rhos = rhos) +# }) + #testing format #test_that(,{}) From 54750cf30343f6e0dab60266adefd87a1f59f887 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 20 Dec 2022 21:38:47 -0500 Subject: [PATCH 12/17] updated docs --- R/DRAGON.R | 24 +++++++++++++++--------- man/dragon.Rd | 24 ++++++++++++++---------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index 96b6abea..8143c514 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -355,16 +355,22 @@ estimate_p_values_dragon = function(r, n, p1, p2, lambdas, kappa="estimate",seed #' #' Description: Estimates a multi-omic Gaussian graphical model for two input layers of paired omic data. #' -#' @param layer1 : first layer of omics data -#' @param layer2 : second layer of omics data -#' @param pval : calculate p-values for network edges -#' @param gradient : method for estimating parameters of p-value distribution. default = "finite_difference"; other option = "exact" -#' @return cov : the shrunken covariance matrix -#' @return prec: the shrunken precision matrix -#' @return ggm: the shrunken Gaussian graphical model (matrix of partial correlations). Self-edges are set to zero. -#' @return lambdas: vector of parameters (lambda1, lambda2) corresponding to omic-specific shrinkage +#' @param layer1 : first layer of omics data; rows: samples (order must match layer2), columns: variables +#' @param layer2 : second layer of omics data; rows: samples (order must match layer1), columns: variables. +#' @param pval : calculate p-values for network edges. Not yet implemented in R; available in netZooPy. +#' @param gradient : method for estimating parameters of p-value distribution, applies only if p-val == T. default = "finite_difference"; other option = "exact" +#' @return A list of model results. cov : the shrunken covariance matrix +#' \itemize{ +#' \item{\code{cov}}{ the shrunken covariance matrix} +#' \item{\code{prec}}{ the shrunken precision matrix} +#' \item{\code{ggm}}{ the shrunken Gaussian graphical model; matrix of partial correlations. Self-edges (diagonal elements) are set to zero.} +#' \item{\code{lambdas}}{ Vector of omics-specific tuning parameters (lambda1, lambda2) for \code{layer1} and \code{layer2}} +#' \item{\code{gammas}}{ Reparameterized tuning parameters; gamma = 1 - lambda^2} +#' \item{\code{risk_grid}}{ Risk grid, for assessing optimization. Grid boundaries are in terms of gamma.} +#' } +#' #' @export -dragon = function(layer1,layer2,pval = F,gradient = "finite_difference", verbose = T) +dragon = function(layer1,layer2,pval = F,gradient = "finite_difference", verbose = F) { if(verbose) print("[netZooR::dragon] Estimating penalty parameters...") diff --git a/man/dragon.Rd b/man/dragon.Rd index e15136a2..6b72a603 100644 --- a/man/dragon.Rd +++ b/man/dragon.Rd @@ -4,23 +4,27 @@ \alias{dragon} \title{Run DRAGON in R.} \usage{ -dragon(layer1, layer2, pval = F, gradient = "finite_difference") +dragon(layer1, layer2, pval = F, gradient = "finite_difference", verbose = F) } \arguments{ -\item{layer1}{: first layer of omics data} +\item{layer1}{: first layer of omics data; rows: samples (order must match layer2), columns: variables} -\item{layer2}{: second layer of omics data} +\item{layer2}{: second layer of omics data; rows: samples (order must match layer1), columns: variables.} -\item{pval}{: calculate p-values for network edges} +\item{pval}{: calculate p-values for network edges. Not yet implemented in R; available in netZooPy.} -\item{gradient}{: method for estimating parameters of p-value distribution. default = "finite_difference"; other option = "exact"} +\item{gradient}{: method for estimating parameters of p-value distribution, applies only if p-val == T. default = "finite_difference"; other option = "exact"} } \value{ -cov - -prec - -ggm +A list of model results. cov : the shrunken covariance matrix +\itemize{ + \item{\code{cov}}{ the shrunken covariance matrix} + \item{\code{prec}}{ the shrunken precision matrix} + \item{\code{ggm}}{ the shrunken Gaussian graphical model; matrix of partial correlations. Self-edges (diagonal elements) are set to zero.} + \item{\code{lambdas}}{ Vector of omics-specific tuning parameters (lambda1, lambda2) for \code{layer1} and \code{layer2}} + \item{\code{gammas}}{ Reparameterized tuning parameters; gamma = 1 - lambda^2} + \item{\code{risk_grid}}{ Risk grid, for assessing optimization. Grid boundaries are in terms of gamma.} +} } \description{ Description: Estimates a multi-omic Gaussian graphical model for two input layers of paired omic data. From 218a9c7be8fcd3336b600bed8add13ee9c2fc4ed Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 20 Dec 2022 22:15:25 -0500 Subject: [PATCH 13/17] added ex datasets --- tests/testthat.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/testthat.R b/tests/testthat.R index 19460642..ffcb13b6 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -4,6 +4,12 @@ library(netZooR) #download test data download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/ppi_medium.txt','testthat/ppi_medium.txt') download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/unittest_datasets/testDataset.RData','testthat/testDataset.RData') -download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon_test_get_shrunken_covariance.csv','testthat/dragon_test_get_shrunken_covariance.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_test_get_shrunken_covariance.csv','testthat/dragon_test_get_shrunken_covariance.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_layer1.csv','testthat/dragon_layer1.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_layer2.csv','testthat/dragon_layer2.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_python_cov.csv','testthat/dragon_python_cov.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_python_prec.csv','testthat/dragon_python_prec.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/dragon_python_parcor.csv','testthat/dragon_python_parcor.csv') +download.file('https://netzoo.s3.us-east-2.amazonaws.com/netZooR/example_datasets/dragon/risk_grid_netzoopy.csv','testthat/risk_grid_netzoopy.csv') test_check("netZooR") From c4d4dbfdd0d4307aa9b0342f1967cef573e50af2 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 28 Nov 2023 14:59:55 -0500 Subject: [PATCH 14/17] fixed bug w/self-edges in GGM --- R/DRAGON.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index 8143c514..85011c5f 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -301,7 +301,7 @@ get_partial_correlation_from_precision = function(Theta,selfEdges=F) # by default, does not return self edges (diagonal is set to zero) ggm = -cov2cor(Theta) if(!selfEdges) - ggm[diag(ggm)] = 0 + diag(ggm) = 0 return(ggm) } From dd43ed640245a1e0c51e1992c22a751b5a154fca Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:04:55 -0500 Subject: [PATCH 15/17] fixed GGM diagonal issue --- R/DRAGON.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index ddb1eee4..16d843c2 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -318,7 +318,7 @@ get_partial_correlation_from_precision = function(Theta,selfEdges=FALSE) # by default, does not return self edges (diagonal is set to zero) ggm = -cov2cor(Theta) if(!selfEdges) - ggm[diag(ggm)] = 0 + diag(ggm) = 0 return(ggm) } From 1a207ec78189838d0879d14ec9bbc8fef6d2d6da Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:11:13 -0500 Subject: [PATCH 16/17] fixed merge conflicts in docs --- DESCRIPTION | 11 ----------- man/dragon.Rd | 5 +---- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1e93e6f5..636e0c37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,8 @@ Package: netZooR Type: Package Title: Unified methods for the inference and analysis of gene regulatory networks -<<<<<<< HEAD -Version: 1.1.16 -Date: 2022-07-07 -======= Version: 1.5.4 Date: 2023-09-25 ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 Authors@R: c(person("Marouen", "Ben Guebila", email = "benguebila@hsph.harvard.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0001-5934-966X")), person("Tian", "Wang", @@ -29,11 +24,6 @@ Description: netZooR unifies the implementations of several Network Zoo methods Depends: R (>= 4.2.0), igraph, reticulate, -<<<<<<< HEAD - pandaR, - yarn, - matrixcalc -======= yarn, pandaR, matrixcalc @@ -41,7 +31,6 @@ Remotes: stan-dev/cmdstanr, jnpaulson/pandaR, cytoscape/RCy3 ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 biocViews: NetworkInference, Network, diff --git a/man/dragon.Rd b/man/dragon.Rd index b99cc62b..bb2e3aac 100644 --- a/man/dragon.Rd +++ b/man/dragon.Rd @@ -4,9 +4,6 @@ \alias{dragon} \title{Run DRAGON in R.} \usage{ -<<<<<<< HEAD -dragon(layer1, layer2, pval = F, gradient = "finite_difference", verbose = F) -======= dragon( layer1, layer2, @@ -14,7 +11,7 @@ dragon( gradient = "finite_difference", verbose = FALSE ) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 + } \arguments{ \item{layer1}{: first layer of omics data; rows: samples (order must match layer2), columns: variables} From 64fbba6e4c9fd41378f786029214ad8a1f795a99 Mon Sep 17 00:00:00 2001 From: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:13:33 -0500 Subject: [PATCH 17/17] fixed more merge conflicts --- R/DRAGON.R | 29 +++-------------------------- man/dragon.Rd | 1 - 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/R/DRAGON.R b/R/DRAGON.R index 16d843c2..1cbc7743 100644 --- a/R/DRAGON.R +++ b/R/DRAGON.R @@ -5,11 +5,8 @@ # X_mean = np.mean(X_temp, axis=0) # return (X_temp - X_mean) / X_std -<<<<<<< HEAD -scale = function(x,bias=F) -======= + scale = function(x,bias=FALSE) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 { # sd does 1/(n-1), python does 1/n # use the bias option for exact match with python @@ -139,13 +136,8 @@ risk = function(gamma, const, t11, t12, t21, t22, t3, t4) # def estimate_penalty_parameters_dragon(X1, X2): estimatePenaltyParameters = function(X1,X2) { -<<<<<<< HEAD - # X1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=T) - # X2 = matrix(c(9,7,8),nrow=3,byrow=T) -======= # X1 = matrix(c(1,2,3,1,5,12),nrow=3,byrow=TRUE) # X2 = matrix(c(9,7,8),nrow=3,byrow=TRUE) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 # X1 is omics matrix 1, dimensions n x p1 # X2 is omics matrix 2, dimensions n x p2 # The matrices should have the same ordering by samples @@ -242,12 +234,8 @@ estimatePenaltyParameters = function(X1,X2) method="L-BFGS-B", lower=c(0,0), upper=c(1,1), -<<<<<<< HEAD - control = list(trace=T,pgtol = 1e-15)) -======= control = list(trace=TRUE,pgtol = 1e-15)) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 - + # reparameterize lambdas = c(1-res$par[1]^2, 1-res$par[2]^2) return(list("lambdas"=lambdas,"gammas"=res$par,"optim_result"=res,"risk_grid" = riskgrid)) @@ -309,11 +297,8 @@ get_precision_matrix_dragon = function(X1, X2, lambdas) # mu = np.mean(X, axis=0) } -<<<<<<< HEAD -get_partial_correlation_from_precision = function(Theta,selfEdges=F) -======= + get_partial_correlation_from_precision = function(Theta,selfEdges=FALSE) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 { # by default, does not return self edges (diagonal is set to zero) ggm = -cov2cor(Theta) @@ -375,12 +360,8 @@ estimate_p_values_dragon = function(r, n, p1, p2, lambdas, kappa="estimate",seed #' @param layer1 : first layer of omics data; rows: samples (order must match layer2), columns: variables #' @param layer2 : second layer of omics data; rows: samples (order must match layer1), columns: variables. #' @param pval : calculate p-values for network edges. Not yet implemented in R; available in netZooPy. -<<<<<<< HEAD -#' @param gradient : method for estimating parameters of p-value distribution, applies only if p-val == T. default = "finite_difference"; other option = "exact" -======= #' @param gradient : method for estimating parameters of p-value distribution, applies only if p-val == TRUE. default = "finite_difference"; other option = "exact" #' @param verbose : verbosity level (TRUE/FALSE) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 #' @return A list of model results. cov : the shrunken covariance matrix #' \itemize{ #' \item{\code{cov}}{ the shrunken covariance matrix} @@ -392,11 +373,7 @@ estimate_p_values_dragon = function(r, n, p1, p2, lambdas, kappa="estimate",seed #' } #' #' @export -<<<<<<< HEAD -dragon = function(layer1,layer2,pval = F,gradient = "finite_difference", verbose = F) -======= dragon = function(layer1,layer2,pval = FALSE,gradient = "finite_difference", verbose = FALSE) ->>>>>>> d64817e618f3d59f34f7de43ed6c678fb63774f7 { if(verbose) print("[netZooR::dragon] Estimating penalty parameters...") diff --git a/man/dragon.Rd b/man/dragon.Rd index bb2e3aac..fdf2aaeb 100644 --- a/man/dragon.Rd +++ b/man/dragon.Rd @@ -11,7 +11,6 @@ dragon( gradient = "finite_difference", verbose = FALSE ) - } \arguments{ \item{layer1}{: first layer of omics data; rows: samples (order must match layer2), columns: variables}