From 69042ae1ebac2796718b347a319af59e8bf46b58 Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 7 Apr 2021 21:28:26 -0700 Subject: [PATCH 01/11] modify build_MME() for partial connection --- src/1.JWAS/src/build_MME.jl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index f6d6cac8..a667a306 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -38,9 +38,25 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, num_latent_traits = false, nonlinear_function = false) #nonlinear_function(x1,x2) = x1+x2 if num_latent_traits != false lhs, rhs = strip.(split(model_equations,"=")) + rhs_split=strip.(split(rhs,"+")) + geno_term=[] + for i in rhs_split + if isdefined(Main,Symbol(i)) && typeof(getfield(Main,Symbol(i))) == Genotypes + push!(geno_term,i) + end + end + + if length(geno_term) != num_latent_traits + @show geno_term + error("#genotypes ≠ #hidden nodes") + end + + non_gene_term = filter(x->x ∉ geno_term,rhs_split) + non_gene_term = join(non_gene_term,"+") + model_equations = "" for i = 1:num_latent_traits - model_equations = model_equations*lhs*string(i)*"="*rhs*";" + model_equations = model_equations*lhs*string(i)*"="*non_gene_term*"+"*geno_term[i]*";" end model_equations = model_equations[1:(end-1)] end From 7b424b458e32ed93633e2783a513a4e6323a7e9f Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 14 Apr 2021 11:26:52 -0700 Subject: [PATCH 02/11] allow user-defined activation function --- src/1.JWAS/src/JWAS.jl | 1 + src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 8 +-- src/1.JWAS/src/Nonlinear/bnn_hmc.jl | 26 +++---- src/1.JWAS/src/Nonlinear/nonlinear.jl | 6 +- src/1.JWAS/src/build_MME.jl | 75 ++++++++++++++------ src/1.JWAS/src/types.jl | 3 +- 6 files changed, 76 insertions(+), 43 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index cacea8d5..a12576a7 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -6,6 +6,7 @@ using SparseArrays using LinearAlgebra using ProgressMeter using .PedModule +using ForwardDiff import StatsBase: describe #a new describe is exported diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 7014378b..07b1154b 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -31,6 +31,7 @@ function MCMC_BayesianAlphabet(mme,df) is_mega_trait = mme.MCMCinfo.mega_trait latent_traits = mme.latent_traits nonlinear_function = mme.nonlinear_function + activation_function = mme.activation_function ############################################################################ # Categorical Traits (starting values for maker effects defaulting to 0s) ############################################################################ @@ -167,9 +168,8 @@ function MCMC_BayesianAlphabet(mme,df) # MCMC (starting values for sol (zeros); mme.RNew; G0 are used) ############################################################################ # # Initialize mme for hmc before Gibbs - if mme.latent_traits == true - num_latent_traits = mme.M[1].ntraits - mme.weights_NN = vcat(mean(mme.ySparse),zeros(num_latent_traits)) + if latent_traits == true + mme.weights_NN = vcat(mean(mme.ySparse),zeros(mme.nModels)) end @showprogress "running MCMC ..." for iter=1:chain_length @@ -336,7 +336,7 @@ function MCMC_BayesianAlphabet(mme,df) #mme.M[1].genotypes here is 5-by-5 if latent_traits == true #to update ycorr! - sample_latent_traits(yobs,mme,ycorr,nonlinear_function) + sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_function) end ######################################################################## # 5. Update priors using posteriors (empirical) LATER diff --git a/src/1.JWAS/src/Nonlinear/bnn_hmc.jl b/src/1.JWAS/src/Nonlinear/bnn_hmc.jl index 00e764fe..c5ada2c2 100644 --- a/src/1.JWAS/src/Nonlinear/bnn_hmc.jl +++ b/src/1.JWAS/src/Nonlinear/bnn_hmc.jl @@ -30,24 +30,24 @@ #helper 1: calculate gradiant of all latent traits for all individual -function calc_gradient_z(ylats,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) +function calc_gradient_z(ylats,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) μ1, w1 = weights_NN[1], weights_NN[2:end] - tanh_ylats = tanh.(ylats) - #dlogfz =- (Z - ones(n)*Mu0' - X*W0) * inv(Sigma2z) #(n,l1) + g_ylats = activation_function.(ylats) + g_ylats_derivative = ForwardDiff.derivative.(activation_function, ylats) dlogf_ylats = - ycorr * inv(σ_ylats) - dlogfy = ((yobs .- μ1 - tanh_ylats*w1)/σ_yobs) * w1' .* (-tanh_ylats.^2 .+ 1) #size: (n, l1) + dlogfy = ((yobs .- μ1 - g_ylats*w1)/σ_yobs) * w1' .* g_ylats_derivative #size: (n, l1) gradient_ylats = dlogf_ylats + dlogfy return gradient_ylats #size (n,l1) end # helper 2: calculate log p(z|y) to help calculate the acceptance rate -function calc_log_p_z(ylats,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) +function calc_log_p_z(ylats,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) μ1 = weights_NN[1] w1 = weights_NN[2:end] - #logfz = -0.5*sum(((Z-ones(n)*Mu0'-X*W0).^2)*inv(Sigma2z),dims=2) .- (0.5*log(prod(diag(Sigma2z)))) + g_ylats = activation_function.(ylats) logf_ylats = -0.5*sum((ycorr.^2)*inv(σ_ylats),dims=2) .- (0.5*log(prod(diag(σ_ylats)))) - logfy = -0.5*(yobs .- μ1 - tanh.(ylats)*w1).^2 /σ_yobs .- 0.5*log(σ_yobs) + logfy = -0.5*(yobs .- μ1 - g_ylats*w1).^2 /σ_yobs .- 0.5*log(σ_yobs) log_p_ylats= logf_ylats + logfy return log_p_ylats #size: (n,1) @@ -55,17 +55,17 @@ end #helper 3: one iterations of HMC to sample Z #ycor is a temporary variable to save ycorr after reshape; ycorr is residual for latent traits -function hmc_one_iteration(nLeapfrog,ϵ,ylats_old,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) +function hmc_one_iteration(nLeapfrog,ϵ,ylats_old,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) nobs, ntraits = size(ylats_old) ylats_old = copy(ylats_old) ylats_new = copy(ylats_old) #step 1: Initiate Φ from N(0,M) Φ = randn(nobs, ntraits) #rand(n,Normal(0,M=1.0)), tuning parameter: M - log_p_old = calc_log_p_z(ylats_old,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) - 0.5*sum(Φ.^2,dims=2) #(n,1) + log_p_old = calc_log_p_z(ylats_old,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) - 0.5*sum(Φ.^2,dims=2) #(n,1) #step 2: update (ylats,Φ) from 10 leapfrog #2(a): update Φ - Φ += 0.5 * ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) #(n,l1) + Φ += 0.5 * ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) #(n,l1) for leap_i in 1:nLeapfrog #2(b) update latent traits ylats_new += ϵ * Φ # (n,l1) @@ -73,15 +73,15 @@ function hmc_one_iteration(nLeapfrog,ϵ,ylats_old,yobs,weights_NN,σ_ylats,σ_yo #(c) half step of phi if leap_i == nLeapfrog #2(c): update Φ - Φ += 0.5 * ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) + Φ += 0.5 * ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) else #2(a)+2(c): update Φ - Φ += ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) + Φ += ϵ * calc_gradient_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) end end #Step3. acceptance rate - log_p_new = calc_log_p_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr) - 0.5*sum(Φ.^2,dims=2) #(n,1) + log_p_new = calc_log_p_z(ylats_new,yobs,weights_NN,σ_ylats,σ_yobs,ycorr,activation_function) - 0.5*sum(Φ.^2,dims=2) #(n,1) r = exp.(log_p_new - log_p_old) # (n,1) nojump = rand(nobs) .> r # bool (n,1) diff --git a/src/1.JWAS/src/Nonlinear/nonlinear.jl b/src/1.JWAS/src/Nonlinear/nonlinear.jl index e7067e65..5903d55a 100644 --- a/src/1.JWAS/src/Nonlinear/nonlinear.jl +++ b/src/1.JWAS/src/Nonlinear/nonlinear.jl @@ -7,7 +7,7 @@ # (2) neural network: a1*tan(x1)+a2*tan(x2) #nonlinear_function: #user-provide function, "Neural Network" -function sample_latent_traits(yobs,mme,ycorr,nonlinear_function) +function sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_function) ylats_old = mme.ySparse # current values of each latent trait; [trait_1_obs;trait_2_obs;...] μ_ylats = mme.ySparse - ycorr # mean of each latent trait, [trait_1_obs-residuals;trait_2_obs-residuals;...] # = vcat(getEBV(mme,1).+mme.sol[1],getEBV(mme,2).+mme.sol[2])) @@ -19,8 +19,8 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function) μ_ylats = reshape(μ_ylats,nobs,ntraits) if nonlinear_function == "Neural Network" #HMC - ylats_new = hmc_one_iteration(10,0.1,ylats_old,yobs,mme.weights_NN,mme.R,σ2_yobs,reshape(ycorr,nobs,ntraits)) - else + ylats_new = hmc_one_iteration(10,0.1,ylats_old,yobs,mme.weights_NN,mme.R,σ2_yobs,reshape(ycorr,nobs,ntraits),activation_function) + else #user-defined function, MH candidates = μ_ylats+randn(size(μ_ylats)) #candidate samples if nonlinear_function == "Neural Network (MH)" μ_yobs_candidate = [ones(nobs) tanh.(candidates)]*weights diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index a667a306..fb4c261e 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -35,28 +35,40 @@ models = build_model(model_equations,R); ``` """ function build_model(model_equations::AbstractString, R = false; df = 4.0, - num_latent_traits = false, nonlinear_function = false) #nonlinear_function(x1,x2) = x1+x2 - if num_latent_traits != false - lhs, rhs = strip.(split(model_equations,"=")) - rhs_split=strip.(split(rhs,"+")) - geno_term=[] - for i in rhs_split - if isdefined(Main,Symbol(i)) && typeof(getfield(Main,Symbol(i))) == Genotypes - push!(geno_term,i) - end - end - - if length(geno_term) != num_latent_traits - @show geno_term - error("#genotypes ≠ #hidden nodes") + num_latent_traits = false, nonlinear_function = false, activation_function = false) #nonlinear_function(x1,x2) = x1+x2 + if nonlinear_function != false #NNBayes + printstyled("NNBayes is used. \n",bold=false,color=:green) + + #print activation info + if activation_function != false #e.g, activation_function="tanh" + printstyled("NNBayes: activation function is $activation_function. HMC is used to sample hidden nodes. \n",bold=false,color=:green) + elseif activation_function == false #e.g, nonlinear_function=f(z1,z2) + printstyled("NNBayes: user-defined nonlinear_function for the relationship between hidden nodes and observed trait. MH is used to sample hidden nodes.\n",bold=false,color=:green) end - non_gene_term = filter(x->x ∉ geno_term,rhs_split) - non_gene_term = join(non_gene_term,"+") - + #print connection info; re-write model equation + lhs, rhs = strip.(split(model_equations,"=")) model_equations = "" - for i = 1:num_latent_traits - model_equations = model_equations*lhs*string(i)*"="*non_gene_term*"+"*geno_term[i]*";" + if num_latent_traits != false #e.g. num_latent_traits=5 + printstyled("NNBayes: fully connected with $num_latent_traits hidden nodes. \n",bold=false,color=:green) + for i = 1:num_latent_traits + model_equations = model_equations*lhs*string(i)*"="*rhs*";" + end + elseif num_latent_traits == false #partially-connected + rhs_split=strip.(split(rhs,"+")) + geno_term=[] + for i in rhs_split + if isdefined(Main,Symbol(i)) && typeof(getfield(Main,Symbol(i))) == Genotypes + push!(geno_term,i) + end + end + non_gene_term = filter(x->x ∉ geno_term,rhs_split) + non_gene_term = join(non_gene_term,"+") + num_latent_traits = length(geno_term) + for i = 1:length(geno_term) + model_equations = model_equations*lhs*string(i)*"="*non_gene_term*"+"*geno_term[i]*";" + end + printstyled("NNBayes: partially connected with $num_latent_traits hidden nodes. \n",bold=false,color=:green) end model_equations = model_equations[1:(end-1)] end @@ -126,10 +138,29 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end #latent traits - if num_latent_traits != false + if nonlinear_function != false #NNBayes mme.latent_traits = true - if nonlinear_function != false - mme.nonlinear_function = nonlinear_function + mme.nonlinear_function = nonlinear_function + + if activation_function != false #e.g., "tanh" + if activation_function == "tanh" + mytanh(x) = tanh(x) + mme.activation_function = mytanh + elseif activation_function == "sigmoid" + mysigmoid(x) = 1/(1+exp(-x)) + mme.activation_function = mysigmoid + elseif activation_function == "relu" + myrelu(x) = max(0, x) + mme.activation_function = myrelu + elseif activation_function == "leakyrelu" + myleakyrelu(x) = max(0.01x, x) + mme.activation_function = myleakyrelu + elseif activation_function == "linear" + mylinear(x) = x + mme.activation_function = mylinear + else + error("Please select the activation function from tanh/sigmoid/relu/leakyrelu/linear.") + end end end diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index 5cfc1beb..df6316ec 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -242,6 +242,7 @@ mutable struct MME nonlinear_function #user-provide function, "Neural Network" weights_NN σ2_yobs + activation_function function MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) @@ -268,6 +269,6 @@ mutable struct MME 0, false,false,false, false, - false,false,false,1.0) + false,false,false,1.0,false) end end From 267319b782d53e041082d6a5de0ecd28ceeba108 Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 5 May 2021 11:43:44 -0700 Subject: [PATCH 03/11] add ForwardDiff dependency --- Manifest.toml | 389 ++++++++++++++++++++++++++++++++++++++++++++++++++ Project.toml | 17 +-- 2 files changed, 398 insertions(+), 8 deletions(-) create mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 00000000..c374bdb2 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,389 @@ +# This file is machine-generated - editing it directly is not advised + +[[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[CSV]] +deps = ["Dates", "Mmap", "Parsers", "PooledArrays", "SentinelArrays", "Tables", "Unicode"] +git-tree-sha1 = "6d4242ef4cb1539e7ede8e01a47a32365e0a34cd" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.8.4" + +[[CategoricalArrays]] +deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "StructTypes", "Unicode"] +git-tree-sha1 = "18d7f3e82c1a80dd38c16453b8fd3f0a7db92f23" +uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "0.9.7" + +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "bd0cc939d94b8bd736dce5bbbe0d635db9f94af7" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "0.9.41" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "0a817fbe51c976de090aa8c997b7b719b786118d" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.28.0" + +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" + +[[Crayons]] +git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.0.4" + +[[DataAPI]] +git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.6.0" + +[[DataFrames]] +deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "d50972453ef464ddcebdf489d11885468b7b83a3" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "0.22.7" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.9" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.0.2" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[Distributions]] +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] +git-tree-sha1 = "a837fdf80f333415b69684ba8e8ae6ba76de6aaa" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.24.18" + +[[DocStringExtensions]] +deps = ["LibGit2", "Markdown", "Pkg", "Test"] +git-tree-sha1 = "9d4f64f79012636741cf01133158a54b24924c32" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.4" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "31939159aeb8ffad1d4d8ee44d07f8558273120a" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.11.7" + +[[Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "NaNMath", "Printf", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "e2af66012e08966366a43251e1fd421522908be6" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.18" + +[[Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[InvertedIndices]] +deps = ["Test"] +git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.0.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.3.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.1" + +[[LibGit2]] +deps = ["Printf"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[LogExpFunctions]] +deps = ["DocStringExtensions", "LinearAlgebra"] +git-tree-sha1 = "ed26854d7c2c867d143f0e07c198fc9e8b721d10" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.2.3" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.6" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.5" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.3+4" + +[[OrderedCollections]] +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.0" + +[[PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "f82a0e71f222199de8e9eb9a09977bd0767d52a0" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.11.0" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.1.0" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "cde4ce9d6f33219465b55162811d8de8139c0414" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.2.1" + +[[Preferences]] +deps = ["TOML"] +git-tree-sha1 = "ea79e4c9077208cd3bc5d29631a26bc0cff78902" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.2.1" + +[[PrettyTables]] +deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"] +git-tree-sha1 = "574a6b3ea95f04e8757c0280bb9c29f1a5e35138" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "0.11.1" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[ProgressMeter]] +deps = ["Distributed", "Printf"] +git-tree-sha1 = "d85d8f0339a9937afac93e152c76f4745b386202" +uuid = "92933f4c-e287-5a05-a399-4b506db050ca" +version = "1.6.0" + +[[QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.4.1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Reexport]] +git-tree-sha1 = "57d8440b0c7d98fc4f889e478e80f268d534c9d5" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.0.0" + +[[Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.6.1" + +[[Rmath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1b7bf41258f6c5c9c31df8c1ba34c1fc88674957" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.2.2+2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "6ccde405cf0759eba835eb613130723cb8f10ff9" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.2.16" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["ChainRulesCore", "OpenSpecFun_jll"] +git-tree-sha1 = "5919936c0e92cff40e57d0ddf0ceb667d42e5902" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "1.3.0" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "2653e9c769343808781a8bd5010ee7a17c01152e" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.1.2" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsAPI]] +git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.0.0" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "2f6792d523d7448bbe2fec99eca9218f06cc746d" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.8" + +[[StatsFuns]] +deps = ["LogExpFunctions", "Rmath", "SpecialFunctions"] +git-tree-sha1 = "30cd8c360c54081f806b1ee14d2eecbef3c04c49" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "0.9.8" + +[[StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "e36adc471280e8b346ea24c5c87ba0571204be7a" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.7.2" + +[[SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[TOML]] +deps = ["Dates"] +git-tree-sha1 = "44aaac2d2aec4a850302f9aa69127c74f0c3787e" +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "c9d2d262e9a327be1f35844df25fe4561d258dc9" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.4.2" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/Project.toml b/Project.toml index 3cbc08ec..635442f0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,26 +1,27 @@ name = "JWAS" uuid = "c9a035f4-d403-5e6b-8649-6be755bc4798" -version = "0.11.5" authors = ["Hao Cheng "] +version = "0.11.5" [deps] -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] +CSV = "0.5, 0.6, 0.7, 0.8" DataFrames = "0.19, 0.20, 0.21, 0.22" Distributions = "0.21, 0.22, 0.23, 0.24" ProgressMeter = "1.0, 1.1, 1.2" -CSV = "0.5, 0.6, 0.7, 0.8" StatsBase = "0.30, 0.31, 0.32, 0.33" julia = "1" From cad06b3da98d5914fbeb95f457a8314a690d0527 Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 5 May 2021 14:34:13 -0700 Subject: [PATCH 04/11] tanh to activation_function --- src/1.JWAS/src/Nonlinear/nonlinear.jl | 6 +++--- src/1.JWAS/src/output.jl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/1.JWAS/src/Nonlinear/nonlinear.jl b/src/1.JWAS/src/Nonlinear/nonlinear.jl index 5903d55a..e859873d 100644 --- a/src/1.JWAS/src/Nonlinear/nonlinear.jl +++ b/src/1.JWAS/src/Nonlinear/nonlinear.jl @@ -23,7 +23,7 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_funct else #user-defined function, MH candidates = μ_ylats+randn(size(μ_ylats)) #candidate samples if nonlinear_function == "Neural Network (MH)" - μ_yobs_candidate = [ones(nobs) tanh.(candidates)]*weights + μ_yobs_candidate = [ones(nobs) activation_function.(candidates)]*weights μ_yobs_current = X*weights else #user-defined non-linear function μ_yobs_candidate = nonlinear_function.(Tuple([view(candidates,:,i) for i in 1:ntraits])...) @@ -37,7 +37,7 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_funct end if nonlinear_function == "Neural Network" #sample weights - X = [ones(nobs) tanh.(ylats_new)] + X = [ones(nobs) activation_function.(ylats_new)] lhs = X'X + I*0.00001 Ch = cholesky(lhs) L = Ch.L @@ -54,7 +54,7 @@ function sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_funct if nonlinear_function != "Neural Network" residuals = yobs-nonlinear_function.(Tuple([view(ylats_new,:,i) for i in 1:ntraits])...) else - residuals = yobs-[ones(nobs) tanh.(ylats_new)]*weights + residuals = yobs-[ones(nobs) activation_function.(ylats_new)]*weights end mme.σ2_yobs= dot(residuals,residuals)/rand(Chisq(nobs)) #(dot(x,x) + df*scale)/rand(Chisq(n+df)) end diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index f057bbf7..557cb997 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -416,7 +416,7 @@ function output_MCMC_samples(mme,vRes,G0, if mme.nonlinear_function != "Neural Network" BV_NN = mme.nonlinear_function.(Tuple([view(EBVmat,:,i) for i in 1:size(EBVmat,2)])...) else - BV_NN = [ones(size(EBVmat,1)) tanh.(EBVmat)]*mme.weights_NN + BV_NN = [ones(size(EBVmat,1)) mme.activation_function.(EBVmat)]*mme.weights_NN writedlm(outfile["neural_networks_bias_and_weights"],mme.weights_NN',',') end writedlm(outfile["EBV_NonLinear"],BV_NN',',') From d54eaf7bb50a929a341f13f27e304ea7e1d729b9 Mon Sep 17 00:00:00 2001 From: tj Date: Wed, 26 May 2021 10:39:02 -0700 Subject: [PATCH 05/11] fixed Project.toml --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 524b3fa8..022e7e52 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,6 @@ name = "JWAS" uuid = "c9a035f4-d403-5e6b-8649-6be755bc4798" version = "0.13.0" authors = ["Hao Cheng "] -version = "0.11.5" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" From 89de4cc19976969a485f67f039193d9adfdc76a3 Mon Sep 17 00:00:00 2001 From: tj Date: Mon, 14 Jun 2021 21:08:39 -0700 Subject: [PATCH 06/11] write external functions to build model in NNBayes --- src/1.JWAS/src/JWAS.jl | 1 + src/1.JWAS/src/Nonlinear/nnbayes_check.jl | 107 ++++++++++++++++++++++ src/1.JWAS/src/build_MME.jl | 72 +++------------ 3 files changed, 121 insertions(+), 59 deletions(-) create mode 100644 src/1.JWAS/src/Nonlinear/nnbayes_check.jl diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index 10c8328f..b9adca3d 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -49,6 +49,7 @@ include("structure_equation_model/SEM.jl") #Latent Traits include("Nonlinear/nonlinear.jl") include("Nonlinear/bnn_hmc.jl") +include("Nonlinear/nnbayes_check.jl") #input include("input_data_validation.jl") diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl new file mode 100644 index 00000000..b9e82c05 --- /dev/null +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -0,0 +1,107 @@ +#Below function is to check parameters for NNBayes and print information +function nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,activation_function) + printstyled("Bayesian Neural Network is used with follwing information: \n",bold=false,color=:green) + + #part1: fully/partial-connected NN + if typeof(num_latent_traits) == Int64 #fully-connected. e.g, num_latent_traits=5 + printstyled(" - Neural network: fully connected neural network \n",bold=false,color=:green) + printstyled(" - Number of hidden nodes: $num_latent_traits \n",bold=false,color=:green) + elseif num_latent_traits == false #partial-connected. + printstyled(" - Neural network: partially connected neural network \n",bold=false,color=:green) + else + error("Please check you number of latent traits") + end + + #part2: activation function/user-defined non-linear function + if nonlinear_function == "Neural Network" #NN + if activation_function in ["tanh","sigmoid","relu","leakyrelu","linear"] + printstyled(" - Activation function: $activation_function.\n",bold=false,color=:green) + printstyled(" - Sampler: Hamiltonian Monte Carlo. \n",bold=false,color=:green) + else + error("Please select the activation function from tanh/sigmoid/relu/leakyrelu/linear") + end + elseif isa(nonlinear_function, Function) #user-defined nonlinear function. e.g, CropGrowthModel() + if activation_function == false + printstyled(" - Nonlinear function: user-defined nonlinear_function for the relationship between hidden nodes and observed trait is used.\n",bold=false,color=:green) + printstyled(" - Sampler: Matropolis-Hastings.\n",bold=false,color=:green) + else + error("activation function is not allowed for user-defined nonlinear function") + end + else + error("nonlinear_function can only be Neural Network or a user-defined nonlinear function") + end +end + + +#Below function is to re-phase modelm for NNBayes +function nnbayes_model_equation(model_equations,num_latent_traits) + + lhs, rhs = strip.(split(model_equations,"=")) + model_equations = "" + + if typeof(num_latent_traits) == Int64 #fully-connected + # old: y=intercept+geno + # new: y1=intercept+geno;y2=intercept+geno + for i = 1:num_latent_traits + model_equations = model_equations*lhs*string(i)*"="*rhs*";" + end + elseif num_latent_traits == false #partially-connected + # old: y=intercept+geno1+geno2 + # new: y1= intercept+geno1;y2=intercept+geno2 + rhs_split=strip.(split(rhs,"+")) + geno_term=[] + for i in rhs_split + if isdefined(Main,Symbol(i)) && typeof(getfield(Main,Symbol(i))) == Genotypes + push!(geno_term,i) + end + end + non_gene_term = filter(x->x ∉ geno_term,rhs_split) + non_gene_term = join(non_gene_term,"+") + + for i = 1:length(geno_term) + model_equations = model_equations*lhs*string(i)*"="*non_gene_term*"+"*geno_term[i]*";" + end + end + model_equations = model_equations[1:(end-1)] +end + + +# below function is to check whether the loaded genotype matches the model equation +function nnbayes_check_nhiddennode(num_latent_traits,mme) + if typeof(num_latent_traits) == Int64 #fully-connected. e.g, num_latent_traits=5 + if length(mme.M)>1 + error("fully-connected NN only allow one genotype; num_latent_traits is not allowed in partial-connected NN ") + end + elseif num_latent_traits == false #partial-connected. + if length(mme.M)==1 + error("partial-connected NN requirs >1 genotype group") + else + num_latent_traits = length(mme.M) + printstyled(" - Number of hidden nodes: $num_latent_traits \n",bold=false,color=:green) + end #Note, if only geno1 & geno2 are loaded by get_genotypes, but there is "geno3" in equation, then geno3 will be treated like age. + end +end + + + +# below function is to define the activation function for neural network +function nnbayes_activation(activation_function) + if activation_function == "tanh" + mytanh(x) = tanh(x) + return mytanh + elseif activation_function == "sigmoid" + mysigmoid(x) = 1/(1+exp(-x)) + return mysigmoid + elseif activation_function == "relu" + myrelu(x) = max(0, x) + return myrelu + elseif activation_function == "leakyrelu" + myleakyrelu(x) = max(0.01x, x) + return myleakyrelu + elseif activation_function == "linear" + mylinear(x) = x + return mylinear + else + error("invalid actication function") + end +end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 9016822f..6c66fb20 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -37,40 +37,11 @@ models = build_model(model_equations,R); function build_model(model_equations::AbstractString, R = false; df = 4.0, num_latent_traits = false, nonlinear_function = false, activation_function = false) #nonlinear_function(x1,x2) = x1+x2 if nonlinear_function != false #NNBayes - printstyled("Bayesian Neural Network is used with follwing information: \n",bold=false,color=:green) - #print activation info - if activation_function != false #e.g, activation_function="tanh" - printstyled("Activation function: $activation_function.\n Sampler: Hamiltonian Monte Carlo. \n",bold=false,color=:green) - elseif activation_function == false #e.g, nonlinear_function=f(z1,z2) - printstyled("Nonlinear function: user-defined nonlinear_function for the relationship between hidden nodes and observed trait is used.\n Sampler: Matropolis-Hastings.\n",bold=false,color=:green) - end + #NNBayes: check parameters + nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,activation_function) - #print connection info; re-write model equation - lhs, rhs = strip.(split(model_equations,"=")) - model_equations = "" - if num_latent_traits != false #e.g. num_latent_traits=5 - printstyled("Neural network: fully connected neural network \n",bold=false,color=:green) - printstyled("Number of hidden nodes: $num_latent_traits \n",bold=false,color=:green) - for i = 1:num_latent_traits - model_equations = model_equations*lhs*string(i)*"="*rhs*";" - end - elseif num_latent_traits == false #partially-connected - rhs_split=strip.(split(rhs,"+")) - geno_term=[] - for i in rhs_split - if isdefined(Main,Symbol(i)) && typeof(getfield(Main,Symbol(i))) == Genotypes - push!(geno_term,i) - end - end - non_gene_term = filter(x->x ∉ geno_term,rhs_split) - non_gene_term = join(non_gene_term,"+") - num_latent_traits = length(geno_term) - for i = 1:length(geno_term) - model_equations = model_equations*lhs*string(i)*"="*non_gene_term*"+"*geno_term[i]*";" - end - printstyled("NNBayes: partially connected with $num_latent_traits hidden nodes. \n",bold=false,color=:green) - end - model_equations = model_equations[1:(end-1)] + #NNBayes: re-write model equation + model_equations = nnbayes_model_equation(model_equations,num_latent_traits) end if R != false && !isposdef(map(AbstractFloat,R)) @@ -113,9 +84,9 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, if isdefined(Main,term_symbol) #@isdefined can be usde to tests whether a local variable or object field is defined if typeof(getfield(Main,term_symbol)) == Genotypes term.random_type = "genotypes" - if traiti == 1 #same genos are required in all traits - genotypei = getfield(Main,term_symbol) - genotypei.name = string(term_symbol) + genotypei = getfield(Main,term_symbol) + genotypei.name = string(term_symbol) + if genotypei.name ∉ map(x->x.name, genotypes) #only save unique genotype genotypei.ntraits = nModels if nModels != 1 genotypei.df = genotypei.df + nModels @@ -139,29 +110,12 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, #latent traits if nonlinear_function != false #NNBayes - mme.latent_traits = true - mme.nonlinear_function = nonlinear_function - - if activation_function != false #e.g., "tanh" - if activation_function == "tanh" - mytanh(x) = tanh(x) - mme.activation_function = mytanh - elseif activation_function == "sigmoid" - mysigmoid(x) = 1/(1+exp(-x)) - mme.activation_function = mysigmoid - elseif activation_function == "relu" - myrelu(x) = max(0, x) - mme.activation_function = myrelu - elseif activation_function == "leakyrelu" - myleakyrelu(x) = max(0.01x, x) - mme.activation_function = myleakyrelu - elseif activation_function == "linear" - mylinear(x) = x - mme.activation_function = mylinear - else - error("Please select the activation function from tanh/sigmoid/relu/leakyrelu/linear.") - end - end + #NNBayes: check parameters again + nnbayes_check_nhiddennode(num_latent_traits,mme) + + mme.latent_traits = true + mme.nonlinear_function = nonlinear_function + mme.activation_function = activation_function!=false ? nnbayes_activation(activation_function) : false end return mme From e7ba1f803ff70c79bb157101674125fe273d56f6 Mon Sep 17 00:00:00 2001 From: tj Date: Thu, 17 Jun 2021 15:51:56 -0700 Subject: [PATCH 07/11] support partial connected NN --- src/1.JWAS/src/JWAS.jl | 26 ++++----- src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 30 +++++++---- src/1.JWAS/src/Nonlinear/nnbayes_check.jl | 55 ++++++++++++++++++++ src/1.JWAS/src/build_MME.jl | 32 ++++++++++-- src/1.JWAS/src/output.jl | 36 ++++++++----- src/1.JWAS/src/types.jl | 10 ++-- 6 files changed, 144 insertions(+), 45 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index b9adca3d..d994ebab 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -287,21 +287,15 @@ function runMCMC(mme::MME,df; end mme.Gi = map(Float64,mme.Gi) end - #mega_trait + + # NNBayes mega trait: from multi-trait to multiple single-trait if mme.MCMCinfo.mega_trait == true || mme.MCMCinfo.constraint == true - if mme.nModels == 1 - error("more than 1 trait is required for MegaLMM analysis.") - end - mme.MCMCinfo.constraint = true - ##sample from scale-inv-⁠χ2, not InverseWishart - mme.df.residual = mme.df.residual - mme.nModels - mme.scaleR = diag(mme.scaleR/(mme.df.residual - 1))*(mme.df.residual-2)/mme.df.residual #diag(R_prior_mean)*(ν-2)/ν - if mme.M != 0 - for Mi in mme.M - Mi.df = Mi.df - mme.nModels - Mi.scale = diag(Mi.scale/(Mi.df - 1))*(Mi.df-2)/Mi.df - end - end + nnbayes_mega_trait(mme) + end + + # NNBayes: modify parameters for partial connected NN + if mme.nnbayes_partial==true + nnbayes_partial_para_modify2(mme) end ############################################################################ #Make incidence matrices and genotype covariates for training observations @@ -463,7 +457,7 @@ function getMCMCinfo(mme) @printf("%-30s %20s\n","Method",Mi.method) for Mi in mme.M if Mi.genetic_variance != false - if mme.nModels == 1 + if mme.nModels == 1 || mme.nnbayes_partial == true @printf("%-30s %20.3f\n","genetic variances (genomic):",Mi.genetic_variance) else @printf("%-30s\n","genetic variances (genomic):") @@ -472,7 +466,7 @@ function getMCMCinfo(mme) end end if !(Mi.method in ["GBLUP"]) - if mme.nModels == 1 + if mme.nModels == 1 || mme.nnbayes_partial == true @printf("%-30s %20.3f\n","marker effect variances:",Mi.G) else @printf("%-30s\n","marker effect variances:") diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 3b6ea631..ff504f3f 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -20,6 +20,7 @@ function MCMC_BayesianAlphabet(mme,df) latent_traits = mme.latent_traits nonlinear_function = mme.nonlinear_function activation_function = mme.activation_function + partial = mme.nnbayes_partial ############################################################################ # Categorical Traits (starting values for maker effects defaulting to 0s) ############################################################################ @@ -103,11 +104,15 @@ function MCMC_BayesianAlphabet(mme,df) end end end + if mme.nnbayes_partial == true + nnbayes_partial_para_modify3(mme) + end + #phenotypes corrected for all effects ycorr = vec(Matrix(mme.ySparse)-mme.X*mme.sol) if mme.M != 0 for Mi in mme.M - for traiti in 1:mme.nModels + for traiti in 1:Mi.ntraits if Mi.α[traiti] != zero(Mi.α[traiti]) ycorr[(traiti-1)*Mi.nObs+1 : traiti*Mi.nObs] = ycorr[(traiti-1)*Mi.nObs+1 : traiti*Mi.nObs] - Mi.genotypes*Mi.α[traiti] @@ -204,48 +209,57 @@ function MCMC_BayesianAlphabet(mme,df) # 2. Marker Effects ######################################################################## if mme.M !=0 - for Mi in mme.M + for i in 1:length(mme.M) + Mi=mme.M[i] ######################################################################## # Marker Effects ######################################################################## if Mi.method in ["BayesC","BayesB","BayesA"] locus_effect_variances = (Mi.method == "BayesC" ? fill(Mi.G,Mi.nMarkers) : Mi.G) - if is_multi_trait + if is_multi_trait && mme.nnbayes_partial==false if is_mega_trait megaBayesABC!(Mi,wArray,mme.R,locus_effect_variances) else MTBayesABC!(Mi,wArray,mme.R,locus_effect_variances) end + elseif mme.nnbayes_partial==true + BayesABC!(Mi,wArray[i],mme.R[i,i],locus_effect_variances) else BayesABC!(Mi,ycorr,mme.R,locus_effect_variances) end elseif Mi.method =="RR-BLUP" - if is_multi_trait + if is_multi_trait && mme.nnbayes_partial==false if is_mega_trait megaBayesC0!(Mi,wArray,mme.R) else MTBayesC0!(Mi,wArray,mme.R) end + elseif mme.nnbayes_partial==true + BayesC0!(Mi,wArray[i],mme.R[i,i]) else BayesC0!(Mi,ycorr,mme.R) end elseif Mi.method == "BayesL" - if is_multi_trait + if is_multi_trait && mme.nnbayes_partial==false if is_mega_trait #problem with sampleGammaArray megaBayesL!(Mi,wArray,mme.R) else MTBayesL!(Mi,wArray,mme.R) end + elseif mme.nnbayes_partial==true + BayesC0!(Mi,wArray[i],mme.R[i,i]) else BayesL!(Mi,ycorr,mme.R) end elseif Mi.method == "GBLUP" - if is_multi_trait + if is_multi_trait && mme.nnbayes_partial==false if is_mega_trait megaGBLUP!(Mi,wArray,mme.R,invweights) else MTGBLUP!(Mi,wArray,ycorr,mme.R,invweights) end + elseif mme.nnbayes_partial==true + GBLUP!(Mi,wArray[i],mme.R[i,i],invweights) else GBLUP!(Mi,ycorr,mme.R,invweights) end @@ -254,7 +268,7 @@ function MCMC_BayesianAlphabet(mme,df) # Marker Inclusion Probability ######################################################################## if Mi.estimatePi == true - if is_multi_trait + if is_multi_trait && mme.nnbayes_partial==false if is_mega_trait Mi.π = [samplePi(sum(Mi.δ[i]), Mi.nMarkers) for i in 1:mme.nModels] else @@ -321,8 +335,6 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # 5. Latent Traits ######################################################################## - - #mme.M[1].genotypes here is 5-by-5 if latent_traits == true #to update ycorr! sample_latent_traits(yobs,mme,ycorr,nonlinear_function,activation_function) end diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl index b9e82c05..2ce56aab 100644 --- a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -105,3 +105,58 @@ function nnbayes_activation(activation_function) error("invalid actication function") end end + + +# below function is to modify mme from multi-trait model to multiple single trait models +# coded by Hao +function nnbayes_mega_trait(mme) + #mega_trait + if mme.nModels == 1 + error("more than 1 trait is required for MegaLMM analysis.") + end + mme.MCMCinfo.constraint = true + + ##sample from scale-inv-⁠χ2, not InverseWishart + mme.df.residual = mme.df.residual - mme.nModels + mme.scaleR = diag(mme.scaleR/(mme.df.residual - 1))*(mme.df.residual-2)/mme.df.residual #diag(R_prior_mean)*(ν-2)/ν + if mme.M != 0 + for Mi in mme.M + Mi.df = Mi.df - mme.nModels + Mi.scale = diag(Mi.scale/(Mi.df - 1))*(Mi.df-2)/Mi.df + end + end + +end + + +# below function is to modify essential parameters for partial connected NN +function nnbayes_partial_para_modify(mme) + mme.nnbayes_partial=true + for Mi in mme.M + Mi.ntraits=1 + end +end + + +# below function is to modify essential parameters for partial connected NN +function nnbayes_partial_para_modify2(mme) + for Mi in mme.M + Mi.scale = Mi.scale[1] + Mi.G = Mi.G[1,1] + Mi.genetic_variance=Mi.genetic_variance[1,1] + end +end + + +# below function is to modify essential parameters for partial connected NN +function nnbayes_partial_para_modify3(mme) + for Mi in mme.M + Mi.meanVara = Mi.meanVara[1] + Mi.meanVara2 = Mi.meanVara2[1] + Mi.meanScaleVara = Mi.meanScaleVara[1] + Mi.meanScaleVara2 = Mi.meanScaleVara2[1] + Mi.π = Mi.π[1] + Mi.mean_pi = Mi.mean_pi[1] + Mi.mean_pi2 = Mi.mean_pi2[1] + end +end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 6c66fb20..dd48cf97 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -80,7 +80,6 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, whichterm = 1 for term in modelTerms term_symbol = Symbol(split(term.trmStr,":")[end]) - traiti = term.iModel if isdefined(Main,term_symbol) #@isdefined can be usde to tests whether a local variable or object field is defined if typeof(getfield(Main,term_symbol)) == Genotypes term.random_type = "genotypes" @@ -101,6 +100,27 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end end end + + ## add trait_name for genotype + #step1. make a dict for genotype =>trait names + dict_gene_trait =Dict() + for term in modelTerms + if term.random_type == "genotypes" + gene_name=term.factors[1] + trait_name=term.iTrait + if !haskey(dict_gene_trait, gene_name) #new genotype + dict_gene_trait[gene_name] = [trait_name] # e.g., :geno1 => ["y1"] + else #existing key + dict_gene_trait[gene_name] = append!(dict_gene_trait[gene_name],[trait_name]) # e.g., :geno1 = ["y1","y2"] + end + end + end + #step2. add trait names for each genotype + for genotypei in genotypes + gene_name = Symbol(genotypei.name) + genotypei.trait_name = dict_gene_trait[gene_name] + end + #crear mme with genotypes filter!(x->x.random_type != "genotypes",modelTerms) mme = MME(nModels,modelVec,modelTerms,dict,lhsVec,R == false ? R : Float32.(R),Float32(df)) @@ -108,11 +128,17 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, mme.M = genotypes end - #latent traits - if nonlinear_function != false #NNBayes + + #NNBayes: + if nonlinear_function != false #NNBayes: check parameters again nnbayes_check_nhiddennode(num_latent_traits,mme) + #NNBayes: change parameters for partial-connected NN + if num_latent_traits == false + nnbayes_partial_para_modify(mme) + end + mme.latent_traits = true mme.nonlinear_function = nonlinear_function mme.activation_function = activation_function!=false ? nnbayes_activation(activation_function) : false diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 557cb997..8b31e426 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -102,7 +102,7 @@ function output_result(mme,output_folder, whicheffect = Mi.meanAlpha[traiti] whicheffectsd = sqrt.(abs.(Mi.meanAlpha2[traiti] .- Mi.meanAlpha[traiti] .^2)) whichdelta = Mi.meanDelta[traiti] - for traiti in 2:mme.nModels + for traiti in 2:Mi.ntraits whichtrait = vcat(whichtrait,fill(string(mme.lhsVec[traiti]),length(Mi.markerID))) whichmarker = vcat(whichmarker,Mi.markerID) whicheffect = vcat(whicheffect,Mi.meanAlpha[traiti]) @@ -253,8 +253,8 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= end if mme.M !=0 #write samples for marker effects to a text file for Mi in mme.M - for traiti in 1:ntraits - push!(outvar,"marker_effects_"*Mi.name*"_"*string(mme.lhsVec[traiti])) + for traiti in Mi.trait_name + push!(outvar,"marker_effects_"*Mi.name*"_"*traiti) end push!(outvar,"marker_effects_variances"*"_"*Mi.name) push!(outvar,"pi"*"_"*Mi.name) @@ -325,8 +325,8 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= if mme.M !=0 for Mi in mme.M - for traiti in 1:ntraits - writedlm(outfile["marker_effects_"*Mi.name*"_"*string(mme.lhsVec[traiti])],transubstrarr(Mi.markerID),',') + for traiti in Mi.trait_name + writedlm(outfile["marker_effects_"*Mi.name*"_"*traiti],transubstrarr(Mi.markerID),',') end end end @@ -374,8 +374,8 @@ function output_MCMC_samples(mme,vRes,G0, end if mme.M != 0 && outfile != false for Mi in mme.M - for traiti in 1:ntraits - writedlm(outfile["marker_effects_"*Mi.name*"_"*string(mme.lhsVec[traiti])],Mi.α[traiti]',',') + for traiti in 1:Mi.ntraits + writedlm(outfile["marker_effects_"*Mi.name*"_"*Mi.trait_name[traiti]],Mi.α[traiti],',') end if Mi.G != false if mme.nModels == 1 @@ -396,12 +396,22 @@ function output_MCMC_samples(mme,vRes,G0, end if mme.MCMCinfo.outputEBV == true - EBVmat = myEBV = getEBV(mme,1) - writedlm(outfile["EBV_"*string(mme.lhsVec[1])],myEBV',',') - for traiti in 2:ntraits - myEBV = getEBV(mme,traiti) #actually BV - writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],myEBV',',') - EBVmat = [EBVmat myEBV] + if mme.nnbayes_partial==false + EBVmat = myEBV = getEBV(mme,1) + writedlm(outfile["EBV_"*string(mme.lhsVec[1])],myEBV',',') + for traiti in 2:ntraits + myEBV = getEBV(mme,traiti) #actually BV + writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],myEBV',',') + EBVmat = [EBVmat myEBV] + end + else #NNBayes_partial + EBVmat = myEBV = mme.M[1].output_genotypes*mme.M[1].α[1] + writedlm(outfile["EBV_"*mme.M[1].trait_name[1]],myEBV',',') + for i in 2:length(mme.M) + myEBV=mme.M[i].output_genotypes*mme.M[i].α[1] + writedlm(outfile["EBV_"*mme.M[i].trait_name[1]],myEBV',',') + EBVmat = [EBVmat myEBV] + end end if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false mygvar = cov(EBVmat) diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index 432e31ae..8223a93c 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -77,7 +77,8 @@ mutable struct RandomEffect #Better to be a dict? key: term_array::Array{Abstr end mutable struct Genotypes - name #name for this category + name #name for this category, eg. "geno1" + trait_name #name for the corresponding trait, eg."y1" obsID::Array{AbstractString,1} #row ID for (imputed) genotyped and phenotyped inds (finally) markerID @@ -124,8 +125,8 @@ mutable struct Genotypes output_genotypes #output genotypes isGRM #whether genotypes or relationship matirx is provided - - Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false, + + Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false,false, a1,a2,a3,a4,a5,a6,a7,a8,a4,false, false,false,false,false, false,true,true,false, @@ -246,6 +247,7 @@ mutable struct MME weights_NN σ2_yobs activation_function + nnbayes_partial function MME(nModels,modelVec,modelTerms,dict,lhsVec,R,ν) @@ -272,6 +274,6 @@ mutable struct MME 0, false,false,false, false, - false,false,false,1.0,false) + false,false,false,1.0,false,false) end end From 8c18dc120f21288f40ab1837287d13ec1b902d10 Mon Sep 17 00:00:00 2001 From: tj Date: Thu, 17 Jun 2021 20:21:21 -0700 Subject: [PATCH 08/11] trait_name to trait_names --- src/1.JWAS/src/build_MME.jl | 12 ++++++------ src/1.JWAS/src/output.jl | 10 +++++----- src/1.JWAS/src/types.jl | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index dd48cf97..0ead40c5 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -101,24 +101,24 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end end - ## add trait_name for genotype + ## add trait_names for genotype #step1. make a dict for genotype =>trait names dict_gene_trait =Dict() for term in modelTerms if term.random_type == "genotypes" gene_name=term.factors[1] - trait_name=term.iTrait + trait_names=term.iTrait if !haskey(dict_gene_trait, gene_name) #new genotype - dict_gene_trait[gene_name] = [trait_name] # e.g., :geno1 => ["y1"] - else #existing key - dict_gene_trait[gene_name] = append!(dict_gene_trait[gene_name],[trait_name]) # e.g., :geno1 = ["y1","y2"] + dict_gene_trait[gene_name] = [trait_names] # e.g., :geno1 => ["y1"] + else #existing genotype + dict_gene_trait[gene_name] = append!(dict_gene_trait[gene_name],[trait_names]) # e.g., :geno1 = ["y1","y2"] end end end #step2. add trait names for each genotype for genotypei in genotypes gene_name = Symbol(genotypei.name) - genotypei.trait_name = dict_gene_trait[gene_name] + genotypei.trait_names = dict_gene_trait[gene_name] end #crear mme with genotypes diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 8b31e426..492cf26d 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -253,7 +253,7 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= end if mme.M !=0 #write samples for marker effects to a text file for Mi in mme.M - for traiti in Mi.trait_name + for traiti in Mi.trait_names push!(outvar,"marker_effects_"*Mi.name*"_"*traiti) end push!(outvar,"marker_effects_variances"*"_"*Mi.name) @@ -325,7 +325,7 @@ function output_MCMC_samples_setup(mme,nIter,output_samples_frequency,file_name= if mme.M !=0 for Mi in mme.M - for traiti in Mi.trait_name + for traiti in Mi.trait_names writedlm(outfile["marker_effects_"*Mi.name*"_"*traiti],transubstrarr(Mi.markerID),',') end end @@ -375,7 +375,7 @@ function output_MCMC_samples(mme,vRes,G0, if mme.M != 0 && outfile != false for Mi in mme.M for traiti in 1:Mi.ntraits - writedlm(outfile["marker_effects_"*Mi.name*"_"*Mi.trait_name[traiti]],Mi.α[traiti],',') + writedlm(outfile["marker_effects_"*Mi.name*"_"*Mi.trait_names[traiti]],Mi.α[traiti],',') end if Mi.G != false if mme.nModels == 1 @@ -406,10 +406,10 @@ function output_MCMC_samples(mme,vRes,G0, end else #NNBayes_partial EBVmat = myEBV = mme.M[1].output_genotypes*mme.M[1].α[1] - writedlm(outfile["EBV_"*mme.M[1].trait_name[1]],myEBV',',') + writedlm(outfile["EBV_"*mme.M[1].trait_names[1]],myEBV',',') for i in 2:length(mme.M) myEBV=mme.M[i].output_genotypes*mme.M[i].α[1] - writedlm(outfile["EBV_"*mme.M[i].trait_name[1]],myEBV',',') + writedlm(outfile["EBV_"*mme.M[i].trait_names[1]],myEBV',',') EBVmat = [EBVmat myEBV] end end diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index 8223a93c..aec2b395 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -78,7 +78,7 @@ end mutable struct Genotypes name #name for this category, eg. "geno1" - trait_name #name for the corresponding trait, eg."y1" + trait_names #names for the corresponding traits, eg.["y1","y2"] obsID::Array{AbstractString,1} #row ID for (imputed) genotyped and phenotyped inds (finally) markerID From 915c3d102a7acfc6eba1803b372df7cec735be75 Mon Sep 17 00:00:00 2001 From: tj Date: Thu, 17 Jun 2021 21:53:25 -0700 Subject: [PATCH 09/11] simplify EBV output for NNBayes-partial --- src/1.JWAS/src/output.jl | 41 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 492cf26d..48768d78 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -205,6 +205,12 @@ end (internal function) Get breeding values for individuals defined by outputEBV(), defaulting to all genotyped individuals. This function is used inside MCMC functions for one MCMC samples from posterior distributions. +e.g., +non-NNBayes_partial: y1=M1*α1[1]+M2*α2[1]+M3*α3[1] + y2=M1*α1[2]+M2*α2[2]+M3*α3[2]; +NNBayes_partial: y1=M1*α1[1] + y2=M2*α2[1] + y3=M3*α3[1]; """ function getEBV(mme,traiti) traiti_name = string(mme.lhsVec[traiti]) @@ -223,8 +229,15 @@ function getEBV(mme,traiti) end end if mme.M != 0 - for Mi in mme.M - EBV += Mi.output_genotypes*Mi.α[traiti] + for i in 1:length(mme.M) + Mi=mme.M[i] + if mme.nnbayes_partial==false #non-NNBayes_partial + EBV += Mi.output_genotypes*Mi.α[traiti] + else #NNBayes_partial + if i==traiti + EBV = Mi.output_genotypes*mme.M[i].α[1] + end + end end end return EBV @@ -396,23 +409,15 @@ function output_MCMC_samples(mme,vRes,G0, end if mme.MCMCinfo.outputEBV == true - if mme.nnbayes_partial==false - EBVmat = myEBV = getEBV(mme,1) - writedlm(outfile["EBV_"*string(mme.lhsVec[1])],myEBV',',') - for traiti in 2:ntraits - myEBV = getEBV(mme,traiti) #actually BV - writedlm(outfile["EBV_"*string(mme.lhsVec[traiti])],myEBV',',') - EBVmat = [EBVmat myEBV] - end - else #NNBayes_partial - EBVmat = myEBV = mme.M[1].output_genotypes*mme.M[1].α[1] - writedlm(outfile["EBV_"*mme.M[1].trait_names[1]],myEBV',',') - for i in 2:length(mme.M) - myEBV=mme.M[i].output_genotypes*mme.M[i].α[1] - writedlm(outfile["EBV_"*mme.M[i].trait_names[1]],myEBV',',') - EBVmat = [EBVmat myEBV] - end + EBVmat = myEBV = getEBV(mme,1) + writedlm(outfile["EBV_"*string(mme.lhsVec[1])],myEBV',',') + for traiti in 2:ntraits + myEBV = getEBV(mme,traiti) #actually BV + trait_name = mme.nnbayes_partial ? mme.M[traiti].trait_names[1] : string(mme.lhsVec[traiti]) + writedlm(outfile["EBV_"*trait_name],myEBV',',') + EBVmat = [EBVmat myEBV] end + if mme.MCMCinfo.output_heritability == true && mme.MCMCinfo.single_step_analysis == false mygvar = cov(EBVmat) genetic_variance = (ntraits == 1 ? mygvar : vec(mygvar)') From 0405567c120f1bdccfbaf75c66c2579cd8f8deed Mon Sep 17 00:00:00 2001 From: tj Date: Fri, 18 Jun 2021 08:33:12 -0700 Subject: [PATCH 10/11] simplify code in build_MME for partial NNBayes --- src/1.JWAS/src/Nonlinear/nnbayes_check.jl | 4 +++- src/1.JWAS/src/build_MME.jl | 26 ++++------------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl index 2ce56aab..0775c396 100644 --- a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -6,8 +6,10 @@ function nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,acti if typeof(num_latent_traits) == Int64 #fully-connected. e.g, num_latent_traits=5 printstyled(" - Neural network: fully connected neural network \n",bold=false,color=:green) printstyled(" - Number of hidden nodes: $num_latent_traits \n",bold=false,color=:green) + nnbayes_partial=false elseif num_latent_traits == false #partial-connected. printstyled(" - Neural network: partially connected neural network \n",bold=false,color=:green) + nnbayes_partial=true else error("Please check you number of latent traits") end @@ -30,6 +32,7 @@ function nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,acti else error("nonlinear_function can only be Neural Network or a user-defined nonlinear function") end + return nnbayes_partial end @@ -131,7 +134,6 @@ end # below function is to modify essential parameters for partial connected NN function nnbayes_partial_para_modify(mme) - mme.nnbayes_partial=true for Mi in mme.M Mi.ntraits=1 end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 0ead40c5..4e404b52 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -38,7 +38,7 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, num_latent_traits = false, nonlinear_function = false, activation_function = false) #nonlinear_function(x1,x2) = x1+x2 if nonlinear_function != false #NNBayes #NNBayes: check parameters - nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,activation_function) + nnbayes_partial = nnbayes_check_print_parameter(num_latent_traits,nonlinear_function,activation_function) #NNBayes: re-write model equation model_equations = nnbayes_model_equation(model_equations,num_latent_traits) @@ -85,8 +85,10 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, term.random_type = "genotypes" genotypei = getfield(Main,term_symbol) genotypei.name = string(term_symbol) + trait_names=[term.iTrait] if genotypei.name ∉ map(x->x.name, genotypes) #only save unique genotype genotypei.ntraits = nModels + genotypei.trait_names = nonlinear_function != false && nnbayes_partial==true ? trait_names : string.(lhsVec) if nModels != 1 genotypei.df = genotypei.df + nModels end @@ -101,26 +103,6 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end end - ## add trait_names for genotype - #step1. make a dict for genotype =>trait names - dict_gene_trait =Dict() - for term in modelTerms - if term.random_type == "genotypes" - gene_name=term.factors[1] - trait_names=term.iTrait - if !haskey(dict_gene_trait, gene_name) #new genotype - dict_gene_trait[gene_name] = [trait_names] # e.g., :geno1 => ["y1"] - else #existing genotype - dict_gene_trait[gene_name] = append!(dict_gene_trait[gene_name],[trait_names]) # e.g., :geno1 = ["y1","y2"] - end - end - end - #step2. add trait names for each genotype - for genotypei in genotypes - gene_name = Symbol(genotypei.name) - genotypei.trait_names = dict_gene_trait[gene_name] - end - #crear mme with genotypes filter!(x->x.random_type != "genotypes",modelTerms) mme = MME(nModels,modelVec,modelTerms,dict,lhsVec,R == false ? R : Float32.(R),Float32(df)) @@ -128,7 +110,6 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, mme.M = genotypes end - #NNBayes: if nonlinear_function != false #NNBayes: check parameters again @@ -140,6 +121,7 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, end mme.latent_traits = true + mme.nnbayes_partial = nnbayes_partial mme.nonlinear_function = nonlinear_function mme.activation_function = activation_function!=false ? nnbayes_activation(activation_function) : false end From 4a76da2760525e4841fbbf1251522ef3129c07c4 Mon Sep 17 00:00:00 2001 From: tj Date: Fri, 18 Jun 2021 09:26:02 -0700 Subject: [PATCH 11/11] modification --- src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 1 - src/1.JWAS/src/Nonlinear/nnbayes_check.jl | 7 ------- src/1.JWAS/src/build_MME.jl | 9 +++------ src/1.JWAS/src/output.jl | 4 ++-- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index ff504f3f..694f7441 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -20,7 +20,6 @@ function MCMC_BayesianAlphabet(mme,df) latent_traits = mme.latent_traits nonlinear_function = mme.nonlinear_function activation_function = mme.activation_function - partial = mme.nnbayes_partial ############################################################################ # Categorical Traits (starting values for maker effects defaulting to 0s) ############################################################################ diff --git a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl index 0775c396..67438073 100644 --- a/src/1.JWAS/src/Nonlinear/nnbayes_check.jl +++ b/src/1.JWAS/src/Nonlinear/nnbayes_check.jl @@ -132,13 +132,6 @@ function nnbayes_mega_trait(mme) end -# below function is to modify essential parameters for partial connected NN -function nnbayes_partial_para_modify(mme) - for Mi in mme.M - Mi.ntraits=1 - end -end - # below function is to modify essential parameters for partial connected NN function nnbayes_partial_para_modify2(mme) diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index 4e404b52..79d1add2 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -87,8 +87,9 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, genotypei.name = string(term_symbol) trait_names=[term.iTrait] if genotypei.name ∉ map(x->x.name, genotypes) #only save unique genotype - genotypei.ntraits = nModels - genotypei.trait_names = nonlinear_function != false && nnbayes_partial==true ? trait_names : string.(lhsVec) + is_nnbayes_partial = nonlinear_function != false && nnbayes_partial==true + genotypei.ntraits = is_nnbayes_partial ? 1 : nModels + genotypei.trait_names = is_nnbayes_partial ? trait_names : string.(lhsVec) if nModels != 1 genotypei.df = genotypei.df + nModels end @@ -115,10 +116,6 @@ function build_model(model_equations::AbstractString, R = false; df = 4.0, #NNBayes: check parameters again nnbayes_check_nhiddennode(num_latent_traits,mme) - #NNBayes: change parameters for partial-connected NN - if num_latent_traits == false - nnbayes_partial_para_modify(mme) - end mme.latent_traits = true mme.nnbayes_partial = nnbayes_partial diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index 48768d78..f3545368 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -206,8 +206,8 @@ end defaulting to all genotyped individuals. This function is used inside MCMC functions for one MCMC samples from posterior distributions. e.g., -non-NNBayes_partial: y1=M1*α1[1]+M2*α2[1]+M3*α3[1] - y2=M1*α1[2]+M2*α2[2]+M3*α3[2]; +non-NNBayes_partial (multi-classs Bayes) : y1=M1*α1[1]+M2*α2[1]+M3*α3[1] + y2=M1*α1[2]+M2*α2[2]+M3*α3[2]; NNBayes_partial: y1=M1*α1[1] y2=M2*α2[1] y3=M3*α3[1];