Skip to content

Commit

Permalink
fixed the bug of unmatched rows
Browse files Browse the repository at this point in the history
  • Loading branch information
YinLiLin committed Dec 30, 2024
1 parent 2c5f851 commit 4661404
Show file tree
Hide file tree
Showing 12 changed files with 316 additions and 266 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: rMVP
Type: Package
Title: Memory-Efficient, Visualize-Enhanced, Parallel-Accelerated GWAS
Tool
Version: 1.3.0
Date: 2024-12-12
Version: 1.3.5
Date: 2024-12-30
Authors@R: c( person("Lilin", "Yin", role = "aut"),
person("Haohao", "Zhang", role = "aut"),
person("Zhenshuang", "Tang", role = "aut"),
Expand Down
17 changes: 10 additions & 7 deletions R/MVP.FarmCPU.r
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@

if(!is.big.matrix(geno)) stop("genotype should be in 'big.matrix' format.")
if(sum(is.na(phe[, 2])) != 0) stop("NAs are not allowed in phenotype.")
if(nrow(map) != ncol(geno) & nrow(map) != nrow(geno)) stop("The number of markers in genotype and map doesn't match!")
mrk_bycol <- ncol(geno) == nrow(map)
if(is.null(ind_idx)){
if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
Expand Down Expand Up @@ -242,7 +244,7 @@
theCV=cbind(CV,myRemove$bin)
}

myGLM=FarmCPU.LM(y=phe[,2],GDP=geno,GDP_index=ind_idx,w=theCV,maxLine=maxLine,ncpus=ncpus,npc=npc, verbose=verbose)
myGLM=FarmCPU.LM(y=phe[,2],GDP=geno,GDP_index=ind_idx,GDP_mrk_bycol=mrk_bycol,w=theCV,maxLine=maxLine,ncpus=ncpus,npc=npc, verbose=verbose)
if(!is.null(seqQTN)){
if(ncol(myGLM$P) != (npc + length(seqQTN) + 1)) stop("wrong dimensions.")
}
Expand Down Expand Up @@ -499,7 +501,7 @@ FarmCPU.BIN <-

if(is.null(P)) return(list(bin=NULL,binmap=NULL,seqQTN=NULL))

if(nrow(Y) == nrow(GDP)){
if(nrow(GM) == ncol(GDP)){
if(is.null(GDP_index)) GDP_index=seq(1, nrow(GDP))
}else{
if(is.null(GDP_index)) GDP_index=seq(1, ncol(GDP))
Expand Down Expand Up @@ -558,7 +560,7 @@ FarmCPU.BIN <-
GP=cbind(GM,P,NA,NA,NA)
mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin,inclosure.size=inc)
seqQTN=which(mySpecify$index==TRUE)
if(nrow(Y) == nrow(GDP)){
if(nrow(GM) == ncol(GDP)){
GK=GDP[GDP_index, seqQTN]
}else{
GK=t(GDP[seqQTN, GDP_index])
Expand Down Expand Up @@ -606,7 +608,7 @@ FarmCPU.BIN <-
GP=cbind(GM,P,NA,NA,NA)
mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin,inclosure.size=inc)
seqQTN=which(mySpecify$index==TRUE)
if(nrow(Y) == nrow(GDP)){
if(nrow(GM) == ncol(GDP)){
GK=GDP[GDP_index, seqQTN]
}else{
GK=t(GDP[seqQTN, GDP_index])
Expand Down Expand Up @@ -647,7 +649,7 @@ FarmCPU.BIN <-
GP=cbind(GM,P,NA,NA,NA)
mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin[ii],inclosure.size=inc[ii])
seqQTN=which(mySpecify$index==TRUE)
if(nrow(Y) == nrow(GDP)){
if(nrow(GM) == ncol(GDP)){
GK=GDP[GDP_index, seqQTN]
}else{
GK=t(GDP[seqQTN, GDP_index])
Expand Down Expand Up @@ -775,6 +777,7 @@ FarmCPU.Specify <-
#' @param w covariates, n by c matrix, n is sample size, c is number of covariates
#' @param GDP genotype
#' @param GDP_index index of effective genotyped individuals
#' @param GDP_mrk_bycol whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)
#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
#' @param ncpus number of threads used for parallele computation
#' @param npc number of covariates without pseudo QTNs
Expand All @@ -787,7 +790,7 @@ FarmCPU.Specify <-
#'
#' @keywords internal
FarmCPU.LM <-
function(y, w=NULL, GDP, GDP_index=NULL, maxLine=5000, ncpus=2, npc=0, verbose=TRUE){
function(y, w=NULL, GDP, GDP_index=NULL, GDP_mrk_bycol=TRUE, maxLine=5000, ncpus=2, npc=0, verbose=TRUE){
#print("FarmCPU.LM started")
if(is.null(y)) return(NULL)
if(is.null(GDP)) return(NULL)
Expand Down Expand Up @@ -863,7 +866,7 @@ FarmCPU.LM <-
# if(ncol(w) == 50) write.csv(P, "P.csv")

mkl_env({
results <- glm_c(y=y, X=w, iXX = wwi, GDP@address, geno_ind=GDP_index, step=maxLine, verbose=verbose, threads=ncpus)
results <- glm_c(y=y, X=w, iXX = wwi, GDP@address, geno_ind=GDP_index, marker_bycol=GDP_mrk_bycol, step=maxLine, verbose=verbose, threads=ncpus)
})
return(list(P=results[ ,-c(1:3), drop=FALSE], betapred=betapred, sepred=sepred, B=results[ , 1, drop=FALSE], S=results[ , 2, drop=FALSE]))
# return(list(P=P, betapred=betapred, B=as.matrix(B), S=S))
Expand Down
9 changes: 8 additions & 1 deletion R/MVP.GLM.r
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#' @param CV Covariance, design matrix(n * x) for the fixed effects
#' @param ind_idx the index of effective genotyped individuals
#' @param mrk_idx the index of effective markers used in analysis
#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)
#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
#' @param cpu number of cpus used for parallel computation
#' @param verbose whether to print detail.
Expand Down Expand Up @@ -52,6 +53,7 @@ function(
CV=NULL,
ind_idx=NULL,
mrk_idx=NULL,
mrk_bycol=TRUE,
maxLine=5000,
cpu=1,
verbose=TRUE
Expand All @@ -60,6 +62,11 @@ function(
if(is.null(ind_idx)){
if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
if(mrk_bycol){
if(nrow(phe) != nrow(geno)) stop("set the argument 'mrk_bycol=FALSE'?")
}else{
if(nrow(phe) != ncol(geno)) stop("set the argument 'mrk_bycol=TRUE'?")
}
}else{
n <- length(ind_idx)
if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
Expand All @@ -84,7 +91,7 @@ function(
logging.log("scanning...\n", verbose = verbose)

mkl_env({
results <- glm_c(y = ys, X = X0, iXX = iX0X0, geno@address, geno_ind = ind_idx, marker_ind = mrk_idx, step = maxLine, verbose = verbose, threads = cpu)
results <- glm_c(y = ys, X = X0, iXX = iX0X0, geno@address, geno_ind = ind_idx, marker_ind = mrk_idx, marker_bycol = mrk_bycol, step = maxLine, verbose = verbose, threads = cpu)
})

return(results[, c(1, 2, ncol(results))])
Expand Down
9 changes: 8 additions & 1 deletion R/MVP.MLM.r
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#' @param CV covariates
#' @param ind_idx the index of effective genotyped individuals
#' @param mrk_idx the index of effective markers used in analysis
#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)
#' @param REML a list that contains ve and vg
#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
#' @param cpu number of cpus used for parallel computation
Expand Down Expand Up @@ -59,6 +60,7 @@ function(
CV=NULL,
ind_idx=NULL,
mrk_idx=NULL,
mrk_bycol=TRUE,
REML=NULL,
maxLine=5000,
cpu=1,
Expand All @@ -69,6 +71,11 @@ function(
if(is.null(ind_idx)){
if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
if(mrk_bycol){
if(nrow(phe) != nrow(geno)) stop("set the argument 'mrk_bycol=FALSE'?")
}else{
if(nrow(phe) != ncol(geno)) stop("set the argument 'mrk_bycol=TRUE'?")
}
}else{
n <- length(ind_idx)
if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
Expand Down Expand Up @@ -125,7 +132,7 @@ function(

logging.log("scanning...\n", verbose = verbose)
mkl_env({
results <- mlm_c(y = ys, X = X0, U = U, vgs = vgs, geno@address, ind_idx, mrk_idx, step = maxLine, verbose = verbose, threads = cpu)
results <- mlm_c(y = ys, X = X0, U = U, vgs = vgs, geno@address, geno_ind = ind_idx, marker_ind = mrk_idx, marker_bycol = mrk_bycol, step = maxLine, verbose = verbose, threads = cpu)
})

return(results)
Expand Down
10 changes: 5 additions & 5 deletions R/MVP.r
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
logging.log(paste("Markers are detected to be stored by", ifelse(MrkByCol, "column", "row")), "\n", verbose = verbose)
logging.log("Analyzed trait:", colnames(phe)[2], "\n", verbose = verbose)
logging.log("Number of threads used:", ncpus, "\n", verbose = verbose)
hpclib <- grepl("mkl", sessionInfo()$LAPACK) | grepl("openblas", sessionInfo()$LAPACK) | !inherits(try(Revo.version, silent=TRUE),"try-error")
hpclib <- grepl("mkl", sessionInfo()$LAPACK) | grepl("openblas", sessionInfo()$LAPACK) | eval(parse(text = "!inherits(try(Revo.version,silent=TRUE),'try-error')"))
if(!hpclib){
logging.log("No high performance math library detected! The computational efficiency would be greatly reduced\n", verbose = verbose)
}else{
if(grepl("mkl", sessionInfo()$LAPACK) | !inherits(try(Revo.version, silent=TRUE),"try-error")){
if(grepl("mkl", sessionInfo()$LAPACK) | eval(parse(text = "!inherits(try(Revo.version,silent=TRUE),'try-error')"))){
logging.log("Math Kernel Library is detected, nice job!\n", verbose = verbose)
}else{
logging.log("OpenBLAS Library is detected, nice job!\n", verbose = verbose)
Expand Down Expand Up @@ -190,7 +190,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
}
}

logging.log("Check if NAs exist in genotype...", "\n", verbose = verbose)
logging.log("Check if NAs exist in genotype", "\n", verbose = verbose)
if(hasNA(geno@address, mrkbycol = MrkByCol, geno_ind = seqTaxa, threads = ncpus)) stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.")

logging.log("Calculate allele frequency...", "\n", verbose = verbose)
Expand Down Expand Up @@ -340,7 +340,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
logging.log("-------------------------GWAS Start-------------------------", "\n", verbose = verbose)
if (glm.run) {
logging.log("General Linear Model (GLM) Start...", "\n", verbose = verbose)
glm.results <- MVP.GLM(phe=phe, geno=geno, CV=CV.GLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine = maxLine, cpu=ncpus, verbose = verbose);gc()
glm.results <- MVP.GLM(phe=phe, geno=geno, CV=CV.GLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, mrk_bycol = MrkByCol, maxLine = maxLine, cpu=ncpus, verbose = verbose);gc()
colnames(glm.results) <- c("Effect", "SE", paste(colnames(phe)[2],"GLM",sep="."))
z = glm.results[, 1]/glm.results[, 2]
lambda = median(z^2, na.rm=TRUE)/qchisq(1/2, df = 1,lower.tail=FALSE)
Expand All @@ -355,7 +355,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,

if (mlm.run) {
logging.log("Mixed Linear Model (MLM) Start...", "\n", verbose = verbose)
mlm.results <- MVP.MLM(phe=phe, geno=geno, K=K, eigenK=eigenK, CV=CV.MLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine = maxLine, cpu=ncpus, vc.method=vc.method, verbose = verbose);gc()
mlm.results <- MVP.MLM(phe=phe, geno=geno, K=K, eigenK=eigenK, CV=CV.MLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, mrk_bycol = MrkByCol, maxLine = maxLine, cpu=ncpus, vc.method=vc.method, verbose = verbose);gc()
colnames(mlm.results) <- c("Effect", "SE", paste(colnames(phe)[2],"MLM",sep="."))
z = mlm.results[, 1]/mlm.results[, 2]
lambda = median(z^2, na.rm=TRUE)/qchisq(1/2, df = 1,lower.tail=FALSE)
Expand Down
20 changes: 6 additions & 14 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ getRow <- function(pBigMat, row) {
.Call(`_rMVP_getRow`, pBigMat, row)
}

glm_c <- function(y, X, iXX, pBigMat, geno_ind = NULL, marker_ind = NULL, step = 10000L, verbose = TRUE, threads = 0L) {
.Call(`_rMVP_glm_c`, y, X, iXX, pBigMat, geno_ind, marker_ind, step, verbose, threads)
glm_c <- function(y, X, iXX, pBigMat, geno_ind = NULL, marker_ind = NULL, marker_bycol = TRUE, step = 10000L, verbose = TRUE, threads = 0L) {
.Call(`_rMVP_glm_c`, y, X, iXX, pBigMat, geno_ind, marker_ind, marker_bycol, step, verbose, threads)
}

mlm_c <- function(y, X, U, vgs, pBigMat, geno_ind = NULL, marker_ind = NULL, step = 10000L, verbose = TRUE, threads = 0L) {
.Call(`_rMVP_mlm_c`, y, X, U, vgs, pBigMat, geno_ind, marker_ind, step, verbose, threads)
mlm_c <- function(y, X, U, vgs, pBigMat, geno_ind = NULL, marker_ind = NULL, marker_bycol = TRUE, step = 10000L, verbose = TRUE, threads = 0L) {
.Call(`_rMVP_mlm_c`, y, X, U, vgs, pBigMat, geno_ind, marker_ind, marker_bycol, step, verbose, threads)
}

vcf_parser_map <- function(vcf_file, out) {
Expand Down Expand Up @@ -61,16 +61,8 @@ hasNA <- function(pBigMat, mrkbycol = TRUE, geno_ind = NULL, marker_ind = NULL,
.Call(`_rMVP_hasNA`, pBigMat, mrkbycol, geno_ind, marker_ind, threads)
}

BigRowMean <- function(pBigMat, mrkbycol = TRUE, threads = 0L, geno_ind = NULL) {
.Call(`_rMVP_BigRowMean`, pBigMat, mrkbycol, threads, geno_ind)
}

kin_cal_m <- function(pBigMat, threads = 0L, verbose = TRUE) {
.Call(`_rMVP_kin_cal_m`, pBigMat, threads, verbose)
}

kin_cal_s <- function(pBigMat, threads = 0L, mkl = FALSE, verbose = TRUE) {
.Call(`_rMVP_kin_cal_s`, pBigMat, threads, mkl, verbose)
BigRowMean <- function(pBigMat, marker_bycol = TRUE, step = 10000L, threads = 0L, geno_ind = NULL, verbose = TRUE) {
.Call(`_rMVP_BigRowMean`, pBigMat, marker_bycol, step, threads, geno_ind, verbose)
}

kin_cal <- function(pBigMat, geno_ind = NULL, marker_ind = NULL, marker_freq = NULL, marker_bycol = TRUE, threads = 0L, step = 10000L, mkl = FALSE, verbose = TRUE) {
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
</a>
</p>

### #----------------------***rMVP [v1.1.0]() is coming, and stronger again!***------------------------#
### #----------------------***rMVP [v1.3.5]() is coming, and stronger and faster again!***------------------------#

### Repos:
**Github:** https://github.com/xiaolei-lab/rMVP
Expand Down
3 changes: 3 additions & 0 deletions man/MVP.GLM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/MVP.MLM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4661404

Please sign in to comment.