Skip to content

Commit

Permalink
Merge pull request #4 from tbenavi1/fastK_compatibility
Browse files Browse the repository at this point in the history
making genomescope 2.0 compatible with FastK
  • Loading branch information
mschatz authored Apr 8, 2024
2 parents da24a1a + 0faf3f8 commit 5063303
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 25 deletions.
51 changes: 28 additions & 23 deletions R/report_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
kmer_hist_transform = kmer_hist_orig
kmer_hist_transform$V2 = as.numeric(kmer_hist_transform$V1)**transform_exp * as.numeric(kmer_hist_transform$V2)
model = container[[1]]
ISCROPPED = abs(nrow(kmer_hist) - nrow(kmer_hist_orig)) > 1 # the current version has difference one even when cutoff was not used (the last position is excluded for the fit but restored for the genome size est)

#automatically zoom into the relevant regions of the plot, ignore first 15 positions
xmax=length(x)
Expand Down Expand Up @@ -120,7 +121,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
#rect(0, 0, max(kmer_hist_orig[[1]])*1.1 , max(kmer_hist_orig[[2]])*1.1, col=COLOR_BGCOLOR)
rect(0, 0, x_limit_orig*1.1 , y_limit_orig*1.1, col=COLOR_BGCOLOR)
points(kmer_hist_orig, type="h", col=COLOR_HIST, lwd=2)
# if(length(kmer_hist[,1])!=length(kmer_hist_orig[,1])){
# if( ISCROPPED ){
# abline(v=length(kmer_hist[,1]),col=COLOR_COVTHRES,lty="dashed", lwd=3)
# }
box(col="black")
Expand All @@ -134,7 +135,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
#rect(0, 0, max(kmer_hist_orig[[1]])*1.1 , max(kmer_hist_orig[[2]])*1.1, col=COLOR_BGCOLOR)
rect(0, 0, x_limit*1.1 , y_limit*1.1, col=COLOR_BGCOLOR)
points(kmer_hist_transform, type="h", col=COLOR_HIST, lwd=2)
# if(length(kmer_hist[,1])!=length(kmer_hist_orig[,1])){
# if( ISCROPPED ){
# abline(v=length(kmer_hist[,1]),col=COLOR_COVTHRES,lty="dashed", lwd=3)
# }
box(col="black")
Expand All @@ -148,7 +149,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
cex.lab=font_size, cex.axis=font_size, cex.main=font_size, cex.sub=font_size)
rect(1e-10, 1e-10, max(kmer_hist_orig[,1])*10 , max(kmer_hist_orig[,2])*10, col=COLOR_BGCOLOR)
points(kmer_hist_orig, type="h", col=COLOR_HIST, lwd=2)
if(length(kmer_hist[,1])!=length(kmer_hist_orig[,1])){
if( ISCROPPED ){
abline(v=length(kmer_hist[,1]),col=COLOR_COVTHRES,lty="dashed", lwd=3)
}
box(col="black")
Expand All @@ -161,16 +162,16 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
cex.lab=font_size, cex.axis=font_size, cex.main=font_size, cex.sub=font_size)
rect(1e-10, 1e-10, max(kmer_hist_transform[,1])*10 , max(kmer_hist_transform[,2])*10, col=COLOR_BGCOLOR)
points(kmer_hist_transform, type="h", col=COLOR_HIST, lwd=2)
if(length(kmer_hist[,1])!=length(kmer_hist_transform[,1])){
if( ISCROPPED ){
abline(v=length(kmer_hist[,1]),col=COLOR_COVTHRES,lty="dashed", lwd=3)
}
box(col="black")


if(!is.null(model))
{
x=kmer_hist[[1]]
y=kmer_hist[[2]]
x=kmer_hist_orig[[1]]
y=kmer_hist_orig[[2]]
y_transform = as.numeric(x)**transform_exp*as.numeric(y)

## The model converged!
Expand Down Expand Up @@ -244,9 +245,13 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
error_rate = 1-(1-(total_error_kmers/total_kmers))**(1/k)
error_rate = c(error_rate, error_rate)

# print(paste("Total:", total_kmers, "Total err: ", total_error_kmers, "kcov:", kcov)) # sanity testing lines :-)

total_len = (total_kmers-total_error_kmers)/(p*kcov)
atotal_len = (total_kmers-total_error_kmers)/(p*akcov)

# print(paste("Total:", total_len, "Atotal: ", atotal_len)) # sanity testing lines :-)

## find kmers that fit the p peak model (no repeats)
if (p==1)
{
Expand Down Expand Up @@ -403,7 +408,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
dev.set(dev.next())

## Finish Linear Plot
title(paste("\n\nlen:", prettyNum(total_len[1], big.mark=","),
title(paste("\n\nlen:", prettyNum(atotal_len, big.mark=","),
"bp",
" uniq:", format(100*(unique_len[1]/total_len[1]), digits=3),
"% ", "\n",
Expand Down Expand Up @@ -452,7 +457,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
dev.set(dev.next())

## Finish Linear Plot
title(paste("\n\nlen:", prettyNum(total_len[1], big.mark=","),
title(paste("\n\nlen:", prettyNum(atotal_len, big.mark=","),
"bp",
" uniq:", format(100*(unique_len[1]/total_len[1]), digits=3),
"% ", "\n",
Expand Down Expand Up @@ -501,7 +506,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
dev.set(dev.next())

## Finish Log plot
title(paste("\n\nlen:", prettyNum(total_len[1], big.mark=","),
title(paste("\n\nlen:", prettyNum(atotal_len, big.mark=","),
"bp",
" uniq:", format(100*(unique_len[1]/total_len[1]), digits=3),
"% ", "\n",
Expand Down Expand Up @@ -531,20 +536,20 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
}

## Add legend
if(length(kmer_hist[,1])==length(kmer_hist_orig[,1]))
if( !ISCROPPED )
{
if (NO_UNIQUE_SEQUENCE) {
legend(exp(.62 * log(max(x))), 1.0 * max(y),
legend=c("observed", "full model", "errors", "kmer-peaks"),
lty=c("solid", "solid", "solid", "dashed"),
lwd=c(3,3,3,3),
lwd=c(3,3,3,2),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_ERRORS, COLOR_KMERPEAK),
bg="white")
} else {
legend(exp(.62 * log(max(x))), 1.0 * max(y),
legend=c("observed", "full model", "unique sequence", "errors", "kmer-peaks"),
lty=c("solid", "solid", "solid", "solid", "dashed"),
lwd=c(3,3,3,3,3),
lwd=c(3,3,3,3,2),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_pPEAK, COLOR_ERRORS, COLOR_KMERPEAK),
bg="white")
}
Expand Down Expand Up @@ -573,7 +578,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
dev.set(dev.next())

## Finish Log plot
title(paste("\n\nlen:", prettyNum(total_len[1], big.mark=","),
title(paste("\n\nlen:", prettyNum(atotal_len, big.mark=","),
"bp",
" uniq:", format(100*(unique_len[1]/total_len[1]), digits=3),
"% ", "\n",
Expand Down Expand Up @@ -603,7 +608,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
}

## Add legend
if(length(kmer_hist[,1])==length(kmer_hist_orig[,1]))
if( ISCROPPED )
{
if (NO_UNIQUE_SEQUENCE) {
legend(exp(.62 * log(max(x))), 1.0 * max(y),
Expand All @@ -626,18 +631,18 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
if (NO_UNIQUE_SEQUENCE) {
legend("topright",
##legend(exp(.62 * log(max(x))), 1.0 * max(y),
legend=c("observed", "full model", "errors", "kmer-peaks","cov-threshold"),
lty=c("solid", "solid", "solid", "dashed", "dashed"),
lwd=c(3,3,3,2,3),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_ERRORS, COLOR_KMERPEAK, COLOR_COVTHRES),
legend=c("observed", "full model", "errors", "kmer-peaks"),
lty=c("solid", "solid", "solid", "dashed"),
lwd=c(3,3,3,2),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_ERRORS, COLOR_KMERPEAK),
bg="white")
} else {
legend("topright",
##legend(exp(.62 * log(max(x))), 1.0 * max(y),
legend=c("observed", "full model", "unique sequence", "errors", "kmer-peaks","cov-threshold"),
lty=c("solid", "solid", "solid", "solid", "dashed", "dashed"),
lwd=c(3,3,3,3,2,3),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_pPEAK, COLOR_ERRORS, COLOR_KMERPEAK, COLOR_COVTHRES),
legend=c("observed", "full model", "unique sequence", "errors", "kmer-peaks"),
lty=c("solid", "solid", "solid", "solid", "dashed"),
lwd=c(3,3,3,3,2),
col=c(COLOR_HIST, COLOR_2pPEAK, COLOR_pPEAK, COLOR_ERRORS, COLOR_KMERPEAK),
bg="white")
}
}
Expand All @@ -649,7 +654,7 @@ report_results<-function(kmer_hist,kmer_hist_orig, k, p, container, foldername,
" kcov:", format(akcov, digits=3),
" err:", format(error_rate[1], digits=3),
" model fit:", format(adups, digits=3),
" len:", round(total_len[1]), "\n", sep=""))
" len:", round(atotal_len), "\n", sep=""))
}
}
else
Expand Down
4 changes: 2 additions & 2 deletions genomescope.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ if (is.null(arguments$input) | is.null(arguments$output)) {
minkmerx = 2;
}

kmer_prof <- kmer_prof[c(minkmerx:(length(kmer_prof[,2])-1)),] #get rid of the last position
kmer_prof_orig <- kmer_prof
kmer_prof_orig <- kmer_prof # kmer_prof_orig will now store all including the last position
kmer_prof <- kmer_prof[c(minkmerx:(nrow(kmer_prof)-1)),] #get rid of the last position

## try to find the local minimum between errors and the first (heterozygous) peak
kmer_trans = as.numeric(kmer_prof[,1])**transform_exp*as.numeric(kmer_prof[,2])
Expand Down

0 comments on commit 5063303

Please sign in to comment.