From 20aeb057b609180b8cffa1873df75fe8e345e94d Mon Sep 17 00:00:00 2001
From: chr1swallace <cew54@cam.ac.uk>
Date: Thu, 13 Jun 2024 16:08:48 +0100
Subject: [PATCH 1/3] added effect_prior to approx.bf.estimates

---
 DESCRIPTION           |  2 ++
 NAMESPACE             |  3 +++
 R/claudia.R           | 47 +++++++++++++++++++++++++++++--------------
 R/coloc-package.R     |  1 +
 man/annotate_susie.Rd |  4 ++++
 5 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5bb2689..15d994a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,6 +5,7 @@ Imports:
     ggplot2,
     methods,
     viridis,
+    matrixStats,
     stats,
     grDevices,
     susieR (>= 0.12.06),
@@ -42,6 +43,7 @@ Collate:
     'boundaries.R'
     'check.R'
     'claudia.R'
+    'compare.R'
     'plot.R'
     'private.R'
     'sensitivity.R'
diff --git a/NAMESPACE b/NAMESPACE
index ae5c4c4..0f2511b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(compare,bf_bf)
 S3method(plot,coloc_abf)
 S3method(print,coloc_abf)
 export(annotate_susie)
@@ -9,6 +10,7 @@ export(check_alignment)
 export(check_dataset)
 export(coloc.abf)
 export(coloc.bf_bf)
+export(coloc.compare)
 export(coloc.signals)
 export(coloc.susie)
 export(coloc.susie_bf)
@@ -39,6 +41,7 @@ importFrom(graphics,points)
 importFrom(graphics,rect)
 importFrom(graphics,text)
 importFrom(graphics,title)
+importFrom(matrixStats,logSumExp)
 importFrom(methods,as)
 importFrom(methods,is)
 importFrom(methods,new)
diff --git a/R/claudia.R b/R/claudia.R
index 3b4f4af..f0c9869 100644
--- a/R/claudia.R
+++ b/R/claudia.R
@@ -93,8 +93,9 @@ approx.bf.p <- function(p,f,type, N, s, suffix=NULL) {
 ##' @inheritParams approx.bf.p
 ##' @return data.frame containing lABF and intermediate calculations
 ##' @author Vincent Plagnol, Chris Wallace
-approx.bf.estimates <- function (z, V, type, suffix=NULL, sdY=1) {
-    sd.prior <- if (type == "quant") { 0.15*sdY } else { 0.2 }
+approx.bf.estimates <- function (z, V, type, suffix=NULL, sdY=1,
+                                 effect_priors=c(quant=0.15,cc=0.2)) {
+    sd.prior <- if (type == "quant") { effect_priors["quant"] * sdY } else { effect_priors["cc"] }
     r <- sd.prior^2/(sd.prior^2 + V)
     lABF = 0.5 * (log(1 - r) + (r * z^2))
     ret <- data.frame(V, z, r, lABF)
@@ -163,10 +164,12 @@ sdY.est <- function(vbeta, maf, n) {
 ##' @title process.dataset
 ##' @param d list
 ##' @param suffix "df1" or "df2"
+##' @param ... used to pass parameters to approx.bf.estimates, in
+##'     particular the effect_priors parameter
 ##' @return data.frame with log(abf) or log(bf)
 ##' @export
 ##' @author Chris Wallace
-process.dataset <- function(d, suffix) {
+process.dataset <- function(d, suffix, ...) {
                                         #message('Processing dataset')
 
     nd <- names(d)
@@ -307,22 +310,36 @@ adjust_prior=function(p,nsnps,suffix="") {
 ##' coefficients should be used if available.
 ##' 
 ##' @title Fully Bayesian colocalisation analysis using Bayes Factors
-##' @param dataset1 a list with specifically named elements defining the dataset
-##'   to be analysed. See \code{\link{check_dataset}} for details.
+##' @param dataset1 a list with specifically named elements defining
+##'     the dataset to be analysed. See \code{\link{check_dataset}}
+##'     for details.
 ##' @param dataset2 as above, for dataset 2
-##' @param MAF Common minor allele frequency vector to be used for both dataset1 and dataset2, a shorthand for supplying the same vector as parts of both datasets
-##' @param p1 prior probability a SNP is associated with trait 1, default 1e-4
-##' @param p2 prior probability a SNP is associated with trait 2, default 1e-4
-##' @param p12 prior probability a SNP is associated with both traits, default 1e-5
-##' @return a list of two \code{data.frame}s:
-##' \itemize{
-##' \item summary is a vector giving the number of SNPs analysed, and the posterior probabilities of H0 (no causal variant), H1 (causal variant for trait 1 only), H2 (causal variant for trait 2 only), H3 (two distinct causal variants) and H4 (one common causal variant)
-##' \item results is an annotated version of the input data containing log Approximate Bayes Factors and intermediate calculations, and the posterior probability SNP.PP.H4 of the SNP being causal for the shared signal *if* H4 is true. This is only relevant if the posterior support for H4 in summary is convincing.
-##' }
+##' @param MAF Common minor allele frequency vector to be used for
+##'     both dataset1 and dataset2, a shorthand for supplying the same
+##'     vector as parts of both datasets
+##' @param p1 prior probability a SNP is associated with trait 1,
+##'     default 1e-4
+##' @param p2 prior probability a SNP is associated with trait 2,
+##'     default 1e-4
+##' @param p12 prior probability a SNP is associated with both traits,
+##'     default 1e-5
+##' @param ... used to pass parameters to approx.bf.estimates, in
+##'     particular the effect_priors parameter
+##' @return a list of two \code{data.frame}s: \itemize{ \item summary
+##'     is a vector giving the number of SNPs analysed, and the
+##'     posterior probabilities of H0 (no causal variant), H1 (causal
+##'     variant for trait 1 only), H2 (causal variant for trait 2
+##'     only), H3 (two distinct causal variants) and H4 (one common
+##'     causal variant) \item results is an annotated version of the
+##'     input data containing log Approximate Bayes Factors and
+##'     intermediate calculations, and the posterior probability
+##'     SNP.PP.H4 of the SNP being causal for the shared signal *if*
+##'     H4 is true. This is only relevant if the posterior support for
+##'     H4 in summary is convincing.  }
 ##' @author Claudia Giambartolomei, Chris Wallace
 ##' @export
 coloc.abf <- function(dataset1, dataset2, MAF=NULL, 
-                      p1=1e-4, p2=1e-4, p12=1e-5) {
+                      p1=1e-4, p2=1e-4, p12=1e-5,...) {
 
     if(!("MAF" %in% names(dataset1)) & !is.null(MAF))
         dataset1$MAF <- MAF
diff --git a/R/coloc-package.R b/R/coloc-package.R
index 8f2116b..620c172 100644
--- a/R/coloc-package.R
+++ b/R/coloc-package.R
@@ -18,5 +18,6 @@
 #' @importFrom grDevices colorRampPalette palette rgb
 #' @importFrom stats pnorm uniroot
 #' @importFrom susieR susie_rss susie_get_cs
+#' @importFrom matrixStats logSumExp
 utils::globalVariables(c(".","dfsane","dmvnorm","H0","H1","H2","H3","H4","hit1","hit2","lABF.df1","lABF.df2","lABF.h3","lbf1","lbf2","lbf3","lbf4","nsnps","snp","snp1","snp2","varbeta","z"))
 NULL
diff --git a/man/annotate_susie.Rd b/man/annotate_susie.Rd
index d742409..d5aa18b 100644
--- a/man/annotate_susie.Rd
+++ b/man/annotate_susie.Rd
@@ -10,6 +10,10 @@ annotate_susie(res, snp, LD)
 \item{res}{output of susie_rss()}
 
 \item{snp}{vector of snp identifiers}
+
+\item{LD}{matrix of LD (r) between snps in snp
+identifiers. Columns, rows should be named by a string that
+exists in the vector snp}
 }
 \value{
 res with column names added to some components

From 7dd92d9021158284981f5980edf556c542204af3 Mon Sep 17 00:00:00 2001
From: chr1swallace <cew54@cam.ac.uk>
Date: Thu, 13 Jun 2024 16:12:21 +0100
Subject: [PATCH 2/3] added effect_prior to approx.bf.estimates

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 15d994a..97f28ca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,7 +43,6 @@ Collate:
     'boundaries.R'
     'check.R'
     'claudia.R'
-    'compare.R'
     'plot.R'
     'private.R'
     'sensitivity.R'

From c66d8eac410de8b8346a6edf584ffda4236b61f6 Mon Sep 17 00:00:00 2001
From: Chris Wallace <cew54@cam.ac.uk>
Date: Tue, 16 Jul 2024 10:59:13 +0100
Subject: [PATCH 3/3] weird doc issues

---
 .gitignore                 |  1 +
 NAMESPACE                  |  2 --
 man/approx.bf.estimates.Rd |  9 +++++++-
 man/coloc.abf.Rd           | 47 ++++++++++++++++++++++++++++----------
 man/coloc.bf_bf.Rd         |  9 +++++---
 man/coloc.detail.Rd        | 18 ++++++++++-----
 man/coloc.process.Rd       |  9 +++++---
 man/coloc.signals.Rd       | 18 ++++++++++-----
 man/coloc.susie_bf.Rd      | 14 ++++++++----
 man/combine.abf.Rd         |  9 +++++---
 man/process.dataset.Rd     |  5 +++-
 11 files changed, 99 insertions(+), 42 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3a2a582..bbd5907 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
 inst/doc
 doc
 Meta
+*~
diff --git a/NAMESPACE b/NAMESPACE
index 0f2511b..c9d50d4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(compare,bf_bf)
 S3method(plot,coloc_abf)
 S3method(print,coloc_abf)
 export(annotate_susie)
@@ -10,7 +9,6 @@ export(check_alignment)
 export(check_dataset)
 export(coloc.abf)
 export(coloc.bf_bf)
-export(coloc.compare)
 export(coloc.signals)
 export(coloc.susie)
 export(coloc.susie_bf)
diff --git a/man/approx.bf.estimates.Rd b/man/approx.bf.estimates.Rd
index 3f29402..1c828ed 100644
--- a/man/approx.bf.estimates.Rd
+++ b/man/approx.bf.estimates.Rd
@@ -4,7 +4,14 @@
 \alias{approx.bf.estimates}
 \title{Internal function, approx.bf.estimates}
 \usage{
-approx.bf.estimates(z, V, type, suffix = NULL, sdY = 1)
+approx.bf.estimates(
+  z,
+  V,
+  type,
+  suffix = NULL,
+  sdY = 1,
+  effect_priors = c(quant = 0.15, cc = 0.2)
+)
 }
 \arguments{
 \item{z}{normal deviate associated with regression coefficient and its variance}
diff --git a/man/coloc.abf.Rd b/man/coloc.abf.Rd
index 704418c..2a61c4c 100644
--- a/man/coloc.abf.Rd
+++ b/man/coloc.abf.Rd
@@ -4,28 +4,51 @@
 \alias{coloc.abf}
 \title{Fully Bayesian colocalisation analysis using Bayes Factors}
 \usage{
-coloc.abf(dataset1, dataset2, MAF = NULL, p1 = 1e-04, p2 = 1e-04, p12 = 1e-05)
+coloc.abf(
+  dataset1,
+  dataset2,
+  MAF = NULL,
+  p1 = 1e-04,
+  p2 = 1e-04,
+  p12 = 1e-05,
+  ...
+)
 }
 \arguments{
-\item{dataset1}{a list with specifically named elements defining the dataset
-to be analysed. See \code{\link{check_dataset}} for details.}
+\item{dataset1}{a list with specifically named elements defining
+the dataset to be analysed. See \code{\link{check_dataset}}
+for details.}
 
 \item{dataset2}{as above, for dataset 2}
 
-\item{MAF}{Common minor allele frequency vector to be used for both dataset1 and dataset2, a shorthand for supplying the same vector as parts of both datasets}
+\item{MAF}{Common minor allele frequency vector to be used for
+both dataset1 and dataset2, a shorthand for supplying the same
+vector as parts of both datasets}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
+
+\item{...}{used to pass parameters to approx.bf.estimates, in
+particular the effect_priors parameter}
 }
 \value{
-a list of two \code{data.frame}s:
-\itemize{
-\item summary is a vector giving the number of SNPs analysed, and the posterior probabilities of H0 (no causal variant), H1 (causal variant for trait 1 only), H2 (causal variant for trait 2 only), H3 (two distinct causal variants) and H4 (one common causal variant)
-\item results is an annotated version of the input data containing log Approximate Bayes Factors and intermediate calculations, and the posterior probability SNP.PP.H4 of the SNP being causal for the shared signal \emph{if} H4 is true. This is only relevant if the posterior support for H4 in summary is convincing.
-}
+a list of two \code{data.frame}s: \itemize{ \item summary
+is a vector giving the number of SNPs analysed, and the
+posterior probabilities of H0 (no causal variant), H1 (causal
+variant for trait 1 only), H2 (causal variant for trait 2
+only), H3 (two distinct causal variants) and H4 (one common
+causal variant) \item results is an annotated version of the
+input data containing log Approximate Bayes Factors and
+intermediate calculations, and the posterior probability
+SNP.PP.H4 of the SNP being causal for the shared signal \emph{if}
+H4 is true. This is only relevant if the posterior support for
+H4 in summary is convincing.  }
 }
 \description{
 Bayesian colocalisation analysis
diff --git a/man/coloc.bf_bf.Rd b/man/coloc.bf_bf.Rd
index 38e03d3..9040699 100644
--- a/man/coloc.bf_bf.Rd
+++ b/man/coloc.bf_bf.Rd
@@ -19,11 +19,14 @@ coloc.bf_bf(
 
 \item{bf2}{named vector of log BF, or matrix of BF with colnames (cols=snps, rows=signals)}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 
 \item{overlap.min}{see trim_by_posterior}
 
diff --git a/man/coloc.detail.Rd b/man/coloc.detail.Rd
index 64cf1bc..d1bd64d 100644
--- a/man/coloc.detail.Rd
+++ b/man/coloc.detail.Rd
@@ -14,18 +14,24 @@ coloc.detail(
 )
 }
 \arguments{
-\item{dataset1}{a list with specifically named elements defining the dataset
-to be analysed. See \code{\link{check_dataset}} for details.}
+\item{dataset1}{a list with specifically named elements defining
+the dataset to be analysed. See \code{\link{check_dataset}}
+for details.}
 
 \item{dataset2}{as above, for dataset 2}
 
-\item{MAF}{Common minor allele frequency vector to be used for both dataset1 and dataset2, a shorthand for supplying the same vector as parts of both datasets}
+\item{MAF}{Common minor allele frequency vector to be used for
+both dataset1 and dataset2, a shorthand for supplying the same
+vector as parts of both datasets}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 }
 \value{
 a list of three \code{data.tables}s:
diff --git a/man/coloc.process.Rd b/man/coloc.process.Rd
index 3c273db..81fae9e 100644
--- a/man/coloc.process.Rd
+++ b/man/coloc.process.Rd
@@ -31,11 +31,14 @@ masking}
 
 \item{r2thr}{r2 threshold at which to mask}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 
 \item{LD1}{named LD matrix (for masking) for trait 1 only}
 
diff --git a/man/coloc.signals.Rd b/man/coloc.signals.Rd
index bb4d675..d6723cb 100644
--- a/man/coloc.signals.Rd
+++ b/man/coloc.signals.Rd
@@ -20,12 +20,15 @@ coloc.signals(
 )
 }
 \arguments{
-\item{dataset1}{a list with specifically named elements defining the dataset
-to be analysed. See \code{\link{check_dataset}} for details.}
+\item{dataset1}{a list with specifically named elements defining
+the dataset to be analysed. See \code{\link{check_dataset}}
+for details.}
 
 \item{dataset2}{as above, for dataset 2}
 
-\item{MAF}{Common minor allele frequency vector to be used for both dataset1 and dataset2, a shorthand for supplying the same vector as parts of both datasets}
+\item{MAF}{Common minor allele frequency vector to be used for
+both dataset1 and dataset2, a shorthand for supplying the same
+vector as parts of both datasets}
 
 \item{LD}{required if method="cond". matrix of genotype
 \emph{correlation} (ie r, not r^2) between SNPs. If dataset1 and
@@ -64,11 +67,14 @@ because the strongest signals aren't affected by conditioning
 incorrectly on weaker secondary and less certain signals.
 }\if{html}{\out{</div>}}}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 
 \item{maxhits}{maximum number of levels to condition/mask}
 
diff --git a/man/coloc.susie_bf.Rd b/man/coloc.susie_bf.Rd
index 83c588a..75c5236 100644
--- a/man/coloc.susie_bf.Rd
+++ b/man/coloc.susie_bf.Rd
@@ -15,16 +15,20 @@ coloc.susie_bf(
 )
 }
 \arguments{
-\item{dataset1}{a list with specifically named elements defining the dataset
-to be analysed. See \code{\link{check_dataset}} for details.}
+\item{dataset1}{a list with specifically named elements defining
+the dataset to be analysed. See \code{\link{check_dataset}}
+for details.}
 
 \item{bf2}{named vector of log BF, names are snp ids and will be matched to column names of susie object's alpha}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 
 \item{susie.args}{named list of arguments to be passed to susieR::susie_rss()}
 
diff --git a/man/combine.abf.Rd b/man/combine.abf.Rd
index 8fedd19..8e577e4 100644
--- a/man/combine.abf.Rd
+++ b/man/combine.abf.Rd
@@ -11,11 +11,14 @@ combine.abf(l1, l2, p1, p2, p12, quiet = FALSE)
 
 \item{l2}{merged.df$lABF.df2}
 
-\item{p1}{prior probability a SNP is associated with trait 1, default 1e-4}
+\item{p1}{prior probability a SNP is associated with trait 1,
+default 1e-4}
 
-\item{p2}{prior probability a SNP is associated with trait 2, default 1e-4}
+\item{p2}{prior probability a SNP is associated with trait 2,
+default 1e-4}
 
-\item{p12}{prior probability a SNP is associated with both traits, default 1e-5}
+\item{p12}{prior probability a SNP is associated with both traits,
+default 1e-5}
 
 \item{quiet}{don't print posterior summary if TRUE. default=FALSE}
 }
diff --git a/man/process.dataset.Rd b/man/process.dataset.Rd
index 3093d42..95f2c02 100644
--- a/man/process.dataset.Rd
+++ b/man/process.dataset.Rd
@@ -4,12 +4,15 @@
 \alias{process.dataset}
 \title{process.dataset}
 \usage{
-process.dataset(d, suffix)
+process.dataset(d, suffix, ...)
 }
 \arguments{
 \item{d}{list}
 
 \item{suffix}{"df1" or "df2"}
+
+\item{...}{used to pass parameters to approx.bf.estimates, in
+particular the effect_priors parameter}
 }
 \value{
 data.frame with log(abf) or log(bf)