Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Merge pull request #53 from molpopgen/lHaf
Browse files Browse the repository at this point in the history
Add l-Haf statistic from VariantMatrix.  #35
  • Loading branch information
molpopgen authored Sep 21, 2018
2 parents 3c22ce1 + 6cf4bd7 commit 40202c1
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 3 deletions.
1 change: 1 addition & 0 deletions Sequence/summstats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "summstats/nsl.hpp"
#include "summstats/nslx.hpp"
#include "summstats/ld.hpp"
#include "summstats/lhaf.hpp"
#include "summstats/garud.hpp"

#endif
2 changes: 1 addition & 1 deletion Sequence/summstats/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pkgincludedir=$(prefix)/include/Sequence/summstats

pkginclude_HEADERS = classics.hpp thetapi.hpp thetaw.hpp thetah.hpp thetal.hpp auxillary.hpp nvariablesites.hpp allele_counts.hpp \
util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp \
util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp lhaf.hpp \
algorithm.hpp
2 changes: 1 addition & 1 deletion Sequence/summstats/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
pkginclude_HEADERS = classics.hpp thetapi.hpp thetaw.hpp thetah.hpp thetal.hpp auxillary.hpp nvariablesites.hpp allele_counts.hpp \
util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp \
util.hpp ld.hpp nSLiHS.hpp nsl.hpp nslx.hpp garud.hpp generic.hpp lhaf.hpp \
algorithm.hpp

all: all-am
Expand Down
20 changes: 20 additions & 0 deletions Sequence/summstats/lhaf.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef SEQUENCE_SUMMSTATS_LHAF_HPP
#define SEQUENCE_SUMMSTATS_LHAF_HPP

#include <cstdint>
#include <vector>
#include <Sequence/VariantMatrix.hpp>

namespace Sequence
{
/*! \brief l-Haf statistic of \cite Ronen2015-te
* \param m A VariantMatrix
* \param refstate The ancstral state
* \param l The power parameter
* \return vector of the statistic
* \ingroup popgenanalysis
*/
std::vector<double> lhaf(const VariantMatrix &m,
const std::int8_t refstate, const double l);
} // namespace Sequence
#endif
38 changes: 38 additions & 0 deletions doc/libsequence.bib
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,41 @@ @ARTICLE{Thornton2003-wj
paper;libseq\_manual;pylibseq\_manual",
language = "en"
}

@ARTICLE{Ronen2015-te,
title = "Predicting Carriers of Ongoing Selective Sweeps without Knowledge
of the Favored Allele",
author = "Ronen, Roy and Tesler, Glenn and Akbari, Ali and Zakov, Shay and
Rosenberg, Noah A and Bafna, Vineet",
abstract = "Methods for detecting the genomic signatures of natural selection
have been heavily studied, and they have been successful in
identifying many selective sweeps. For most of these sweeps, the
favored allele remains unknown, making it difficult to
distinguish carriers of the sweep from non-carriers. In an
ongoing selective sweep, carriers of the favored allele are
likely to contain a future most recent common ancestor.
Therefore, identifying them may prove useful in predicting the
evolutionary trajectory--for example, in contexts involving
drug-resistant pathogen strains or cancer subclones. The main
contribution of this paper is the development and analysis of a
new statistic, the Haplotype Allele Frequency (HAF) score. The
HAF score, assigned to individual haplotypes in a sample,
naturally captures many of the properties shared by haplotypes
carrying a favored allele. We provide a theoretical framework for
computing expected HAF scores under different evolutionary
scenarios, and we validate the theoretical predictions with
simulations. As an application of HAF score computations, we
develop an algorithm (PreCIOSS: Predicting Carriers of Ongoing
Selective Sweeps) to identify carriers of the favored allele in
selective sweeps, and we demonstrate its power on simulations of
both hard and soft sweeps, as well as on data from well-known
sweeps in human populations.",
journal = "PLoS Genet.",
volume = 11,
number = 9,
pages = "e1005527",
month = sep,
year = 2015,
keywords = "Dec 13 import;libseq\_manual",
language = "en"
}
1 change: 1 addition & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ libsequence_la_SOURCES= Grantham.cc\
summstats/nslx.cc \
summstats/garud.cc \
summstats/generic.cc \
summstats/lhaf.cc \
summstats/auxillary.cc


Expand Down
6 changes: 5 additions & 1 deletion src/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ am_libsequence_la_OBJECTS = Grantham.lo PathwayHelper.lo \
summstats/allele_counts.lo summstats/haplotype_statistics.lo \
summstats/ld.lo summstats/rmin.lo summstats/nsl.lo \
summstats/nslx.lo summstats/garud.lo summstats/generic.lo \
summstats/auxillary.lo
summstats/lhaf.lo summstats/auxillary.lo
libsequence_la_OBJECTS = $(am_libsequence_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
Expand Down Expand Up @@ -415,6 +415,7 @@ libsequence_la_SOURCES = Grantham.cc\
summstats/nslx.cc \
summstats/garud.cc \
summstats/generic.cc \
summstats/lhaf.cc \
summstats/auxillary.cc

AM_LDFLAGS = -version-info 20:0:0
Expand Down Expand Up @@ -576,6 +577,8 @@ summstats/garud.lo: summstats/$(am__dirstamp) \
summstats/$(DEPDIR)/$(am__dirstamp)
summstats/generic.lo: summstats/$(am__dirstamp) \
summstats/$(DEPDIR)/$(am__dirstamp)
summstats/lhaf.lo: summstats/$(am__dirstamp) \
summstats/$(DEPDIR)/$(am__dirstamp)
summstats/auxillary.lo: summstats/$(am__dirstamp) \
summstats/$(DEPDIR)/$(am__dirstamp)

Expand Down Expand Up @@ -698,6 +701,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/haplotype_statistics.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/hprime.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/ld.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/lhaf.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/nsl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/nslx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@summstats/$(DEPDIR)/nvariablesites.Plo@am__quote@
Expand Down
44 changes: 44 additions & 0 deletions src/summstats/lhaf.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include <cstdint>
#include <algorithm>
#include <numeric>
#include <cmath>
#include <Sequence/VariantMatrix.hpp>
#include <Sequence/VariantMatrixViews.hpp>

namespace Sequence
{
std::vector<double>
lhaf(const VariantMatrix &m, const std::int8_t refstate, const double l)
{
std::vector<long int> dcounts;
dcounts.reserve(m.nsites);
const auto find_nonref = [refstate](const std::int8_t x) {
return x != refstate && !(x < 0);
};
for (std::size_t i = 0; i < m.nsites; ++i)
{
auto r = get_ConstRowView(m, i);
dcounts.push_back(
std::count_if(r.begin(), r.end(), find_nonref));
}

// Get the values for each element in the data
std::vector<double> rv;
rv.reserve(m.nsites);
for (std::size_t i = 0; i < m.nsam; ++i)
{
auto c = get_ConstColView(m, i);
auto j = std::find_if(c.cbegin(), c.cend(), find_nonref);
double score = 0.0;
while (j != c.cend())
{
size_t d2 = static_cast<std::size_t>(
std::distance(c.cbegin(), j));
score += std::pow(static_cast<double>(dcounts[d2]), l);
j = std::find_if(c.cbegin(), c.cend(), find_nonref);
}
rv.push_back(score);
}
return rv;
}
} // namespace Sequence

0 comments on commit 40202c1

Please sign in to comment.