Skip to content

Commit

Permalink
Merge branch 'feature/fastio' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
rob-p committed Nov 24, 2014
2 parents 82bfb48 + 65d6ed1 commit ec457ce
Show file tree
Hide file tree
Showing 22 changed files with 475 additions and 348 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
branches:
only:
- develop

- feature/fastio

language: cpp

compiler: gcc
Expand All @@ -14,7 +15,7 @@ before_install:
- echo "yes" | sudo apt-add-repository ppa:h-rayflood/gcc-upper
- sudo apt-get clean -qq
- sudo apt-get update -qq
- sudo apt-get install -qq gcc-4.9 g++-4.9
- sudo apt-get install -qq gcc-4.9 g++-4.9 liblzma-dev libbz2-dev
# Select the GCC 4.9 toolchain installed above. CXX must name the C++ driver
# (g++): the plain gcc driver does not link the C++ standard library by default.
- export CC="gcc-4.9"
- export CXX="g++-4.9"
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 50
Expand Down
105 changes: 45 additions & 60 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ set (BOOST_CXX_FLAGS "-Wno-deprecated-register -std=c++11")
SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})

## Set the standard required compile flags
set (CMAKE_CXX_FLAGS "-g -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DHAVE_ANSI_TERM -DHAVE_SSTREAM -DHAVE_CONFIG_H -Wall -std=c++11 -Wreturn-type -Werror=return-type")
# Nov 18th --- removed -DHAVE_CONFIG_H
set (CMAKE_CXX_FLAGS "-g -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -std=c++11 -Wreturn-type -Werror=return-type")

##
# OSX is strange (some might say, stupid in this regard). Deal with its quirkiness here.
Expand Down Expand Up @@ -252,7 +253,8 @@ message("==================================================================")
include(ExternalProject)
ExternalProject_Add(libbwa
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
URL http://www.cs.cmu.edu/~robp/files/bwa-master.tar.gz
URL http://www.cs.stonybrook.edu/~rp/files/bwa-master.tar.gz
#URL http://www.cs.cmu.edu/~robp/files/bwa-master.tar.gz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/bwa-master
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND ""
Expand All @@ -261,26 +263,6 @@ ExternalProject_Add(libbwa
BUILD_IN_SOURCE TRUE
)

##
# The optimal mem chaining provided by clasp seems not to provide any benefit
# and is much more computationally intensive than the greedy approach. I'm
# removing this dependency and functionality for the time being.
##
#message("Build system will fetch and build CLASP (for Salmon)")
#message("==================================================================")
#ExternalProject_Add(libclasp
# DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
# URL http://www.cs.cmu.edu/~robp/files/clasp_v1_1.tar.gz
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/clasp_v1_1
# INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
# CONFIGURE_COMMAND ""
# BUILD_COMMAND sh -c "make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} && ar cr libclasp.a <SOURCE_DIR>/libs/*.o"
# INSTALL_COMMAND sh -c "mkdir -p <INSTALL_DIR>/lib && mkdir -p <INSTALL_DIR>/include/clasp && cp libclasp.a <INSTALL_DIR>/lib && cp libs/*.h <INSTALL_DIR>/include/clasp"
# BUILD_IN_SOURCE TRUE
#)
#


message("Build system will fetch and build CMPH")
message("==================================================================")
include(ExternalProject)
Expand Down Expand Up @@ -452,51 +434,54 @@ ExternalProject_Add(libgff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_SOURCE_DIR}/external/install
)

#message("Build system will compile parallel samtools")
#message("==================================================================")
#ExternalProject_Add(libsamtools
#DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
#URL http://www.cs.cmu.edu/~robp/files/samtools.tgz
#SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/samtools
#BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/samtools
#INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
#CONFIGURE_COMMAND ""
#BUILD_COMMAND $(MAKE)
#INSTALL_COMMAND sh -c "mkdir -p <INSTALL_DIR>/include/samtools && mkdir -p <INSTALL_DIR>/include/samtools/pbgzip && mkdir -p <INSTALL_DIR>/include/samtools/bcftools && cp <SOURCE_DIR>/*.h <INSTALL_DIR>/include/samtools/ && cp <SOURCE_DIR>/pbgzip/*.h <INSTALL_DIR>/include/samtools/pbgzip/ && cp <SOURCE_DIR>/bcftools/*.h <INSTALL_DIR>/include/samtools/bcftools/ && cp <SOURCE_DIR>/libbam.a <INSTALL_DIR>/lib/"
#)

message("Build system will compile htslib")
message("Build system will compile Staden IOLib")
message("==================================================================")
ExternalProject_Add(libhts
ExternalProject_Add(libstadenio
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/samtools/htslib/archive/1.1.tar.gz -o htslib-1.1.tar.gz &&
tar -xzf htslib-1.1.tar.gz &&
rm -fr htslib &&
mv -f htslib-1.1 htslib
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/htslib
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/archive/v1.13.7.tar.gz -o staden-io_lib-v1.13.7.tar.gz &&
tar -xzf staden-io_lib-v1.13.7.tar.gz &&
rm -fr staden-io_lib &&
mv -f staden-io_lib-1.13.7 staden-io_lib
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/staden-io_lib
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND ""
CONFIGURE_COMMAND ./configure --enable-shared=no --without-libcurl --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
BUILD_IN_SOURCE 1
INSTALL_COMMAND make install prefix=<INSTALL_DIR>
INSTALL_COMMAND make install
)

message("Build system will compile samtools")
message("==================================================================")
ExternalProject_Add(libsamtools
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/samtools/samtools/archive/1.1.tar.gz -o samtools-1.1.tar.gz &&
tar -xzf samtools-1.1.tar.gz &&
rm -fr samtools &&
mv -f samtools-1.1 samtools
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/samtools
CONFIGURE_COMMAND ""
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
BUILD_IN_SOURCE 1
INSTALL_COMMAND make install prefix=<INSTALL_DIR> &&
mv <SOURCE_DIR>/libbam.a <INSTALL_DIR>/lib/libbam.a
)
#message("Build system will compile htslib")
#message("==================================================================")
#ExternalProject_Add(libhts
# DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
# DOWNLOAD_COMMAND curl -k -L https://github.com/samtools/htslib/archive/1.1.tar.gz -o htslib-1.1.tar.gz &&
# tar -xzf htslib-1.1.tar.gz &&
# rm -fr htslib &&
# mv -f htslib-1.1 htslib
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/htslib
# INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
# CONFIGURE_COMMAND ""
# BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
# BUILD_IN_SOURCE 1
# INSTALL_COMMAND make install prefix=<INSTALL_DIR>
#)
#
#message("Build system will compile samtools")
#message("==================================================================")
#ExternalProject_Add(libsamtools
# DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
# DOWNLOAD_COMMAND curl -k -L https://github.com/samtools/samtools/archive/1.1.tar.gz -o samtools-1.1.tar.gz &&
# tar -xzf samtools-1.1.tar.gz &&
# rm -fr samtools &&
# mv -f samtools-1.1 samtools
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/samtools
# CONFIGURE_COMMAND ""
# INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
# BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
# BUILD_IN_SOURCE 1
# INSTALL_COMMAND make install prefix=<INSTALL_DIR> &&
# mv <SOURCE_DIR>/libbam.a <INSTALL_DIR>/lib/libbam.a
#)


###
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
Documentation
==============

The documentation for Salmon and Sailfish is being migrated to [ReadTheDocs](www.readthedocs.org).
The documentation for Salmon and Sailfish is being migrated to [ReadTheDocs](http://readthedocs.org).
See [the latest documentation there](http://sailfish.readthedocs.org).
15 changes: 4 additions & 11 deletions include/AlignmentGroup.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#define ALIGNMENT_GROUP

extern "C" {
#include "htslib/sam.h"
#include "samtools/samtools.h"
#include "io_lib/scram.h"
#include "io_lib/os.h"
}

#include <vector>
Expand All @@ -19,17 +19,10 @@ class AlignmentGroup {
std::string* read() { return read_; }

inline std::vector<FragT>& alignments() { return alignments_; }
void addAlignment(FragT p) { alignments_.push_back(p);}
void addAlignment(FragT p) { alignments_.push_back(p); }
inline size_t numAlignments() { return alignments_.size(); }
inline size_t size() { return numAlignments(); }
/*
void addAlignment(bam1_t* r) {
alignments_.push_back({r, sailfish::math::LOG_0});
}
void addAlignment(bam1_t* r1, bam1_t* r2) {
alignments_.push_back({r1, r2, sailfish::math::LOG_0});
}
*/

private:
std::vector<FragT> alignments_;
std::string* read_;
Expand Down
32 changes: 21 additions & 11 deletions include/AlignmentLibrary.hpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#ifndef ALIGNMENT_LIBRARY_HPP
#define ALIGNMENT_LIBRARY_HPP

// samtools / htslib includes
extern "C" {
#include "htslib/sam.h"
#include "samtools/samtools.h"
#include "io_lib/scram.h"
#include "io_lib/os.h"
#undef max
}


// Our includes
#include "ClusterForest.hpp"
#include "Transcript.hpp"
Expand All @@ -25,6 +26,7 @@ extern "C" {
// Standard includes
#include <vector>
#include <memory>
#include <functional>

template <typename T>
class NullFragmentFilter;
Expand Down Expand Up @@ -72,7 +74,9 @@ class AlignmentLibrary {
}

// The alignment file existed, so create the alignment queue
bq = std::unique_ptr<BAMQueue<FragT>>(new BAMQueue<FragT>(alnFiles, libFmt_));
size_t numParseThreads = salmonOpts.numParseThreads;
std::cerr << "parseThreads = " << numParseThreads << "\n";
bq = std::unique_ptr<BAMQueue<FragT>>(new BAMQueue<FragT>(alnFiles, libFmt_, numParseThreads));

std::cerr << "Checking that provided alignment files have consistent headers . . . ";
if (! salmon::utils::headersAreConsistent(bq->headers()) ) {
Expand All @@ -84,12 +88,12 @@ class AlignmentLibrary {
}
std::cerr << "done\n";

bam_header_t* header = bq->header();
SAM_hdr* header = bq->header();

// The transcript file existed, so load up the transcripts
double alpha = 0.005;
for (size_t i = 0; i < header->n_targets; ++i) {
transcripts_.emplace_back(i, header->target_name[i], header->target_len[i], alpha);
for (size_t i = 0; i < header->nref; ++i) {
transcripts_.emplace_back(i, header->ref[i].name, header->ref[i].len, alpha);
}

FASTAParser fp(transcriptFile.string());
Expand All @@ -116,8 +120,7 @@ class AlignmentLibrary {
fragLenKernelP, 1)
);

errMod_.reset(new
ErrorModel(1.0, salmonOpts.maxExpectedReadLen));
errMod_.reset(new ErrorModel(1.0, salmonOpts.maxExpectedReadLen));
// Start parsing the alignments
NullFragmentFilter<FragT>* nff = nullptr;
bq->start(nff);
Expand All @@ -127,7 +130,9 @@ class AlignmentLibrary {

inline bool getAlignmentGroup(AlignmentGroup<FragT>*& ag) { return bq->getAlignmentGroup(ag); }

inline bam_header_t* header() { return bq->header(); }
//inline t_pool* threadPool() { return threadPool_.get(); }

inline SAM_hdr* header() { return bq->header(); }

inline FragmentLengthDistribution& fragmentLengthDistribution() {
return *flDist_.get();
Expand Down Expand Up @@ -165,7 +170,10 @@ class AlignmentLibrary {

bq->reset();
bq->start(filter);
if (incPasses) { quantificationPasses_++; }
if (incPasses) {
quantificationPasses_++;
fmt::print(stderr, "Current iteration = {}\n", quantificationPasses_);
}
return true;
}

Expand Down Expand Up @@ -194,7 +202,9 @@ class AlignmentLibrary {
* A pointer to the queue from which the fragments
* will be read.
*/
//std::unique_ptr<t_pool, std::function<void(t_pool*)>> threadPool_;
std::unique_ptr<BAMQueue<FragT>> bq;

/**
* The cluster forest maintains the dynamic relationship
* defined by transcripts and reads --- if two transcripts
Expand Down
32 changes: 19 additions & 13 deletions include/BAMQueue.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#ifndef __BAMQUEUE_HPP__
#define __BAMQUEUE_HPP__

extern "C" {
#include "htslib/sam.h"
#include "samtools/samtools.h"
}

#include <boost/lockfree/spsc_queue.hpp>
#include <boost/lockfree/queue.hpp>
#include <tbb/atomic.h>
Expand All @@ -25,13 +20,22 @@ extern "C" {
#include "ReadPair.hpp"
#include "UnpairedRead.hpp"

extern "C" {
#include "io_lib/scram.h"
#include "io_lib/os.h"
#undef max
#undef min
}

/**
* Simple structure holding info about the alignment file.
*/
struct AlignmentFile {
boost::filesystem::path fileName;
samFile* fp;
bam_header_t* header;
std::string readMode;
scram_fd* fp;
SAM_hdr* header;
uint32_t numParseThreads;
};

/**
Expand All @@ -44,14 +48,14 @@ struct AlignmentFile {
template <typename FragT>
class BAMQueue {
public:
BAMQueue(std::vector<boost::filesystem::path>& fnames, LibraryFormat& libFmt);
BAMQueue(std::vector<boost::filesystem::path>& fnames, LibraryFormat& libFmt, uint32_t numParseThreads);
~BAMQueue();
void forceEndParsing();

bam_header_t* header();
bam_header_t* safeHeader();
SAM_hdr* header();
SAM_hdr* safeHeader();

std::vector<bam_header_t*> headers();
std::vector<SAM_hdr*> headers();

template <typename FilterT>
void start(FilterT filt);
Expand Down Expand Up @@ -82,14 +86,16 @@ class BAMQueue {
template <typename FilterT>
inline bool getFrag_(UnpairedRead& sread, FilterT filt);

public:
bool verbose=false;
private:
std::vector<AlignmentFile> files_;
std::string fname_;
LibraryFormat libFmt_;

std::vector<AlignmentFile>::iterator currFile_;
samFile* fp_ = nullptr;
bam_header_t* hdr_ = nullptr;
scram_fd* fp_ = nullptr;
SAM_hdr* hdr_ = nullptr;

//htsFile* fp_ = nullptr;
size_t totalReads_;
Expand Down
Loading

0 comments on commit ec457ce

Please sign in to comment.