Skip to content

Commit

Permalink
Merge pull request #41 from yhoogstrate/mount_2bit
Browse files Browse the repository at this point in the history
v1.5.0: Allows mounting UCSC 2bit files onto the filesystem using FUSE
  • Loading branch information
yhoogstrate authored Aug 2, 2019
2 parents 4759661 + 13f8411 commit 3606202
Show file tree
Hide file tree
Showing 15 changed files with 1,075 additions and 284 deletions.
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ project(fastafs)
# Do this once in a while - find different bugs
#set(CMAKE_CXX_COMPILER "clang++")

set(PROJECT_VERSION "1.4.0")
set(PROJECT_VERSION "1.5.0")
set(PACKAGE_URL "https://github.com/yhoogstrate/fastafs")
set(PACKAGE_BUGREPORT "${PACKAGE_URL}/issues")

Expand Down Expand Up @@ -75,7 +75,7 @@ add_custom_target(tidy DEPENDS make_tidy )

add_subdirectory(src)
include_directories(include)
add_definitions(-std=c++14)
add_definitions(-std=c++17)

# Boost
find_package(Boost COMPONENTS unit_test_framework REQUIRED)
Expand Down Expand Up @@ -103,6 +103,7 @@ add_executable(fastafs
src/fasta_to_fastafs.cpp
src/ucsc2bit_to_fastafs.cpp
src/fastafs.cpp
src/ucsc2bit.cpp
src/twobit_byte.cpp
src/database.cpp
src/utils.cpp
Expand All @@ -125,8 +126,9 @@ add_test(test_cache "${BUILD_TEST_DIR}/test_cache")
add_test(test_view "${BUILD_TEST_DIR}/test_view")
#add_test(test_tree "${BUILD_TEST_DIR}/test_tree")
add_test(test_fastafs "${BUILD_TEST_DIR}/test_fastafs")
add_test(test_ucsc2bit "${BUILD_TEST_DIR}/test_ucsc2bit")
add_test(test_fastafs_as_ucsc2bit "${BUILD_TEST_DIR}/test_ucsc2bit")
add_test(test_ucsc2bit_to_fastafs "${BUILD_TEST_DIR}/test_ucsc2bit_to_fastafs")
add_test(test_ucsc2bit_as_fasta "${BUILD_TEST_DIR}/test_ucsc2bit_as_fasta")
add_test(test_utils "${BUILD_TEST_DIR}/test_utils")


Expand Down
6 changes: 6 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
2019-08-02 Youri Hoogstrate

* v1.5.0
* Support mounting 2bit files (without FASTAFS file(s))
- Requires C++17 for several std::filesystem calls

2019-06-05 Youri Hoogstrate

* Changed SHA1 to MD5 hashes for BAM compatibility
Expand Down
29 changes: 25 additions & 4 deletions include/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,34 @@
#define GIT_SHA1_STRING "@GIT_SHA1_STRING@"


/* https://stackoverflow.com/questions/26652904/boost-check-equal-and-dervatives-add-custom-message/26687584 */
#define BOOST_CHECK_EQUAL_MESSAGE(L, R, M) { BOOST_TEST_INFO(M); BOOST_CHECK_EQUAL(L, R); }



// Numeric codes for the four nucleotides. Every code fits in two bits; the
// trailing comment on each line shows the corresponding bit pattern.
#define NUCLEOTIDE_T 0 // 00
#define NUCLEOTIDE_C 1 // 01
#define NUCLEOTIDE_A 2 // 10
#define NUCLEOTIDE_G 3 // 11

//#define TWOBIT_MAGIC "\x43\x27\x41\x1a";
//#define TWOBIT_VERSION "\x00\x00\x00\x00"

/* https://stackoverflow.com/questions/26652904/boost-check-equal-and-dervatives-add-custom-message/26687584 */
/* Test helper: registers M through BOOST_TEST_INFO and then runs
 * BOOST_CHECK_EQUAL(L, R).
 * NOTE(review): the expansion is a bare { ... } block, not do { ... } while (0),
 * so `if (c) BOOST_CHECK_EQUAL_MESSAGE(a, b, m); else ...` does not compile. */
#define BOOST_CHECK_EQUAL_MESSAGE(L, R, M) { BOOST_TEST_INFO(M); BOOST_CHECK_EQUAL(L, R); }


#ifndef CONFIG_HPP
#define CONFIG_HPP

// The `s` literal suffix used below builds std::string objects that keep their
// embedded NUL bytes; converting the same literals through const char* would
// truncate them at the first NUL (the version strings would become empty).
// Kept at file scope because other translation units may rely on it.
using namespace std::literals;


// All constants are C++17 inline variables: every translation unit that
// includes this header refers to one shared definition instead of getting its
// own internal-linkage copy (and its own dynamic initialisation).

// Read buffer size (presumably in bytes - confirm against the readers).
inline constexpr int READ_BUFFER_SIZE = 4096;

// 4-byte magic and version strings for the UCSC 2bit format.
inline const std::string UCSC2BIT_MAGIC = "\x43\x27\x41\x1a"s;
inline const std::string UCSC2BIT_VERSION = "\x00\x00\x00\x00"s;

// 4-byte magic and version strings for the FASTAFS format.
inline const std::string FASTAFS_MAGIC = "\x0F\x0A\x46\x53"s;
inline const std::string FASTAFS_VERSION = "\x00\x00\x00\x00"s;

// Header line for dictionary output (presumably the first line of a
// SAM-style sequence dictionary - confirm against the writer).
inline const std::string DICT_HEADER = "@HD\tVN:1.0\tSO:unsorted\n";


#endif
13 changes: 0 additions & 13 deletions include/fastafs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,5 @@ class fastafs
};


// Shared constants.
// The `s` suffix keeps embedded NUL bytes inside the std::string values.
using namespace std::literals;

// Read buffer size (presumably bytes - confirm against the readers).
static const int READ_BUFFER_SIZE{4096};

// FASTAFS container: 4-byte magic followed by a 4-byte version.
static const std::string FASTAFS_MAGIC{"\x0F\x0A\x46\x53"s};
static const std::string FASTAFS_VERSION{"\x00\x00\x00\x00"s};

// UCSC 2bit container: 4-byte magic followed by a 4-byte version.
static const std::string UCSC2BIT_MAGIC{"\x43\x27\x41\x1a"s};
static const std::string UCSC2BIT_VERSION{"\x00\x00\x00\x00"s};

// Header line for dictionary output.
static const std::string DICT_HEADER{"@HD\tVN:1.0\tSO:unsorted\n"};


#endif
64 changes: 64 additions & 0 deletions include/ucsc2bit.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@

#include <vector>


#include "utils.hpp"

#ifndef UCSC2BIT_HPP
#define UCSC2BIT_HPP




/**
 * Metadata for one sequence of a UCSC 2bit file: its name, file offsets,
 * length and two sets of start/end coordinate lists, together with helpers
 * that expose the sequence as FASTA text.
 *
 * NOTE(review): only declarations are visible in this header. The parameter
 * meanings noted below are inferred from names and must be confirmed against
 * the implementation (src/ucsc2bit.cpp).
 */
class ucsc2bit_seq
{
public:
explicit ucsc2bit_seq();

std::string name;//may not exceed 255 chars in current datatype
// NOTE(review): data_position and sequence_data_position carry the same
// comment; one of the two is presumably the offset of the sequence record
// (header) rather than of the packed nucleotides - confirm and reword.
uint32_t data_position;// file offset to start reading sequence data
uint32_t sequence_data_position;// file offset to start reading sequence data

uint32_t n;// number nucleotides [ACTG + N]

// presumably the blocks of unknown nucleotides (N) - TODO confirm
std::vector<uint32_t> n_starts;// start positions (nucleotide positions; 0-based)
std::vector<uint32_t> n_ends;// end positions (nucleotide positions; 0-based)

// presumably the masked (lower-case) blocks - TODO confirm
std::vector<uint32_t> m_starts;// start positions (nucleotide positions; 0-based)
std::vector<uint32_t> m_ends;// end positions (nucleotide positions; 0-based)

// Size of this sequence rendered as FASTA; the argument is presumably the
// line width (padding) - TODO confirm.
uint32_t fasta_filesize(uint32_t);
// NOTE(review): meaning of the three arguments is not visible from this header.
uint32_t n_padding(uint32_t, uint32_t, uint32_t);

// Copies part of the FASTA rendering into a caller-supplied buffer; presumably
// (padding, buffer, buffer size, offset into the FASTA text, open file
// stream) with the return value being the number of bytes written - TODO confirm.
uint32_t view_fasta_chunk(uint32_t, char*, size_t, off_t, std::ifstream*);
};



/**
 * A UCSC 2bit file as a whole: holds one ucsc2bit_seq entry per sequence and
 * offers FASTA and faidx views over the file.
 *
 * NOTE(review): only declarations are visible in this header. The parameter
 * meanings noted below are inferred from names and must be confirmed against
 * the implementation (src/ucsc2bit.cpp).
 */
class ucsc2bit
{
public:

// The argument is presumably the display/base name (see `name`) - TODO confirm.
explicit ucsc2bit(std::string);
~ucsc2bit();

std::string name;// needed as basename for mounting
std::string filename;

// Presumably parses the 2bit file at the given path and fills `data` - TODO confirm.
void load(std::string);

// NOTE(review): raw pointers. If the destructor deletes these entries, copying
// a ucsc2bit object would lead to a double free; consider deleting the copy
// operations or storing std::unique_ptr - confirm ownership first.
std::vector<ucsc2bit_seq*> data;


// NOTE(review): presumably the total nucleotide count over all sequences - confirm.
uint32_t n();
// Size of the whole file rendered as FASTA; the argument is presumably the
// line width (padding) - TODO confirm.
size_t fasta_filesize(uint32_t);


// Chunked readers; presumably (padding, buffer, buffer size, offset into the
// virtual file) returning the number of bytes written - TODO confirm.
uint32_t view_fasta_chunk(uint32_t, char*, size_t, off_t);
uint32_t view_faidx_chunk(uint32_t, char*, size_t, off_t);
std::string get_faidx(uint32_t);//@todo get rid of this, make it full chunked
};



#endif
Loading

0 comments on commit 3606202

Please sign in to comment.