Skip to content

Commit

Permalink
Merge pull request #93 from sa501428/master
Browse files Browse the repository at this point in the history
add method for getting number of contacts
  • Loading branch information
sa501428 authored Feb 3, 2022
2 parents ae61146 + c45eaff commit 6b9c42e
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 13 deletions.
80 changes: 67 additions & 13 deletions C++/straw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,17 +711,7 @@ void appendRecord(vector<contactRecord> &vector, int32_t index, int32_t binX, in
vector[index] = record;
}

// this is the meat of reading the data. takes in the block number and returns the set of contact records corresponding to
// that block. the block data is compressed and must be decompressed using the zlib library functions
vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t version) {
if (idx.size <= 0) {
vector<contactRecord> v;
return v;
}
char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
// Decompress the block
// zlib struct
int32_t decompressBlock(indexEntry idx, char *compressedBytes, char *uncompressedBytes) {
z_stream infstream;
infstream.zalloc = Z_NULL;
infstream.zfree = Z_NULL;
Expand All @@ -734,8 +724,37 @@ vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t
inflateInit(&infstream);
inflate(&infstream, Z_NO_FLUSH);
inflateEnd(&infstream);
int32_t uncompressedSize;
uncompressedSize = static_cast<int32_t>(infstream.total_out);
int32_t uncompressedSize = static_cast<int32_t>(infstream.total_out);
return uncompressedSize;
}

long getNumRecordsInBlock(const string &fileName, indexEntry idx, int32_t version){
if (idx.size <= 0) {
return 0;
}
char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);

// create stream from buffer for ease of use
memstream bufferin(uncompressedBytes, uncompressedSize);
uint64_t nRecords;
nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
delete[] compressedBytes;
delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
return nRecords;
}

// this is the meat of reading the data. takes in the block number and returns the set of contact records corresponding to
// that block. the block data is compressed and must be decompressed using the zlib library functions
vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t version) {
if (idx.size <= 0) {
vector<contactRecord> v;
return v;
}
char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);

// create stream from buffer for ease of use
memstream bufferin(uncompressedBytes, uncompressedSize);
Expand Down Expand Up @@ -1128,6 +1147,19 @@ class MatrixZoomData {
}
return finalMatrix;
}

int64_t getNumberOfTotalRecords() {
if (!foundFooter) {
return 0;
}
int64_t regionIndices[4] = {0, numBins1, 0, numBins2};
set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
int64_t total = 0;
for (int32_t blockNumber : blockNumbers) {
total += getNumRecordsInBlock(fileName, blockMap[blockNumber], version);
}
return total;
}
};

class HiCFile {
Expand Down Expand Up @@ -1271,3 +1303,25 @@ vector<contactRecord> straw(const string &matrixType, const string &norm, const
MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
return mzd->getRecords(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
}

int64_t getNumRecordsForFile(const string &fileName, int32_t binsize) {
HiCFile *hiCFile = new HiCFile(fileName);
int64_t totalNumRecords = 0;

vector<chromosome> chromosomes = hiCFile->getChromosomes();
for(int32_t i = 0; i < chromosomes.size(); i++){
if(chromosomes[i].index <= 0) continue;
for(int32_t j = i; j < chromosomes.size(); j++){
if(chromosomes[j].index <= 0) continue;
MatrixZoomData *mzd;
if(chromosomes[i].index > chromosomes[j].index){
mzd = hiCFile->getMatrixZoomData(chromosomes[j].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
} else {
mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[j].name, "observed", "NONE", "BP", binsize);
}
totalNumRecords += mzd->getNumberOfTotalRecords();
}
}

return totalNumRecords;
}
2 changes: 2 additions & 0 deletions C++/straw.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,6 @@ std::vector<contactRecord>
straw(const std::string& matrixType, const std::string& norm, const std::string& fname, const std::string& chr1loc, const std::string& chr2loc,
const std::string &unit, int32_t binsize);

int64_t getNumRecordsForFile(const std::string& filename, int32_t binsize);

#endif

0 comments on commit 6b9c42e

Please sign in to comment.