Skip to content

Commit

Permalink
Merge pull request #22 from DKFZ-ODCF/review-my-code
Browse files Browse the repository at this point in the history
Restore reference behaviour for sophiaMref
  • Loading branch information
vinjana authored Mar 18, 2024
2 parents aabceca + e65cfbf commit 95089fe
Show file tree
Hide file tree
Showing 51 changed files with 1,299 additions and 606 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ sophia: $(BUILD_DIR)/global.o \
$(BUILD_DIR)/Breakpoint.o \
$(BUILD_DIR)/ChosenBp.o \
$(BUILD_DIR)/ChrConverter.o \
$(BUILD_DIR)/IndexRange.o \
$(BUILD_DIR)/Hg37ChrConverter.o \
$(BUILD_DIR)/GenericChrConverter.o \
$(BUILD_DIR)/MateInfo.o \
Expand All @@ -121,6 +122,7 @@ sophiaAnnotate: $(BUILD_DIR)/global.o \
$(BUILD_DIR)/Breakpoint.o \
$(BUILD_DIR)/BreakpointReduced.o \
$(BUILD_DIR)/ChrConverter.o \
$(BUILD_DIR)/IndexRange.o \
$(BUILD_DIR)/Hg37ChrConverter.o \
$(BUILD_DIR)/GenericChrConverter.o \
$(BUILD_DIR)/DeFuzzier.o \
Expand Down Expand Up @@ -149,6 +151,7 @@ sophiaMref: $(BUILD_DIR)/global.o \
$(BUILD_DIR)/Alignment.o \
$(BUILD_DIR)/GlobalAppConfig.o \
$(BUILD_DIR)/ChrConverter.o \
$(BUILD_DIR)/IndexRange.o \
$(BUILD_DIR)/Hg37ChrConverter.o \
$(BUILD_DIR)/GenericChrConverter.o \
$(BUILD_DIR)/HelperFunctions.o \
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ If you built SOPHIA with dynamic libraries, the some libraries are runtime requi
* Boost 1.82.0
* libbacktrace 20220708
* gtest 1.14.0
* gmock 1.14.0
* rapidcsv 8.0.0
* strtk 0.6.0

Expand Down Expand Up @@ -356,6 +357,7 @@ If you want to know more then, currently, the only documentation of we can offer
* Patch: Added unit tests.
* Patch: Code readability improvements, documentation, `.editorconfig` file, and `clang-format` configuration
* Patch: Major refactorings for code clarity (and understanding of the convoluted code) and to improve usage of C++ type system for compiler-based checks of changes.
* Patch: For `sophiaAnnotate` the default value for `clonalitylofreq` was advertised in the usage information as 10, but the actual value was 5. Now, the correct value (5) is advertised as default.

* 35 (9e3b6ed)
* Forked from [bitbucket](https://bitbucket.org/compbio_charite/sophia/src/master/)
12 changes: 6 additions & 6 deletions include/Alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ namespace sophia {
BASE_QUALITY_THRESHOLD,
BASE_QUALITY_THRESHOLD_LOW;

static unsigned int CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD,
INDEL_NUCLEOTIDE_COUNT_THRESHOLD;
static ChrSize CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD,
INDEL_NUCLEOTIDE_COUNT_THRESHOLD;

static double ISIZEMAX;

Expand All @@ -70,7 +70,7 @@ namespace sophia {

const string &getSamLine() const { return samLine; }

const vector<ChrSize> &getSamChunkPositions() const {
const vector<unsigned int> &getSamChunkPositions() const {
return samTabPositions;
}

Expand All @@ -84,7 +84,7 @@ namespace sophia {
return readBreakpointTypes;
}

void setChosenBp(ChrSize chosenBpLoc, unsigned int alignmentIndex);
void setChosenBp(ChrSize chosenBpLoc, int alignmentIndex);

bool isOverhangEncounteredM() const { return chosenBp->bpEncounteredM; }

Expand Down Expand Up @@ -139,7 +139,7 @@ namespace sophia {

void mappingQualityCheck();

/** The `Alignment` isEventCandidate` is true, if the last CIGAR code indicates a match,
/** The `Alignment::isEventCandidate` is true, if the last CIGAR code indicates a match,
* or if the CIGAR indicates a soft-clip, hard-clip, insertion, or deletion.
*/
bool isEventCandidate() const;
Expand Down Expand Up @@ -187,7 +187,7 @@ namespace sophia {

bool validLine;

vector<ChrSize> samTabPositions;
vector<unsigned int> samTabPositions;

string::const_iterator saCbegin, saCend;

Expand Down
28 changes: 15 additions & 13 deletions include/Breakpoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <string>
#include <utility>
#include <vector>
#include <cmath>


namespace sophia {

Expand Down Expand Up @@ -130,37 +132,37 @@ namespace sophia {
return (pos < rhs.getPos());
}

bool closeToSupp(const SuppAlignment &compIn, ChrSize fuzziness) const {
bool closeToSupp(const SuppAlignment &compIn, ChrDistance fuzziness) const {
if (chrIndex == compIn.getChrIndex()) {
if (compIn.isFuzzy()) {
fuzziness = ChrSize(2.5 * DEFAULT_READ_LENGTH); // Truncates to lower integer.
return (pos - fuzziness) <=
(compIn.getExtendedPos() + fuzziness) &&
(compIn.getPos() - fuzziness) <= (pos + fuzziness);
fuzziness = ChrDistance(trunc(2.5 * static_cast<int>(DEFAULT_READ_LENGTH)));
return (ChrDistance(static_cast<int>(pos)) - fuzziness) <=
(ChrDistance(static_cast<int>(compIn.getExtendedPos())) + fuzziness) &&
(ChrDistance(static_cast<int>(compIn.getPos())) - fuzziness) <=
(ChrDistance(static_cast<int>(pos)) + fuzziness);
} else {
return (unsigned long) abs((long) pos - (long) compIn.getPos()) <= fuzziness;
return ChrDistance(abs(static_cast<int>(pos) - static_cast<int>(compIn.getPos()))) <= fuzziness;
}
} else {
return false;
}
}

ChrSize distanceToSupp(const SuppAlignmentAnno &compIn) const {
ChrSize result;
ChrDistance distanceToSupp(const SuppAlignmentAnno &compIn) const {
ChrDistance result;
if (chrIndex == compIn.getChrIndex()) {
if (compIn.isFuzzy()) {
if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) {
result = 0;
result = ChrDistance(0);
} else {
if (pos < compIn.getPos()) {
result = ChrSize(compIn.getPos() - pos);
result = ChrDistance(compIn.getPos() - pos);
} else {
// TODO Why return the difference to getExtendedPos() here, but to getPos() in the other branch?
result = ChrSize(pos - compIn.getExtendedPos());
result = ChrDistance(pos - compIn.getExtendedPos());
}
}
} else {
result = ChrSize(abs((long) pos - (long) compIn.getPos()));
result = ChrDistance(abs(static_cast<long>(pos) - static_cast<long>(compIn.getPos())));
}
} else {
result = 1000000;
Expand Down
50 changes: 27 additions & 23 deletions include/BreakpointReduced.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,7 @@ namespace sophia {
return pos < rhs.getPos();
}

/** This seems to be a function used for sorting positions, that assumes that the ordering
* of chromosomes produces a total order, i.e., e.g., that all positions on chr1 are
* "smaller" than all positions on chr2 (if chr1 has a smaller index than chr2).
*
* I doubt there is a biological meaning in the ordering of the chromosomes in the
* classic Hg37ChrConverter. */
/** This is used for sorting breakpoints. No biological meaning. */
bool fullSmaller(const BreakpointReduced &rhs) const {
if (chrIndex < rhs.getChrIndex()) {
return true;
Expand All @@ -89,9 +84,10 @@ namespace sophia {

template <typename T> int distanceToBp(const T &compIn) const {
if (chrIndex == compIn.getChrIndex()) {
return abs((int) pos - (int) compIn.getPos());
return abs(static_cast<int>(pos) - static_cast<int>(compIn.getPos()));
} else {
// Ups. -1 is used in < comparisons. Check usages, before refactoring this.
// This seems to be a special value. It is not explicitly used in comparisons.
// Check usages, before refactoring this.
return -1;
}
}
Expand Down Expand Up @@ -152,36 +148,36 @@ namespace sophia {
return res;
}

bool closeToSupp(const SuppAlignmentAnno &compIn, ChrSize fuzziness) const {
bool closeToSupp(const SuppAlignmentAnno &compIn, ChrDistance fuzziness) const {
if (chrIndex == compIn.getChrIndex()) {
if (compIn.isFuzzy()) {
fuzziness = ChrSize(2.5 * DEFAULT_READ_LENGTH); // truncate
return ((long) pos - (long) fuzziness) <= (long) (compIn.getExtendedPos() + fuzziness) &&
((long) compIn.getPos() - (long) fuzziness) <= (long) (pos + fuzziness);
fuzziness = ChrDistance(2.5 * DEFAULT_READ_LENGTH); // truncate
return (ChrDistance(pos) - fuzziness) <= (ChrDistance(compIn.getExtendedPos()) + fuzziness) &&
(ChrDistance(compIn.getPos()) - fuzziness) <= (ChrDistance(pos) + fuzziness);
} else {
return abs((long) pos - (long) compIn.getPos()) <= (long) fuzziness;
return abs(ChrDistance(pos) - ChrDistance(compIn.getPos())) <= fuzziness;
}
} else {
return false;
}
}

ChrSize distanceToSupp(const SuppAlignmentAnno &compIn) const {
ChrSize result;
ChrDistance distanceToSupp(const SuppAlignmentAnno &compIn) const {
ChrDistance result;
if (chrIndex == compIn.getChrIndex()) {
if (compIn.isFuzzy()) {
if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) {
result = 0;
} else {
if (pos < compIn.getPos()) {
result = ChrSize(compIn.getPos() - pos);
result = ChrDistance(compIn.getPos() - pos);
} else {
// TODO Why getExtendedPos() here, but getPos() above?
result = ChrSize(pos - compIn.getExtendedPos());
result = ChrDistance(pos - compIn.getExtendedPos());
}
}
} else {
result = ChrSize(abs((long) pos - (long) compIn.getPos()));
result = ChrDistance(abs(static_cast<long>(pos) - static_cast<long>(compIn.getPos())));
}
} else {
result = 1000000;
Expand Down Expand Up @@ -224,12 +220,20 @@ namespace sophia {
int lineIndex;
ChrIndex chrIndex;
ChrSize pos;
int normalSpans, lowQualSpansSoft, lowQualSpansHard, unpairedBreaksSoft,
unpairedBreaksHard, breaksShortIndel, lowQualBreaksSoft,
lowQualBreaksHard, repetitiveOverhangBreaks;
int pairedBreaksSoft, pairedBreaksHard;
int normalSpans,
lowQualSpansSoft,
lowQualSpansHard,
unpairedBreaksSoft,
unpairedBreaksHard,
breaksShortIndel,
lowQualBreaksSoft,
lowQualBreaksHard,
repetitiveOverhangBreaks;
int pairedBreaksSoft,
pairedBreaksHard;
int mateSupport;
int leftCoverage, rightCoverage;
int leftCoverage,
rightCoverage;
MrefMatch mrefHits;
GermlineMatch germlineInfo;
vector<SuppAlignmentAnno> suppAlignments;
Expand Down
6 changes: 3 additions & 3 deletions include/ChosenBp.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ using namespace std;
ChosenBp(char bpTypeIn,
int bpSizeIn,
bool bpEncounteredMIn,
int overhangStartIndexIn,
int overhangLengthIn,
ChrSize overhangStartIndexIn,
ChrSize overhangLengthIn,
int selfNodeIndexIn)
: bpType{bpTypeIn},
bpSize{bpSizeIn},
Expand All @@ -69,7 +69,7 @@ using namespace std;

bool bpEncounteredM;

int overhangStartIndex, overhangLength;
ChrSize overhangStartIndex, overhangLength;

vector<SuppAlignment> supplementaryAlignments;

Expand Down
22 changes: 16 additions & 6 deletions include/ChrCategory.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@


#include <string>
#include <boost/unordered/unordered_map.hpp>
#include <iterator>

#include <boost/unordered/unordered_map.hpp>

namespace sophia {

Expand All @@ -27,6 +26,8 @@ namespace sophia {
// Used for initialization.
static const boost::unordered::unordered_map<std::string, const ChrCategory> categories;

static const std::vector<ChrCategory> sorted_categories;

public:
// Only used to define categories.
ChrCategory(const std::string &s, std::size_t index);
Expand All @@ -36,8 +37,10 @@ namespace sophia {

/** Autosomal contigs, e.g. chr1, chr2, ..., chr22 */
static const ChrCategory& AUTOSOME;
/** Gonosomal contigs, e.g. chrX, Y */
static const ChrCategory& GONOSOME;
/** X chromosome */
static const ChrCategory& X;
/** Y chromosome */
static const ChrCategory& Y;
/** Extrachromosomal contigs, e.g. chrM, chrMT */
static const ChrCategory& EXTRACHROMOSOMAL;
/** Joined category for unlocalized, unplaced, or random placed contigs
Expand All @@ -63,7 +66,7 @@ namespace sophia {

static size_type numCategories();

static std::vector<ChrCategory> getCategories();
static const std::vector<ChrCategory>& getCategories();

std::string getName() const;

Expand Down Expand Up @@ -100,7 +103,14 @@ namespace std {
template<>
struct equal_to<sophia::ChrCategory> {
bool operator()(const sophia::ChrCategory& lhs, const sophia::ChrCategory& rhs) const {
return lhs.getName() == rhs.getName();
return lhs.operator==(rhs);
}
};

/** Specialization of std::less for sophia::ChrCategory.
 *  Delegates the comparison to the member ChrCategory::operator<. */
template<>
struct less<sophia::ChrCategory> {
bool operator()(const sophia::ChrCategory& lhs, const sophia::ChrCategory& rhs) const {
return lhs.operator<(rhs);
}
};

Expand Down
22 changes: 17 additions & 5 deletions include/ChrConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,23 @@ namespace sophia {
**/
class ChrConverter {

private:

const std::string assemblyName;

public:

ChrConverter(const std::string &assemblyNameIn);

virtual ~ChrConverter();

/** The name of the assembly. */
static const std::string assemblyName;
std::string getAssemblyName() const;

/** Number of chromosomes. */
virtual ChrIndex nChromosomes() const = 0;

/** Map an index position to a chromosome name. */
/** Map an index position to a chromosome name. Throws illegal_argument error, if the
index is not valid. */
virtual ChrName indexToChrName(ChrIndex index) const = 0;

/** Map a chromosome name to an index position. */
Expand All @@ -63,6 +69,12 @@ namespace sophia {
/** chrX, Y, ...*/
virtual bool isGonosome(ChrIndex index) const = 0;

/** chrX */
virtual bool isX(ChrIndex index) const = 0;

/** chrY */
virtual bool isY(ChrIndex index) const = 0;

/** phix index. */
virtual bool isTechnical(ChrIndex index) const = 0;

Expand Down Expand Up @@ -91,7 +103,7 @@ namespace sophia {
virtual CompressedMrefIndex nChromosomesCompressedMref() const = 0;

/** Map an index position to a chromosome name for compressed mref files. */
virtual ChrName indexToChrNameCompressedMref(CompressedMrefIndex index) const = 0;
virtual ChrName compressedMrefIndexToChrName(CompressedMrefIndex index) const = 0;

/** Map an index from the global index-space to the compressed mref index-space. */
virtual CompressedMrefIndex indexToCompressedMrefIndex(ChrIndex index) const = 0;
Expand All @@ -116,7 +128,7 @@ namespace sophia {
*
* If the `stopCharExt` parameter is an empty string, then it takes a position in a
* character stream, and translates the following character(s) into index positions
* (using ChrConverter::indexToChr). If the name cannot be parsed, throws a domain_error
* (using ChrConverter::indexToChrName). If the name cannot be parsed, throws a domain_error
* exception.
*
* IMPORTANT: Implementations may or may not use the `stopCharExt` parameter. Therefore,
Expand Down
Loading

0 comments on commit 95089fe

Please sign in to comment.