Skip to content

Commit

Permalink
Added optional --trim-complex-tail switch
Browse files Browse the repository at this point in the history
  • Loading branch information
pjotrp committed Dec 16, 2020
1 parent ce156da commit 9773508
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
22 changes: 21 additions & 1 deletion src/AlleleParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1386,7 +1386,7 @@ RegisteredAlignment& AlleleParser::registerAlignment(BAMALIGN& alignment, Regist
* The cigar only records matches for sequences that have embedded
* mismatches.
*
* Also, we don't store the entire undelying sequence; just the subsequence
* Also, we don't store the entire underlying sequence; just the subsequence
* that matches our current target region.
*
* As we step through a match sequence, we look for mismatches. When we
Expand Down Expand Up @@ -1826,6 +1826,26 @@ RegisteredAlignment& AlleleParser::registerAlignment(BAMALIGN& alignment, Regist
return ra;
}

DEBUG2("registerAlignment: done registering alleles with addAllele");

if (parameters.trimComplexTail) {
// Simplify complex final alleles by splitting off any trailing reference matches
Allele& lastAllele = ra.alleles.back();
vector< pair<int, string> > lastCigar = splitCigar(lastAllele.cigar);
if (lastAllele.isComplex() && lastCigar.back().second == "M") {
DEBUG2("registerAlignment: trimming reference matches from end of final complex allele");
// FIXME TODO: The allele may not actually be complex
// anymore after splitting, in which case we should demote
// its type to SNP/MNP/INDEL.
// -trs, 23 Jan 2015
ra.alleles.push_back(lastAllele);
Allele& pAllele = ra.alleles.at(ra.alleles.size() - 2);
string seq; vector<pair<int, string> > cig; vector<short> quals;
pAllele.subtractFromEnd(lastCigar.back().first, seq, cig, quals);
ra.alleles.back().subtractFromStart(pAllele.referenceLength, seq, cig, quals);
}
}

// this deals with the case in which we have embedded Ns in the read
// often this happens at the start or end of reads, thus affecting our RegisteredAlignment::start and ::end
ra.start = ra.alleles.front().position;
Expand Down
23 changes: 16 additions & 7 deletions src/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void Parameters::simpleUsage(char ** argv) {
}

void Parameters::usage(char** argv) {
cout
cout
<< "usage: " << argv[0] << " [OPTION] ... [BAM FILE] ... " << endl
<< endl
<< "Bayesian haplotype-based polymorphism discovery." << endl
Expand Down Expand Up @@ -304,6 +304,8 @@ void Parameters::usage(char** argv) {
<< " Skip processing of alignments overlapping positions with coverage >N." << endl
<< " This filters sites above this coverage, but will also reduce data nearby." << endl
<< " default: no limit" << endl
<< " --trim-complex-tail" << endl
<< " Trim complex tails." << endl
<< endl
<< "population priors:" << endl
<< endl
Expand Down Expand Up @@ -490,6 +492,7 @@ Parameters::Parameters(int argc, char** argv) {
minCoverage = 0;
limitCoverage = 0;
skipCoverage = 0;
trimComplexTail = 0;
debuglevel = 0;
debug = false;
debug2 = false;
Expand Down Expand Up @@ -564,6 +567,7 @@ Parameters::Parameters(int argc, char** argv) {
{"min-coverage", required_argument, 0, '!'},
{"limit-coverage", required_argument, 0, '+'},
{"skip-coverage", required_argument, 0, 'g'},
{"trim-complex-tail", no_argument, 0, '𝛃'},
{"genotype-qualities", no_argument, 0, '='},
{"variant-input", required_argument, 0, '@'},
{"only-use-input-alleles", no_argument, 0, 'l'},
Expand Down Expand Up @@ -665,15 +669,15 @@ Parameters::Parameters(int argc, char** argv) {
gVCFout = true;
break;

// -& BOOL/INT --gvcf-no-chunk BOOL/INT --gvcf-chunk
// -& BOOL/INT --gvcf-no-chunk BOOL/INT --gvcf-chunk
case '&':
//cerr << "optarg:\t" << optarg << endl;
if(optarg[0] == 't'){
gVCFNoChunk = true;
} else if (optarg[0] == 'f'){
gVCFNoChunk = false;
gVCFNoChunk = false;
} else {
gVCFchunk = atoi(optarg);
gVCFchunk = atoi(optarg);
}
break;

Expand Down Expand Up @@ -722,6 +726,11 @@ Parameters::Parameters(int argc, char** argv) {
}
break;

// --trim-complex-tail
case '𝛃':
trimComplexTail = true;
break;

// -n --use-best-n-alleles
case 'n':
if (!convert(optarg, useBestNAlleles)) {
Expand Down Expand Up @@ -1083,7 +1092,7 @@ Parameters::Parameters(int argc, char** argv) {
break;

case '#':

// --version
cout << "version: " << VERSION_GIT << endl;
exit(0);
Expand All @@ -1093,7 +1102,7 @@ Parameters::Parameters(int argc, char** argv) {
usage(argv);
exit(0);
break;

// either catch "long options" or
case '?': // print a suggestion about the most-likely long option which the argument matches
{
Expand All @@ -1120,7 +1129,7 @@ Parameters::Parameters(int argc, char** argv) {
}

}

// any remaining arguments are considered as bam files
if (optind < argc) {
if (useStdin) {
Expand Down
1 change: 1 addition & 0 deletions src/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ class Parameters {
int minCoverage; // -! --min-coverage
int limitCoverage; // -+ --limit-coverage
int skipCoverage; // -g --skip-coverage
int trimComplexTail; // -. --trim-complex-tail
int debuglevel; // -d --debug increments
bool debug; // set if debuglevel >=1
bool debug2; // set if debuglevel >=2
Expand Down

0 comments on commit 9773508

Please sign in to comment.