Skip to content

Commit 6c7894c

Browse files
author
griffan
committed
1.Fixed bug of sanity check on pileup input branch
2.Catch error() exception
1 parent a36b1c6 commit 6c7894c

6 files changed

+71
-35
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ target_link_libraries(VerifyBamID statgen Vcf samtools ${CMAKE_CURRENT_SOURCE_DI
6565
enable_testing()
6666
add_test( NAME myTest1
6767
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
68-
COMMAND VerifyBamID --UDPath ./resource/test/hapmap_3.3.b37.dat.UD --BamFile ./resource/test/test.bam --BedPath resource/test/hapmap_3.3.b37.dat.bed --MeanPath ./resource/test/hapmap_3.3.b37.dat.mu --Reference ./resource/test/chr20.fa.gz )
68+
COMMAND VerifyBamID --DisableSanityCheck --UDPath resource/test/hapmap_3.3.b37.dat.UD --BamFile resource/test/test.bam --BedPath resource/test/hapmap_3.3.b37.dat.bed --MeanPath resource/test/hapmap_3.3.b37.dat.mu --Reference resource/test/chr20.fa.gz )
6969
#add_test( NAME myTest2
7070
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
7171
# COMMAND sh ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run.plot.sh -i ${CMAKE_CURRENT_SOURCE_DIR}/resource/test/hapmap_3.3.b37.dat.V -o ${CMAKE_CURRENT_SOURCE_DIR}/resource/test/hapmap -r 1000g -g grey)

ContaminationEstimator.cpp

+18-11
Original file line numberDiff line numberDiff line change
@@ -461,11 +461,12 @@ int ContaminationEstimator::ReadPileup(const std::string & pileupFile) {
461461

462462
bool ContaminationEstimator::IsSanityCheckOK()
463463
{
464-
if(isPileupInput)
465-
return true;
466-
int effectSite(0), tmpDepth(0);
467-
std::vector<int> depthVec;
464+
// if(isPileupInput) return true;
465+
int tmpDepth(0);
466+
// std::vector<int> depthVec;
468467
std::string chr;
468+
notice("Input Number of Marker:%d",viewer.GetNumMarker());
469+
469470
int pos;
470471
for (size_t i = 0; i < NumMarker; ++i) {
471472
chr = PosVec[i].first;
@@ -477,22 +478,28 @@ bool ContaminationEstimator::IsSanityCheckOK()
477478
}
478479
tmpDepth = viewer.GetBaseInfoAt(chr, pos).size();
479480
viewer.sdDepth += tmpDepth * tmpDepth;
480-
effectSite++;
481481
}
482482

483-
viewer.sdDepth = sqrt(viewer.sdDepth/effectSite - viewer.avgDepth * viewer.avgDepth);
483+
viewer.sdDepth = sqrt(viewer.sdDepth/viewer.effectiveNumSite - viewer.avgDepth * viewer.avgDepth);
484+
485+
double filteredDepth(0), filteredsdDepth(0);
486+
viewer.effectiveNumSite=0;
484487

485-
effectSite=0;
486488
for (size_t i = 0; i < NumMarker; ++i) {
487489
chr = PosVec[i].first;
488490
pos = PosVec[i].second;
491+
if (viewer.posIndex.find(chr) == viewer.posIndex.end()) {
492+
continue;
493+
} else if (viewer.posIndex[chr].find(pos) == viewer.posIndex[chr].end()) {
494+
continue;
495+
}
489496
tmpDepth = viewer.GetBaseInfoAt(chr, pos).size();
490-
if(tmpDepth < (viewer.avgDepth - 3 * viewer.sdDepth)||
497+
if(tmpDepth == 0 || tmpDepth < (viewer.avgDepth - 3 * viewer.sdDepth)||
491498
tmpDepth > (viewer.avgDepth + 3 * viewer.sdDepth)) continue;
492-
effectSite++;
499+
viewer.effectiveNumSite++;
493500
}
494501
notice("Mean Depth:%f",viewer.avgDepth);
495502
notice("SD Depth:%f",viewer.sdDepth);
496-
notice("Filtered %d low-quality SNP markers.", NumMarker-effectSite);
497-
return double(effectSite)/NumMarker > 0.5 and effectSite > 7000;
503+
notice("%d SNP markers remained after sanity check.", viewer.effectiveNumSite);
504+
return double(viewer.effectiveNumSite)/NumMarker > 0.5 and viewer.effectiveNumSite > 7000;
498505
}

ContaminationEstimator.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,9 @@ class ContaminationEstimator {
261261
InitialGF(ptr->AF2s[i], GF2);
262262
std::vector<char>& tmpBase = ptr->viewer.GetBaseInfoAt(chr, pos);
263263
std::vector<char>& tmpQual = ptr->viewer.GetQualInfoAt(chr, pos);
264-
if (!ptr->isPileupInput and (tmpBase.size() == 0 ||
264+
if (tmpBase.size() == 0 ||
265265
tmpBase.size() < (ptr->viewer.avgDepth - 3 * ptr->viewer.sdDepth)||
266-
tmpBase.size() > (ptr->viewer.avgDepth + 3 * ptr->viewer.sdDepth))
266+
tmpBase.size() > (ptr->viewer.avgDepth + 3 * ptr->viewer.sdDepth)
267267
// ||
268268
// tmpBase.size() < ptr->viewer.firstQT ||
269269
// tmpBase.size() > ptr->viewer.thirdQT

SimplePileupViewer.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ int SimplePileupViewer::SIMPLEmpileup(mplp_conf_t *conf, int n, char **fn) {
503503
continue;
504504
} else {
505505
/*calculate number of reads covering snps*/
506-
numBases += n_plp[i];
506+
// numBases += n_plp[i];
507507
for (j = 0; j < n_plp[i]; ++j) {//each covered read in ith bam file
508508
const bam_pileup1_t *p = plp[i] + j;
509509
int c = p->qpos < p->b->core.l_qseq
@@ -551,6 +551,11 @@ int SimplePileupViewer::SIMPLEmpileup(mplp_conf_t *conf, int n, char **fn) {
551551
// }
552552
}
553553
}
554+
if(tmpBase.size()>0)
555+
{
556+
effectiveNumSite++;
557+
numBases +=tmpBase.size();
558+
}
554559
// putc('\n', pileup_fp);
555560
if (not existed) {
556561
baseInfo.push_back(tmpBase);
@@ -837,6 +842,10 @@ int SimplePileupViewer::ReadPileup(const std::string &filePath) {
837842
qualInfo.push_back(tmpQual);
838843
}
839844
numBases += depth;
845+
depth = 0;
846+
seq = "";
847+
qual = "";
848+
effectiveNumSite++;
840849
}
841850
avgDepth = (double)numBases/GetNumMarker();
842851
return 0;

SimplePileupViewer.h

+7-5
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,12 @@ class SimplePileupViewer {
5959
BaseInfo baseInfo;
6060
QualInfo qualInfo;
6161

62-
std::string SEQ_SM;
63-
int numBases;
64-
double avgDepth;
65-
double sdDepth;
62+
std::string SEQ_SM = "DefaultSampleName";
63+
int numBases = 0;
64+
int effectiveNumSite = 0;
65+
double avgDepth = 0;
66+
double sdDepth = 0;
67+
6668
double firstQT;//1st quantile
6769
double thirdQT;//2st quantile
6870

@@ -93,7 +95,7 @@ class SimplePileupViewer {
9395

9496
int GetNumMarker()
9597
{
96-
return baseInfo.size();
98+
return effectiveNumSite;
9799
}
98100

99101
inline std::vector<char>& GetBaseInfoAt(std::string& chr, int32_t pos)

main.cpp

+33-15
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ int execute(int argc, char **argv) {
3737
std::string RefVCF("Empty");
3838
std::string fixPC("Empty");
3939
double fixAlpha(-1.),epsilon(1e-8);
40-
bool withinAncestry(false),outputPileup(false),verbose(false);
40+
bool withinAncestry(false),outputPileup(false),verbose(false),disableSanityCheck(false);
4141
int nfiles(0),seed(12345),nPC(2),nthread(4);
4242
paramList pl;
4343
BEGIN_LONG_PARAMS(longParameters)
@@ -59,6 +59,8 @@ int execute(int argc, char **argv) {
5959
"Options to adjust model selection and parameters")
6060
LONG_PARAM("WithinAncestry", &withinAncestry,
6161
"[Bool] Enabling withinAncestry assume target sample and contamination source are from the same populations,[default:BetweenAncestry] otherwise")
62+
LONG_PARAM("DisableSanityCheck", &disableSanityCheck,
63+
"[Bool] Disable marker quality sanity check(no marker filtering)[default:false]")
6264
LONG_INT_PARAM("NumPC", &nPC,
6365
"[Int] Set number of PCs to infer Allele Frequency[optional]")
6466
LONG_STRING_PARAM("FixPC", &fixPC, "[String] Input fixed PCs to estimate Alpha[format PC1:PC2:PC3...]")
@@ -210,13 +212,16 @@ int execute(int argc, char **argv) {
210212
else if(Estimator.viewer.posIndex[item.chr].find(item.end)==Estimator.viewer.posIndex[item.chr].end())
211213
continue;
212214

213-
fout<<item.chr<<"\t"<<item.end<<"\t"<<Estimator.ChooseBed[item.chr][item.end].first<<"\t"<<Estimator.viewer.GetBaseInfoAt(item.chr,item.end).size()<<"\t";
214-
for(auto base:Estimator.viewer.GetBaseInfoAt(item.chr,item.end))
215-
fout<<base;
216-
fout<<"\t";
217-
for(auto qual:Estimator.viewer.GetQualInfoAt(item.chr,item.end))
218-
fout<<qual;
219-
fout<<std::endl;
215+
if(Estimator.viewer.GetBaseInfoAt(item.chr,item.end).size() > 0) {
216+
fout << item.chr << "\t" << item.end << "\t" << Estimator.ChooseBed[item.chr][item.end].first << "\t"
217+
<< Estimator.viewer.GetBaseInfoAt(item.chr, item.end).size() << "\t";
218+
for (auto base:Estimator.viewer.GetBaseInfoAt(item.chr, item.end))
219+
fout << base;
220+
fout << "\t";
221+
for (auto qual:Estimator.viewer.GetQualInfoAt(item.chr, item.end))
222+
fout << qual;
223+
fout << std::endl;
224+
}
220225
}
221226
fout.close();
222227
if(!fout)
@@ -226,11 +231,13 @@ int execute(int argc, char **argv) {
226231
}
227232
}
228233

229-
if(Estimator.IsSanityCheckOK())
230-
notice("Passing Marker Sanity Check...");
231-
else {
232-
warning("Insufficient Available markers, check input bam depth distribution in output pileup file after specifying --OutputPileup");
233-
exit(EXIT_FAILURE);
234+
if(!disableSanityCheck) {
235+
if (Estimator.IsSanityCheckOK())
236+
notice("Passing Marker Sanity Check...");
237+
else {
238+
warning("Insufficient Available markers, check input bam depth distribution in output pileup file after specifying --OutputPileup");
239+
exit(EXIT_FAILURE);
240+
}
234241
}
235242

236243
Estimator.OptimizeLLK(outputPrefix);
@@ -245,8 +252,11 @@ int execute(int argc, char **argv) {
245252
exit(EXIT_FAILURE);
246253
}
247254
fout << headers << std::endl;
248-
fout << Estimator.viewer.SEQ_SM << "\tNA\tNA\t" << Estimator.NumMarker << "\t" << floor((double)Estimator.viewer.numBases/Estimator.NumMarker+0.5)
249-
<< "\t" << Estimator.viewer.avgDepth << "\t"
255+
fout << Estimator.viewer.SEQ_SM << "\tNA\tNA\t" << Estimator.NumMarker << "\t";
256+
if(Estimator.isPileupInput)
257+
fout <<"NA";
258+
else fout<<Estimator.viewer.numBases;
259+
fout << "\t" << Estimator.viewer.avgDepth << "\t"
250260
<< ((Estimator.fn.globalAlpha < 0.5) ? Estimator.fn.globalAlpha : (1.f - Estimator.fn.globalAlpha)) << "\t"
251261
<< -Estimator.fn.llk1 << "\t" << -Estimator.fn.llk0 << "\t" << "NA\tNA\t"
252262
<< "NA\tNA\tNA\tNA\tNA\t"
@@ -284,6 +294,14 @@ int main(int argc, char** argv) {
284294
std::cerr << errorMsg << std::endl;
285295
return returnVal;
286296
}
297+
catch(std::exception& e)
298+
{
299+
returnVal = -1;
300+
std::string errorMsg = "Exiting due to ERROR:\n\t";
301+
errorMsg += e.what();
302+
std::cerr << errorMsg << std::endl;
303+
return returnVal;
304+
}
287305
compStatus = returnVal;
288306
PhoneHome::completionStatus(compStatus.c_str(),"VerifyBamID2");
289307
return(returnVal);

0 commit comments

Comments
 (0)