Skip to content

Commit

Permalink
Modified VariantRecalibrator to use only a deduplicated annotations list.
Browse files Browse the repository at this point in the history
This resolves issues that can arise when the original and deduplicated annotation lists contain different numbers of annotations. Also fixed the logger warning that prints the duplicate annotations.
  • Loading branch information
KevinCLydon committed Feb 28, 2023
1 parent 24f62de commit e0f07ef
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public VariantDataManager( final List<String> annotationKeys, final VariantRecal
this.data = Collections.emptyList();
final List<String> uniqueAnnotations = annotationKeys.stream().distinct().collect(Collectors.toList());
if (annotationKeys.size() != uniqueAnnotations.size()) {
logger.warn("Ignoring duplicate annotations for recalibration %s.", Utils.getDuplicatedItems(annotationKeys));
logger.warn("Ignoring duplicate annotations for recalibration {}", Utils.getDuplicatedItems(annotationKeys));
}
this.annotationKeys = new ArrayList<>( uniqueAnnotations );
this.VRAC = VRAC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ public Object onTraversalSuccess() {
engine.evaluateData(dataManager.getData(), goodModel, false);
if (goodModel.failedToConverge) {
if (outputModel != null) {
final GATKReport report = writeModelReport(goodModel, null, USE_ANNOTATIONS);
final GATKReport report = writeModelReport(goodModel, null, dataManager.getAnnotationKeys());
saveModelReport(report, outputModel);
}
throw new UserException.VQSRPositiveModelFailure("Positive training model failed to converge. One or more annotations " +
Expand All @@ -682,7 +682,7 @@ public Object onTraversalSuccess() {
engine.evaluateData(dataManager.getData(), badModel, true);

if (outputModel != null) {
final GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
final GATKReport report = writeModelReport(goodModel, badModel, dataManager.getAnnotationKeys());
saveModelReport(report, outputModel);
}

Expand Down Expand Up @@ -715,7 +715,7 @@ public Object onTraversalSuccess() {
goodModel,
badModel,
0.0,
dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
dataManager.getAnnotationKeys().toArray(new String[dataManager.getAnnotationKeys().size()]));
}

if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,22 @@ public class VariantRecalibratorIntegrationTest extends CommandLineProgramTest {
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false"
};

// Command-line argument fragments for a sampled ("-sample-every 2") SNP-mode recalibration run
// in which the QD annotation is requested twice ("-an QD ... -an QD"), so the tool is exercised
// with a duplicated annotation key.
// The whole argument string is a single array element built by concatenation; each fragment
// carries its own leading space.
// NOTE(review): the "%s" placeholders after --output and -tranches-file are presumably filled in
// by the test spec that consumes these arguments -- confirm against the caller.
final private String[] variantRecalibratorSamplingParamsWithDupes = {
" --variant " + getLargeVQSRTestDataDir() + "phase1.projectConsensus.chr20.1M-10M.raw.snps.vcf" +
" -L 20:1,000,000-10,000,000" +
" --resource:known,known=true,prior=10.0 " + getLargeVQSRTestDataDir() + "dbsnp_132_b37.leftAligned.20.1M-10M.vcf" +
" --resource:truth_training1,truth=true,training=true,prior=15.0 " + getLargeVQSRTestDataDir() + "sites_r27_nr.b37_fwd.20.1M-10M.vcf" +
" --resource:truth_training2,training=true,truth=true,prior=12.0 " + getLargeVQSRTestDataDir() + "Omni25_sites_1525_samples.b37.20.1M-10M.vcf" +
" -an QD -an HaplotypeScore -an HRun -an QD" +
" --trust-all-polymorphic" + // for speed
" --output %s" +
" -tranches-file %s" +
" --output-model " + modelReportFilename +
" -mode SNP --max-gaussians 3" + //reduce max gaussians so we have negative training data with the sampled input
" -sample-every 2" +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false"
};

@Override
public String getToolTestDataDir(){
return toolsTestDir + "walkers/VQSR/";
Expand Down Expand Up @@ -404,7 +420,7 @@ public void testVariantRecalibratorSampling() throws IOException {
public void testVariantRecalibratorRScriptOutput() throws IOException {
final String inputFile = getLargeVQSRTestDataDir() + "phase1.projectConsensus.chr20.1M-10M.raw.snps.vcf";
final File unrunRscript = createTempFile("rscriptOutput", ".R");
final String args = StringUtils.join(variantRecalibratorSamplingParams, " ");
final String args = StringUtils.join(variantRecalibratorSamplingParamsWithDupes, " ");

final IntegrationTestSpec spec = new IntegrationTestSpec(
args +
Expand Down

0 comments on commit e0f07ef

Please sign in to comment.