Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avro test #7192

Merged
merged 20 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package org.broadinstitute.hellbender.tools.variantdb.nextgen;

import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.avro.generic.GenericRecord;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Advanced;
Expand All @@ -13,12 +11,9 @@
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.variantdb.CommonCode;
import org.broadinstitute.hellbender.tools.variantdb.nextgen.FilterSensitivityTools;
import org.broadinstitute.hellbender.tools.variantdb.SampleList;
import org.broadinstitute.hellbender.tools.variantdb.SchemaUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.bigquery.StorageAPIAvroReader;
import org.broadinstitute.hellbender.utils.bigquery.TableReference;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.*;
Expand Down Expand Up @@ -52,13 +47,15 @@ public class ExtractCohort extends ExtractTool {
@Argument(
fullName = "cohort-extract-table",
doc = "Fully qualified name of the table where the cohort data exists (already subsetted)",
optional = false
mutex = {"cohort-avro-file-name"},
optional = true
mmorgantaylor marked this conversation as resolved.
Show resolved Hide resolved
)
private String cohortTable = null;

@Argument(
fullName = "cohort-avro-file-name",
doc = "Path of the cohort avro file",
mutex = {"cohort-extract-table"},
optional = true
)
private String cohortAvroFileName = null;
Expand All @@ -79,31 +76,31 @@ public class ExtractCohort extends ExtractTool {


@Argument(
fullName="snps-truth-sensitivity-filter-level",
doc="The truth sensitivity level at which to start filtering SNPs",
optional=true
fullName ="snps-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering SNPs",
optional = true
)
private Double truthSensitivitySNPThreshold = null;

@Argument(
fullName="indels-truth-sensitivity-filter-level",
doc="The truth sensitivity level at which to start filtering INDELs",
optional=true
fullName = "indels-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering INDELs",
optional = true
)
private Double truthSensitivityINDELThreshold = null;

@Advanced
@Argument(
fullName="snps-lod-score-cutoff",
doc="The VQSLOD score below which to start filtering SNPs",
optional=true)
fullName = "snps-lod-score-cutoff",
doc = "The VQSLOD score below which to start filtering SNPs",
optional = true)
private Double vqsLodSNPThreshold = null;

@Advanced
@Argument(
fullName="indels-lod-score-cutoff",
doc="The VQSLOD score below which to start filtering INDELs",
optional=true)
fullName = "indels-lod-score-cutoff",
doc = "The VQSLOD score below which to start filtering INDELs",
optional = true)
private Double vqsLodINDELThreshold = null;


Expand Down Expand Up @@ -145,6 +142,20 @@ protected void onStartup() {
throw new UserException("min-location and max-location should not be used together with intervals (-L).");
}

        // if there is an avro file, the BQ-specific parameters are unnecessary,
        // but they are all required if there is no avro file
if (cohortAvroFileName == null && (projectID == null || cohortTable == null)) {
throw new UserException("Project id (--project-id) and cohort table (--cohort-extract-table) are required " +
"if no avro file (--cohort-avro-file-name) is provided.");
}

// if there is a sample file, the BQ specific parameters are unnecessary,
// but without a sample file, both a sample-table and a project-id are needed
if (sampleFileName == null && (projectID == null || sampleTableName == null)) {
throw new UserException("Project id (--project-id) and sample table (--sample-table) are required " +
"if no sample file (--sample-file) is provided.");
}

engine = new ExtractCohortEngine(
projectID,
vcfWriter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public ExtractCohortEngine(final String projectID,
this.sampleNames = sampleNames;
this.mode = mode;

this.cohortTableRef = new TableReference(cohortTableName, SchemaUtils.COHORT_FIELDS);
this.cohortTableRef = cohortTableName == null || "".equals(cohortTableName) ? null : new TableReference(cohortTableName, SchemaUtils.COHORT_FIELDS);
this.cohortAvroFileName = cohortAvroFileName;
this.traversalIntervals = traversalIntervals;
this.minLocation = minLocation;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,24 @@ public enum QueryMode {
@Argument(
fullName = "project-id",
doc = "ID of the Google Cloud project to use when executing queries",
optional = false
optional = true
)
protected String projectID = null;

@Argument(
fullName = "sample-table",
doc = "Fully qualified name of a bigquery table containing a single column `sample` that describes the full list of samples to extract",
optional = true
optional = true,
mutex={"sample-file"}
)
protected String sampleTableName = null;

@Argument(
fullName = "sample-file",
doc = "Alternative to `sample-table`. Pass in a (sample_id,sample_name) CSV that describes the full list of samples to extract. No header",
optional = true
optional = true,
mutex={"sample-table"}

)
protected File sampleFileName = null;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.broadinstitute.hellbender.tools.variantdb.nextgen;

import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.annotations.Test;

import java.io.File;


class ExtractCohortTest extends CommandLineProgramTest {
private final String prefix = getToolTestDataDir();
private final String cohortAvroFileName = prefix +"chr20_subset_3_samples.avro";
private final String sampleFile = prefix +"sample_list";

@Test
public void testFinalVCFfromAvro() throws Exception {
// To create the expectedVCF file (of expected output) --create a temp table in BQ with the following query
// and then export it through the BQ GUI as an avro file into GCS.
// SELECT * FROM `spec-ops-aou.anvil_100_for_testing.exported_cohort_all_samples`
// where location < 20000000200000 and location >= 20000000100000
// and (sample_name="HG00405" or sample_name="HG00418" or sample_name="HG00408")
final File expectedVCF = getTestFile("expected.vcf");

// create a temporary file (that will get cleaned up after the test has run) to hold the output data in
final File outputVCF = createTempFile("output", "vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args
.add("mode", "GENOMES")
.add("ref-version", 38)
.add("query-mode", "LOCAL_SORT")
.add("R", hg38Reference)
.add("O", outputVCF.getAbsolutePath())
.add("local-sort-max-records-in-ram", 10000000)
.add("cohort-avro-file-name", cohortAvroFileName)
.add("sample-file", sampleFile);

runCommandLine(args);
IntegrationTestSpec.assertEqualTextFiles(outputVCF, expectedVCF);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you also check the index file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this feels like it would be outside the scope of the test no? a GATK issue?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, totally - i just wasn't sure why you were checking in the expected index file in that case

}
}
Binary file not shown.
Loading