Skip to content
This repository has been archived by the owner on Oct 29, 2023. It is now read-only.

Commit

Permalink
Add VariantSimilarity integration test.
Browse files Browse the repository at this point in the history
Also refactored reusable integration test helper code.
  • Loading branch information
deflaux committed Jul 30, 2015
1 parent 49fe347 commit 6ee824f
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package com.google.cloud.genomics.dataflow.functions;

import com.google.api.client.util.Lists;
import com.google.api.client.util.Preconditions;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.common.collect.BiMap;
Expand Down Expand Up @@ -71,6 +72,16 @@ public GraphResult(String name, double x, double y) {
/**
 * Renders this result as one line of text: the name, two tab characters, the X coordinate,
 * one tab character, then the Y coordinate.
 */
@Override
public String toString() {
  return name + "\t\t" + graphX + "\t" + graphY;
}

/**
 * Parses a {@code GraphResult} from its whitespace-separated text form, i.e. the
 * {@code name<TAB><TAB>x<TAB>y} layout written by {@link #toString()}.
 *
 * <p>Fields are split on any run of whitespace, so a name that itself contains whitespace
 * cannot be parsed back.
 *
 * @param tsv the serialized result; must not be null
 * @return a new {@code GraphResult} built from the name and the two coordinates
 * @throws NullPointerException if {@code tsv} is null
 * @throws IllegalStateException if {@code tsv} does not contain exactly three fields
 * @throws NumberFormatException if either coordinate is not a parsable double
 */
public static GraphResult fromString(String tsv) {
  Preconditions.checkNotNull(tsv);
  // "\\s" already matches the tab character, so no separate "\t" alternative is needed.
  String[] tokens = tsv.split("\\s+");
  // Preconditions message templates only substitute "%s"; any other specifier (such as "%d")
  // is left verbatim in the message instead of being replaced by the argument.
  Preconditions.checkState(3 == tokens.length,
      "Expected three values in serialized GraphResult but found %s", tokens.length);
  return new GraphResult(tokens[0],
      Double.parseDouble(tokens[1]),
      Double.parseDouble(tokens[2]));
}
}

private static final PCoAnalysis INSTANCE = new PCoAnalysis();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,103 +15,83 @@
*/
package com.google.cloud.genomics.dataflow.pipelines;

import com.google.api.services.storage.Storage;
import com.google.cloud.dataflow.sdk.options.GcsOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.util.GcsUtil;
import com.google.cloud.dataflow.sdk.util.Transport;
import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import java.io.BufferedReader;

import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Writer;
import java.nio.channels.Channels;
import java.security.GeneralSecurityException;

/**
* This test expects you to have:
* -a Google Cloud API key in the GOOGLE_API_KEY environment variable,
* -your Google Cloud project name in TEST_PROJECT,
* -a GCS folder path in TEST_OUTPUT_GCS_FOLDER to store temporary test outputs,
* -a GCS folder path in TEST_STAGING_GCS_FOLDER to store temporary files,
* GCS folder paths should be of the form "gs://bucket/folder/"
* This integration test will read and write to Cloud Storage, and call the Genomics API.
*
* This test will read and write to GCS, and call the Genomics API.
* The following environment variables are required:
* - a Google Cloud API key in GOOGLE_API_KEY,
* - a Google Cloud project name in TEST_PROJECT,
* - a Cloud Storage folder path in TEST_OUTPUT_GCS_FOLDER to store temporary test outputs,
* - a Cloud Storage folder path in TEST_STAGING_GCS_FOLDER to store temporary files,
*
* Cloud Storage folder paths should be of the form "gs://bucket/folder/"
*
* When doing e.g. mvn install, you can skip integration tests using:
* mvn install -DskipITs
* mvn install -DskipITs
*
* To run one test:
* mvn -Dit.test=CountReadsITCase#testLocal verify
*
* See also http://maven.apache.org/surefire/maven-failsafe-plugin/examples/single-test.html
*/
@RunWith(JUnit4.class)
public class CountReadsITCase {

final String API_KEY = System.getenv("GOOGLE_API_KEY");
final String TEST_PROJECT = System.getenv("TEST_PROJECT");
final String TEST_OUTPUT_GCS_FOLDER = System.getenv("TEST_OUTPUT_GCS_FOLDER");
final String TEST_STAGING_GCS_FOLDER = System.getenv("TEST_STAGING_GCS_FOLDER");
// This file shouldn't move.
final String TEST_BAM_FNAME = "gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA06985/alignment/NA06985.454.MOSAIK.SRP000033.2009_11.bam";
static final String TEST_BAM_FNAME = "gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA06985/alignment/NA06985.454.MOSAIK.SRP000033.2009_11.bam";
// This is the Readgroupset ID of the same file, in ReadStore. It also shouldn't move.
final String TEST_READGROUPSET = "CMvnhpKTFhDvp9zAvYj66AY";
static final String TEST_READGROUPSET = "CMvnhpKTFhDvp9zAvYj66AY";
// The region where we're counting reads.
final String TEST_CONTIG = "1:550000:560000";
static final String TEST_CONTIG = "1:550000:560000";
// How many reads are in that region.
final long TEST_EXPECTED = 685;
static final long TEST_EXPECTED = 685;
// In this file there are no unmapped reads, so expecting the same number.
final long TEST_EXPECTED_WITH_UNMAPPED = TEST_EXPECTED;
static final long TEST_EXPECTED_WITH_UNMAPPED = TEST_EXPECTED;

// Same as the above variables, but for the NA12877_S1 dataset.
final String NA12877_S1_BAM_FILENAME = "gs://genomics-public-data/platinum-genomes/bam/NA12877_S1.bam";
final String NA12877_S1_READGROUPSET = "CMvnhpKTFhD3he72j4KZuyc";
final String NA12877_S1_CONTIG = "chr17:41196311:41277499";
final long NA12877_S1_EXPECTED = 45081;
static final String NA12877_S1_BAM_FILENAME = "gs://genomics-public-data/platinum-genomes/bam/NA12877_S1.bam";
static final String NA12877_S1_READGROUPSET = "CMvnhpKTFhD3he72j4KZuyc";
static final String NA12877_S1_CONTIG = "chr17:41196311:41277499";
static final long NA12877_S1_EXPECTED = 45081;
// How many reads are in that region if we take unmapped ones too
final long NA12877_S1_EXPECTED_WITH_UNMAPPED = 45142;

@Before
public void voidEnsureEnvVar() {
Assert.assertNotNull("You must set the GOOGLE_API_KEY environment variable for this test.", API_KEY);
Assert.assertNotNull("You must set the TEST_PROJECT environment variable for this test.", TEST_PROJECT);
Assert.assertNotNull("You must set the TEST_OUTPUT_GCS_FOLDER environment variable for this test.", TEST_OUTPUT_GCS_FOLDER);
Assert.assertTrue("TEST_OUTPUT_GCS_FOLDER must end with '/'", TEST_OUTPUT_GCS_FOLDER.endsWith("/"));
Assert.assertTrue("TEST_OUTPUT_GCS_FOLDER must start with 'gs://'", TEST_OUTPUT_GCS_FOLDER.startsWith("gs://"));
Assert.assertNotNull("You must set the TEST_STAGING_GCS_FOLDER environment variable for this test.", TEST_STAGING_GCS_FOLDER);
Assert.assertTrue("TEST_STAGING_GCS_FOLDER must start with 'gs://'", TEST_STAGING_GCS_FOLDER.startsWith("gs://"));
// we don't care how TEST_STAGING_GCS_FOLDER ends, so no check for it.
static final long NA12877_S1_EXPECTED_WITH_UNMAPPED = 45142;

static IntegrationTestHelper helper;

@BeforeClass
public static void setUpBeforeClass() {
helper = new IntegrationTestHelper();
}

private void testLocalBase(String outputFilename, String contig, String bamFilename, long expectedCount,
boolean includeUnmapped) throws Exception {
final String OUTPUT = TEST_OUTPUT_GCS_FOLDER + outputFilename;
final String OUTPUT = helper.TEST_OUTPUT_GCS_FOLDER + outputFilename;
String[] ARGS = {
"--apiKey=" + API_KEY,
"--project=" + TEST_PROJECT,
"--apiKey=" + helper.API_KEY,
"--output=" + OUTPUT,
"--references=" + contig,
"--includeUnmapped=" + includeUnmapped,
"--BAMFilePath=" + bamFilename,
};
GenomicsOptions popts = PipelineOptionsFactory.create().as(GenomicsOptions.class);
popts.setApiKey(API_KEY);
GcsUtil gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
try {
touchOutput(gcsUtil, OUTPUT);
helper.touchOutput(OUTPUT);

CountReads.main(ARGS);

BufferedReader reader = new BufferedReader(Channels.newReader(gcsUtil.open(GcsPath.fromUri(OUTPUT)), "UTF-8"));
BufferedReader reader = helper.openOutput(OUTPUT);
long got = Long.parseLong(reader.readLine());

Assert.assertEquals(expectedCount, got);
} finally {
GcsDelete(popts, OUTPUT);
helper.deleteOutput(OUTPUT);
}
}

Expand Down Expand Up @@ -144,31 +124,28 @@ public void testLocalNA12877_S1_UNMAPPED() throws Exception {
}

private void testCloudBase(String outputFilename, String contig, String bamFilename, long expectedCount) throws Exception {
final String OUTPUT = TEST_OUTPUT_GCS_FOLDER + outputFilename;
final String OUTPUT = helper.TEST_OUTPUT_GCS_FOLDER + outputFilename;
String[] ARGS = {
"--apiKey=" + API_KEY,
"--project=" + TEST_PROJECT,
"--apiKey=" + helper.API_KEY,
"--project=" + helper.TEST_PROJECT,
"--output=" + OUTPUT,
"--numWorkers=2",
"--runner=BlockingDataflowPipelineRunner",
"--stagingLocation=" + TEST_STAGING_GCS_FOLDER,
"--stagingLocation=" + helper.TEST_STAGING_GCS_FOLDER,
"--references=" + contig,
"--BAMFilePath=" + bamFilename
};
GenomicsOptions popts = PipelineOptionsFactory.create().as(GenomicsOptions.class);
popts.setApiKey(API_KEY);
GcsUtil gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
try {
touchOutput(gcsUtil, OUTPUT);
helper.touchOutput(OUTPUT);

CountReads.main(ARGS);

BufferedReader reader = new BufferedReader(Channels.newReader(gcsUtil.open(GcsPath.fromUri(OUTPUT)), "UTF-8"));
BufferedReader reader = helper.openOutput(OUTPUT);
long got = Long.parseLong(reader.readLine());

Assert.assertEquals(expectedCount, got);
} finally {
GcsDelete(popts, OUTPUT);
helper.deleteOutput(OUTPUT);
}
}

Expand All @@ -188,31 +165,28 @@ public void testCloudNA12877_S1() throws Exception {
}

public void testCloudWithAPIBase(String outputFilename, String contig, String readGroupSetId, long expectedCount) throws Exception {
final String OUTPUT = TEST_OUTPUT_GCS_FOLDER + outputFilename;
final String OUTPUT = helper.TEST_OUTPUT_GCS_FOLDER + outputFilename;
String[] ARGS = {
"--apiKey=" + API_KEY,
"--project=" + TEST_PROJECT,
"--apiKey=" + helper.API_KEY,
"--project=" + helper.TEST_PROJECT,
"--output=" + OUTPUT,
"--numWorkers=2",
"--runner=BlockingDataflowPipelineRunner",
"--stagingLocation=" + TEST_STAGING_GCS_FOLDER,
"--stagingLocation=" + helper.TEST_STAGING_GCS_FOLDER,
"--references=" + contig,
"--readGroupSetId=" + readGroupSetId
};
GenomicsOptions popts = PipelineOptionsFactory.create().as(GenomicsOptions.class);
popts.setApiKey(API_KEY);
GcsUtil gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
try {
touchOutput(gcsUtil, OUTPUT);
helper.touchOutput(OUTPUT);

CountReads.main(ARGS);

BufferedReader reader = new BufferedReader(Channels.newReader(gcsUtil.open(GcsPath.fromUri(OUTPUT)), "UTF-8"));
BufferedReader reader = helper.openOutput(OUTPUT);
long got = Long.parseLong(reader.readLine());

Assert.assertEquals(expectedCount, got);
} finally {
GcsDelete(popts, OUTPUT);
helper.deleteOutput(OUTPUT);
}
}

Expand All @@ -231,22 +205,6 @@ public void testCloudWithAPI_NA12877_S1() throws Exception {
NA12877_S1_CONTIG, NA12877_S1_READGROUPSET, NA12877_S1_EXPECTED);
}

/**
* Make sure we can get to the output.
*/
private void touchOutput(GcsUtil gcsUtil, String outputGcsPath) throws IOException {
try (Writer writer = Channels.newWriter(gcsUtil.create(GcsPath.fromUri(outputGcsPath), "text/plain"), "UTF-8")) {
writer.write("output will go here");
}
}

private static void GcsDelete(PipelineOptions popts, String gcsPath) throws IOException, GeneralSecurityException {
// boilerplate
GcsPath path = GcsPath.fromUri(gcsPath);
GcsOptions gcsOptions = (GcsOptions)popts.as(GcsOptions.class);
Storage storage = Transport.newStorageClient(gcsOptions).build();
// do the actual work
storage.objects().delete(path.getBucket(), path.getObject()).execute();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.pipelines;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Writer;
import java.nio.channels.Channels;
import java.security.GeneralSecurityException;

import org.junit.Assert;

import com.google.api.services.storage.Storage;
import com.google.cloud.dataflow.sdk.options.GcsOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.util.GcsUtil;
import com.google.cloud.dataflow.sdk.util.Transport;
import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;

/**
 * Shared fixture for the pipeline integration tests.
 *
 * <p>Reads the test configuration from environment variables, fails through JUnit assertions
 * when a value is missing or malformed, and provides helpers to create, read and delete test
 * output objects addressed by {@code gs://} paths.
 */
public class IntegrationTestHelper {

  // Test configuration constants, taken from the environment. A value is null when its
  // variable is unset; the constructor rejects that case.
  final String API_KEY = System.getenv("GOOGLE_API_KEY");
  final String TEST_PROJECT = System.getenv("TEST_PROJECT");
  final String TEST_OUTPUT_GCS_FOLDER = System.getenv("TEST_OUTPUT_GCS_FOLDER");
  final String TEST_STAGING_GCS_FOLDER = System.getenv("TEST_STAGING_GCS_FOLDER");

  // Variant test configuration constants
  static final String PLATINUM_GENOMES_DATASET = "3049512673186936334";
  static final String PLATINUM_GENOMES_BRCA1_REFERENCES = "chr17:41196311:41277499";
  static final int PLATINUM_GENOMES_NUMBER_OF_SAMPLES = 17;

  GenomicsOptions popts = PipelineOptionsFactory.create().as(GenomicsOptions.class);
  GcsUtil gcsUtil;

  /**
   * Validates the environment configuration, then prepares the options and the GCS utility
   * used by the helper methods.
   */
  public IntegrationTestHelper() {
    // Presence checks come first so that the format checks below never dereference null.
    requireSet("GOOGLE_API_KEY", API_KEY);
    requireSet("TEST_PROJECT", TEST_PROJECT);
    requireSet("TEST_OUTPUT_GCS_FOLDER", TEST_OUTPUT_GCS_FOLDER);
    requireSet("TEST_STAGING_GCS_FOLDER", TEST_STAGING_GCS_FOLDER);

    Assert.assertTrue("TEST_OUTPUT_GCS_FOLDER must end with '/'", TEST_OUTPUT_GCS_FOLDER.endsWith("/"));
    requireGcsScheme("TEST_OUTPUT_GCS_FOLDER", TEST_OUTPUT_GCS_FOLDER);
    // How TEST_STAGING_GCS_FOLDER ends does not matter, so only its scheme is checked.
    requireGcsScheme("TEST_STAGING_GCS_FOLDER", TEST_STAGING_GCS_FOLDER);

    popts.setApiKey(API_KEY);
    gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
  }

  /** Asserts that the named environment variable was set. */
  private static void requireSet(String variable, String value) {
    Assert.assertNotNull(
        "You must set the " + variable + " environment variable for this test.", value);
  }

  /** Asserts that the named environment variable holds a path starting with "gs://". */
  private static void requireGcsScheme(String variable, String value) {
    Assert.assertTrue(variable + " must start with 'gs://'", value.startsWith("gs://"));
  }

  /**
   * Make sure we can get to the output.
   *
   * <p>A sentinel value is also written to the file, so that output left over from an earlier
   * run cannot make a newly failing test appear to succeed.
   *
   * @param outputPath path of the output object to create
   * @throws IOException if the object cannot be created or written
   */
  public void touchOutput(String outputPath) throws IOException {
    GcsPath target = GcsPath.fromUri(outputPath);
    try (Writer sentinel = Channels.newWriter(gcsUtil.create(target, "text/plain"), "UTF-8")) {
      sentinel.write("output will go here");
    }
  }

  /**
   * Open test output for reading.
   *
   * @param outputPath path of the output object to read
   * @return a UTF-8 reader over the object; this method does not close it
   * @throws IOException if the object cannot be opened
   */
  public BufferedReader openOutput(String outputPath) throws IOException {
    GcsPath source = GcsPath.fromUri(outputPath);
    return new BufferedReader(Channels.newReader(gcsUtil.open(source), "UTF-8"));
  }

  /**
   * Delete test output.
   *
   * @param outputPath path of the output object to delete
   * @throws IOException if the delete request fails
   * @throws GeneralSecurityException if the storage client cannot be built
   */
  public void deleteOutput(String outputPath) throws IOException, GeneralSecurityException {
    GcsPath target = GcsPath.fromUri(outputPath);
    // Build a storage client from the same pipeline options used for the GCS utility.
    Storage storage = Transport.newStorageClient(popts.as(GcsOptions.class)).build();
    storage.objects().delete(target.getBucket(), target.getObject()).execute();
  }

}
Loading

0 comments on commit 6ee824f

Please sign in to comment.