Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mutect2 WDL and GetSampleName can handle multiple sample names in BAM headers #8859

Merged
merged 2 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions scripts/mutect2_wdl/mutect2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -442,22 +442,20 @@ task M2 {
touch bamout.bam
touch f1r2.tar.gz
touch dataset.txt
echo "" > normal_name.txt

gatk --java-options "-Xmx~{command_mem}m" GetSampleName -R ~{ref_fasta} -I ~{tumor_reads} -O tumor_name.txt -encode \
~{"--gcs-project-for-requester-pays " + gcs_project_for_requester_pays}
tumor_command_line="-I ~{tumor_reads} -tumor `cat tumor_name.txt`"

if [[ ! -z "~{normal_reads}" ]]; then
gatk --java-options "-Xmx~{command_mem}m" GetSampleName -R ~{ref_fasta} -I ~{normal_reads} -O normal_name.txt -encode \
gatk --java-options "-Xmx~{command_mem}m" GetSampleName -R ~{ref_fasta} -I ~{normal_reads} -O normal_names.txt -encode \
~{"--gcs-project-for-requester-pays " + gcs_project_for_requester_pays}
normal_command_line="-I ~{normal_reads} -normal `cat normal_name.txt`"
# prefix each sample name (one per line) with "-normal ", then replace the newlines with spaces
# to get a single line of the form: -normal sample1 -normal sample2 etc
normal_sample_line=`awk '{ print "-normal", $0 }' normal_names.txt | tr '\n' ' '`
fi

gatk --java-options "-Xmx~{command_mem}m" Mutect2 \
-R ~{ref_fasta} \
$tumor_command_line \
$normal_command_line \
-I ~{tumor_reads} \
~{"-I " + normal_reads} \
$normal_sample_line \
~{"--germline-resource " + gnomad} \
~{"-pon " + pon} \
~{"-L " + intervals} \
Expand Down Expand Up @@ -513,8 +511,6 @@ task M2 {
File unfiltered_vcf = "~{output_vcf}"
File unfiltered_vcf_idx = "~{output_vcf_idx}"
File output_bamOut = "bamout.bam"
String tumor_sample = read_string("tumor_name.txt")
String normal_sample = read_string("normal_name.txt")
File stats = "~{output_stats}"
File f1r2_counts = "f1r2.tar.gz"
Array[File] tumor_pileups = glob("*tumor-pileups.table")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,16 @@ public void onTraversalStart() {
}

final List<String> sampleNames = getHeaderForReads().getReadGroups().stream().map(s -> s.getSample()).distinct().collect(Collectors.toList());
if (sampleNames.size() > 1) {
throw new UserException.BadInput("The given input bam has more than one unique sample name: " + StringUtils.join(sampleNames, ", "));
}

if (sampleNames.size() == 0) {
throw new UserException.BadInput("The given bam input has no sample names.");
}

try (final OutputStreamWriter fileWriter = new OutputStreamWriter(outputSampleNameFile.getOutputStream())) {
final String rawSample = sampleNames.get(0);
final String outputSample = urlEncode ? IOUtils.urlEncode(rawSample) : rawSample;
fileWriter.write(outputSample);
final String outputSamplesOnSeparateLines = sampleNames.stream()
.map(rawSample -> urlEncode ? IOUtils.urlEncode(rawSample) : rawSample)
.collect(Collectors.joining("\n"));
fileWriter.write(outputSamplesOnSeparateLines);
} catch (final IOException ioe) {
throw new UserException(String.format("Could not write to output file %s.", outputSampleNameFile), ioe);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,16 @@ public void testUrlEncoding() throws IOException {
Assert.assertTrue(Files.readAllLines(outputFile.toPath()).stream().filter(n -> n.equals("Hi%2CMom%21")).count() == 1);
}

@Test(expectedExceptions = UserException.class)
public void testMultiSampleBam() {
@Test
public void testMultiSampleBam() throws IOException {
final File outputFile = createTempFile("get-sample-name-ms", ".txt");
final String[] arguments = {
"-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAD_MULTI_SAMPLE_BAM_FILE.getAbsolutePath(),
"-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, outputFile.getAbsolutePath()
};
runCommandLine(arguments);
Assert.assertTrue(outputFile.exists());
Assert.assertTrue(outputFile.length() > 0);
Assert.assertTrue(Files.readAllLines(outputFile.toPath()).stream().count() == 3);
}
}
Loading