Update GATK version and support new Java versions #224

Merged · 9 commits · Mar 29, 2023
2 changes: 1 addition & 1 deletion .github/workflows/gradle.yml
@@ -14,7 +14,7 @@ jobs:
     - name: Set up JDK
       uses: actions/setup-java@v3
       with:
-        java-version: '8'
+        java-version: '17'
         distribution: 'adopt'
     - name: Cache
       uses: actions/cache@v3
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
@@ -12,7 +12,7 @@ jobs:
     - name: Set up JDK
       uses: actions/setup-java@v3
       with:
-        java-version: '8'
+        java-version: '17'
         distribution: 'adopt'
     - name: Grant execute permission for gradlew
       run: chmod +x gradlew
@@ -41,13 +41,13 @@ jobs:
         enable_jekyll: true
     - name: Docker meta
       id: docker_meta
-      uses: crazy-max/ghaction-docker-meta@v4.3.0
+      uses: docker/metadata-action@v4
       with:
         images: ghcr.io/bimberlab/discvrseq
-        tag-sha: true
-        tag-semver: |
-          {{version}}
-          {{major}}.{{minor}}
+        tags: |
+          type=semver,pattern={{version}}
+          type=semver,pattern={{major}}.{{minor}}
+          type=sha
     - name: Set up QEMU
       uses: docker/setup-qemu-action@v2.1.0
     - name: Set up Docker Buildx
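(Context, not part of the diff: crazy-max/ghaction-docker-meta was renamed to docker/metadata-action, and newer releases of the action replaced the separate tag-sha / tag-semver inputs with the single tags input used above, where each line declares a tag source such as type=semver or type=sha.)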
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-FROM adoptopenjdk/openjdk8
+FROM adoptopenjdk/openjdk17

 # See: https://stackoverflow.com/questions/44331836/apt-get-install-tzdata-noninteractive
 ENV DEBIAN_FRONTEND=noninteractive
35 changes: 16 additions & 19 deletions build.gradle
@@ -20,7 +20,6 @@ apply plugin: 'java'

 repositories {
     mavenCentral()
-    jcenter()
     maven {
         url "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot/"
     }
@@ -52,12 +51,13 @@ configurations {
 mainClassName = "com.github." + rootProject.name.toLowerCase() + ".Main"

 //see this thread: https://github.com/broadinstitute/gatk/issues/2300#issuecomment-333627036
-final gatkVersion = '4.3.0.0'
-final htsjdkVersion = System.getProperty('htsjdk.version','3.0.1')
-final barclayVersion = System.getProperty('barclay.version','4.0.2')
+final gatkVersion = '4.4.0.0'
+final htsjdkVersion = System.getProperty('htsjdk.version','3.0.5')
+final barclayVersion = System.getProperty('barclay.version','5.0.0')
 final luceneVersion = System.getProperty('lucene.version','8.11.2')
 final testNGVersion = '7.0.0'
 final googleCloudNioDependency = 'com.google.cloud:google-cloud-nio:0.123.25'
+final log4j2Version = System.getProperty('log4j2Version', '2.17.1')

 final docBuildDir = "$buildDir/docs"
 logger.info(docBuildDir)
@@ -68,8 +68,8 @@ configurations.all {
     force 'com.github.samtools:htsjdk:' + htsjdkVersion
     // later versions explode Hadoop
     // TODO: this is the same in GATK, but we should check if they solve this issue in the future
-    force 'com.google.protobuf:protobuf-java:3.8.0'
-    // force testng dependency so we don't pick up a different version via GenomicsDB/GATK
+    force 'com.google.protobuf:protobuf-java:3.21.6'
+    // force testng dependency so we don't pick up a different version via GenomicsDB
     force 'org.testng:testng:' + testNGVersion
     force 'org.broadinstitute:barclay:' + barclayVersion
@@ -79,16 +79,7 @@ configurations.all {
     }
 }

-// Get the jdk files we need to run javaDoc. We need to use these during compile, testCompile,
-// test execution, and toolDoc generation, but we don't want them as part of the runtime
-// classpath and we don't want to redistribute them in the uber jar.
-final javadocJDKFiles = ToolProvider.getSystemToolClassLoader() == null ? files([]) : files(((URLClassLoader) ToolProvider.getSystemToolClassLoader()).getURLs())
-
 dependencies {
-    // javadoc utilities; compile/test only to prevent redistribution of sdk jars
-    compileOnly(javadocJDKFiles)
-    testImplementation(javadocJDKFiles)
-
     implementation 'org.broadinstitute:barclay:' + barclayVersion
     implementation 'org.apache.lucene:lucene-core:' + luceneVersion
     implementation 'org.apache.lucene:lucene-queryparser:' + luceneVersion
@@ -131,6 +122,13 @@ dependencies {
     implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.12.0'

     implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.12.7.1'
+
+    implementation 'org.apache.logging.log4j:log4j-api:' + log4j2Version
+    implementation 'org.apache.logging.log4j:log4j-core:' + log4j2Version
+    // include the apache commons-logging bridge that matches the log4j version we use so
+    // messages that originate with dependencies that use commons-logging (such as jexl)
+    // are routed to log4j
+    implementation 'org.apache.logging.log4j:log4j-jcl:' + log4j2Version
 }

 wrapper {
@@ -205,8 +203,8 @@ javadoc {

 // Generate Online Doc
 task toolDoc(type: Javadoc, dependsOn: classes ) {
-    final File baseDocDir = new File("build/docs")
-    final File toolDocDir = new File("build/docs/toolDoc")
+    final File baseDocDir = new File("$buildDir/docs")
+    final File toolDocDir = new File("$buildDir/docs/toolDoc")
     doFirst {
         // make sure the output folder exists or we can create it
         if (!toolDocDir.exists() && !toolDocDir.mkdirs()) {
@@ -243,8 +241,7 @@ task toolDoc(type: Javadoc, dependsOn: classes ) {

     // The doc process instantiates any documented feature classes, so to run it we need the entire
     // runtime classpath, as well as jdk javadoc files such as tools.jar, where com.sun.javadoc lives.
-    classpath = sourceSets.main.runtimeClasspath + javadocJDKFiles
-
+    classpath = sourceSets.main.runtimeClasspath
     options.docletpath = classpath as List
     options.doclet = "com.github.discvrseq.util.help.DISCVRSeqHelpDoclet"
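As context for the new log4j dependencies (a minimal sketch, not part of the PR; the class name and message are illustrative), the log4j-jcl bridge means that code logging through Apache commons-logging, as JEXL does, is handled by log4j2 once the bridge jar is on the classpath:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class JclBridgeDemo {
    // commons-logging API, as used by dependencies such as JEXL
    private static final Log log = LogFactory.getLog(JclBridgeDemo.class);

    public static void main(String[] args) {
        // with log4j-jcl (and log4j-core) on the classpath, this call is
        // routed to log4j2 instead of commons-logging's own implementation
        log.info("routed to log4j2 via the JCL bridge");
    }
}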
src/main/java/com/github/discvrseq/util/help/DISCVRSeqHelpDoclet.java
@@ -1,71 +1,15 @@
 package com.github.discvrseq.util.help;

-import com.sun.javadoc.ClassDoc;
-import com.sun.javadoc.RootDoc;
-import org.broadinstitute.barclay.help.DocWorkUnit;
-import org.broadinstitute.barclay.help.DocumentedFeature;
-import org.broadinstitute.barclay.help.GSONWorkUnit;
-import org.broadinstitute.hellbender.utils.help.GATKDocWorkUnit;
-import org.broadinstitute.hellbender.utils.help.GATKHelpDocWorkUnitHandler;
+import jdk.javadoc.doclet.DocletEnvironment;
 import org.broadinstitute.hellbender.utils.help.GATKHelpDoclet;

-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
+@SuppressWarnings("removal")
 public class DISCVRSeqHelpDoclet extends GATKHelpDoclet {
     public DISCVRSeqHelpDoclet() {

     }

-    /**
-     * Create a doclet of the appropriate type and generate the FreeMarker templates properties.
-     * @param rootDoc
-     * @throws IOException
-     */
-    public static boolean start(final RootDoc rootDoc) throws IOException {
-        return new DISCVRSeqHelpDoclet().startProcessDocs(rootDoc);
-    }
-
-    /**
-     * @return Create and return a DocWorkUnit-derived object to handle documentation
-     * for the target feature(s) represented by documentedFeature.
-     *
-     * @param documentedFeature DocumentedFeature annotation for the target feature
-     * @param classDoc javadoc classDoc for the target feature
-     * @param clazz class of the target feature
-     * @return DocWorkUnit to be used for this feature
-     */
-    @Override
-    protected DocWorkUnit createWorkUnit(
-            final DocumentedFeature documentedFeature,
-            final ClassDoc classDoc,
-            final Class<?> clazz)
-    {
-        return new GATKDocWorkUnit(
-                new GATKHelpDocWorkUnitHandler(this),
-                documentedFeature,
-                classDoc,
-                clazz);
-    }
-
-    /**
-     * Create a GSONWorkUnit-derived object that holds our custom data. This method should create the object, and
-     * propagate any custom javadoc tags from the template map to the newly created GSON object; specifically
-     * "walkertype", which is pulled from a custom javadoc tag.
-     *
-     * @param workUnit work unit for which a GSON object is required
-     * @param groupMaps
-     * @param featureMaps
-     * @return a GSONWorkUnit-derived object for this work unit, populated with any custom values
-     */
-    @Override
-    protected GSONWorkUnit createGSONWorkUnit(
-            final DocWorkUnit workUnit,
-            final List<Map<String, String>> groupMaps,
-            final List<Map<String, String>> featureMaps)
-    {
-        return super.createGSONWorkUnit(workUnit, groupMaps, featureMaps);
+    public static boolean processDocs(final DocletEnvironment docletEnv) {
+        return new DISCVRSeqHelpDoclet().run(docletEnv);
     }
 }
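Background for this rewrite: the legacy com.sun.javadoc API (RootDoc, ClassDoc, and the static start entry point) was deprecated in JDK 9 and removed in JDK 13, so a doclet that runs under JDK 17 must implement jdk.javadoc.doclet.Doclet, whose entry point is run(DocletEnvironment). A minimal standalone skeleton of the new API (hypothetical class, independent of Barclay/GATK) looks like:

import java.util.Locale;
import java.util.Set;
import javax.lang.model.SourceVersion;
import jdk.javadoc.doclet.Doclet;
import jdk.javadoc.doclet.DocletEnvironment;
import jdk.javadoc.doclet.Reporter;

public class SkeletonDoclet implements Doclet {
    @Override
    public void init(Locale locale, Reporter reporter) {
        // called once before run(); a place to keep the reporter for diagnostics
    }

    @Override
    public String getName() {
        return "SkeletonDoclet";
    }

    @Override
    public Set<? extends Option> getSupportedOptions() {
        return Set.of(); // no custom command-line options
    }

    @Override
    public SourceVersion getSupportedSourceVersion() {
        return SourceVersion.latest();
    }

    @Override
    public boolean run(DocletEnvironment env) {
        // DocletEnvironment replaces the old RootDoc/ClassDoc tree
        env.getIncludedElements().forEach(e ->
                System.out.println(e.getKind() + " " + e));
        return true; // true == success
    }
}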
159 changes: 159 additions & 0 deletions src/main/java/com/github/discvrseq/walkers/ExtendedFuncotator.java
@@ -0,0 +1,159 @@
+package com.github.discvrseq.walkers;
+
+import com.github.discvrseq.tools.DiscvrSeqProgramGroup;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.engine.FeatureContext;
+import org.broadinstitute.hellbender.engine.ReferenceContext;
+import org.broadinstitute.hellbender.tools.funcotator.*;
+import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer;
+import org.broadinstitute.hellbender.utils.Utils;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Create functional annotations on given variants cross-referenced by a given set of data sources.
+ *
+ * <h3>Usage example:</h3>
+ * <pre>
+ *   java -jar DISCVRseq.jar ExtendedFuncotator \
+ *       -V input.vcf.gz \
+ *       -O output.annotated.vcf.gz
+ * </pre>
+ */
+//@CommandLineProgramProperties(
+//        summary = "Create functional annotations on given variants cross-referenced by a given set of data sources.\n" +
+//                "A GATK functional annotation tool (similar functionality to Oncotator).",
+//        oneLineSummary = "Functional Annotator",
+//        programGroup = DiscvrSeqProgramGroup.class
+//)
+//@DocumentedFeature
+public class ExtendedFuncotator extends Funcotator {
+    private static final Logger logger = LogManager.getLogger(ExtendedFuncotator.class);
+
+    @Override
+    public void onTraversalStart() {
+
+        // Get our overrides for annotations:
+        final LinkedHashMap<String, String> annotationDefaultsMap = FuncotatorEngine.splitAnnotationArgsIntoMap(getArguments().annotationDefaults);
+        final LinkedHashMap<String, String> annotationOverridesMap = FuncotatorEngine.splitAnnotationArgsIntoMap(getArguments().annotationOverrides);
+
+        // Get the header for our variants:
+        final VCFHeader vcfHeader = getHeaderForVariants();
+
+        // Create our output renderer:
+        logger.info("Creating a " + getArguments().outputFormatType + " file for output: " + getArguments().outputFile.toURI());
+        outputRenderer = new ExtendedVcfOutputRenderer(
+                this.createVCFWriter(getArguments().outputFile),
+                funcotatorEngine.getFuncotationFactories(),
+                vcfHeader,
+                annotationDefaultsMap,
+                annotationOverridesMap,
+                getDefaultToolVCFHeaderLines(),
+                getArguments().excludedFields,
+                this.getVersion()
+        );
+    }
+
+    protected void enqueueAndHandleVariant(final VariantContext variant, final ReferenceContext referenceContext, final FeatureContext featureContext) {
+
+        final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(variant, referenceContext, featureContext);
+
+        // At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected
+        outputRenderer.write(variant, funcotationMap);
+    }
+
+    private static final class ExtendedVcfOutputRenderer extends VcfOutputRenderer {
+        private final VariantContextWriter vcfWriter;
+
+        public ExtendedVcfOutputRenderer(final VariantContextWriter vcfWriter,
+                                         final List<DataSourceFuncotationFactory> dataSources,
+                                         final VCFHeader existingHeader,
+                                         final LinkedHashMap<String, String> unaccountedForDefaultAnnotations,
+                                         final LinkedHashMap<String, String> unaccountedForOverrideAnnotations,
+                                         final Set<VCFHeaderLine> defaultToolVcfHeaderLines,
+                                         final Set<String> excludedOutputFields,
+                                         final String toolVersion) {
+            super(vcfWriter, dataSources, existingHeader, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, defaultToolVcfHeaderLines, excludedOutputFields, toolVersion);
+            this.vcfWriter = vcfWriter;
+        }
+
+        @Override
+        public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) {
+
+            // Create a new variant context builder:
+            final VariantContextBuilder variantContextOutputBuilder = new VariantContextBuilder(variant);
+
+            final StringBuilder funcotatorAnnotationStringBuilder = new StringBuilder();
+
+            // Get the old VCF Annotation field and append the new information to it:
+            final Object existingAnnotation = variant.getAttribute(FUNCOTATOR_VCF_FIELD_NAME, null);
+            final List<String> existingAlleleAnnotations;
+            if ( existingAnnotation != null) {
+                existingAlleleAnnotations = Utils.split(existingAnnotation.toString(), ',');
+            }
+            else {
+                existingAlleleAnnotations = Collections.emptyList();
+            }
+
+            // Go through each allele and add it to the writer separately:
+            final List<Allele> alternateAlleles = variant.getAlternateAlleles();
+            for ( int alleleIndex = 0; alleleIndex < alternateAlleles.size() ; ++alleleIndex ) {
+
+                final Allele altAllele = alternateAlleles.get(alleleIndex);
+
+                if ( alleleIndex < existingAlleleAnnotations.size() ) {
+                    funcotatorAnnotationStringBuilder.append( existingAlleleAnnotations.get(alleleIndex) );
+                    funcotatorAnnotationStringBuilder.append(FIELD_DELIMITER);
+                }
+
+//                for (final String txId : txToFuncotationMap.getTranscriptList()) {
+//                    funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER);
+//                    final List<Funcotation> funcotations = txToFuncotationMap.get(txId);
+//                    final Funcotation manualAnnotationFuncotation = createManualAnnotationFuncotation(altAllele);
+//
+//                    funcotatorAnnotationStringBuilder.append(
+//                            Stream.concat(funcotations.stream(), Stream.of(manualAnnotationFuncotation))
+//                                    .filter(f -> f.getAltAllele().equals(altAllele))
+//                                    .filter(f -> f.getFieldNames().size() > 0)
+//                                    .filter(f -> !f.getDataSourceName().equals(FuncotatorConstants.DATASOURCE_NAME_FOR_INPUT_VCFS))
+//                                    .map(VcfOutputRenderer::adjustIndelAlleleInformation)
+//                                    .map(f -> FuncotatorUtils.renderSanitizedFuncotationForVcf(f, finalFuncotationFieldNames))
+//                                    .collect(Collectors.joining(FIELD_DELIMITER))
+//                    );
+//
+//                    funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER);
+//                }
+//                // We have a trailing "#" - we need to remove it:
+//                funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);
+//                funcotatorAnnotationStringBuilder.append(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
+            }
+
+            // We have a trailing "," - we need to remove it:
+            funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);
+
+            // Add our new annotation:
+            variantContextOutputBuilder.attribute(FUNCOTATOR_VCF_FIELD_NAME, funcotatorAnnotationStringBuilder.toString());
+
+            // Add the genotypes from the variant:
+            variantContextOutputBuilder.genotypes( variant.getGenotypes() );
+
+            // Render and add our VCF line:
+            vcfWriter.add( variantContextOutputBuilder.make() );
+        }
+    }
+}
@@ -85,6 +85,8 @@ public Collection<Annotation> makeVariantAnnotations() {

 public static class DiscvrAnnotationPluginDescriptor extends GATKAnnotationPluginDescriptor
 {
+    private static final long serialVersionUID = 1L;
+
     @ArgumentCollection
     public GenotypeConcordanceArgumentCollection genotypeConcordanceArgumentCollection = new GenotypeConcordanceArgumentCollection();