Update GATK version and support new Java versions #224

Merged · 9 commits · Mar 29, 2023
2 changes: 1 addition & 1 deletion .github/workflows/gradle.yml
@@ -14,7 +14,7 @@ jobs:
     - name: Set up JDK
       uses: actions/setup-java@v3
       with:
-        java-version: '8'
+        java-version: '17'
         distribution: 'adopt'
     - name: Cache
       uses: actions/cache@v3
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
@@ -12,7 +12,7 @@ jobs:
     - name: Set up JDK
       uses: actions/setup-java@v3
       with:
-        java-version: '8'
+        java-version: '17'
         distribution: 'adopt'
     - name: Grant execute permission for gradlew
       run: chmod +x gradlew
@@ -41,13 +41,13 @@ jobs:
         enable_jekyll: true
     - name: Docker meta
       id: docker_meta
-      uses: crazy-max/ghaction-docker-meta@v4.3.0
+      uses: docker/metadata-action@v4
       with:
         images: ghcr.io/bimberlab/discvrseq
-        tag-sha: true
-        tag-semver: |
-          {{version}}
-          {{major}}.{{minor}}
+        tags: |
+          type=semver,pattern={{version}}
+          type=semver,pattern={{major}}.{{minor}}
+          type=sha
     - name: Set up QEMU
       uses: docker/setup-qemu-action@v2.1.0
     - name: Set up Docker Buildx
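(Context, not part of the diff: crazy-max/ghaction-docker-meta was renamed to docker/metadata-action, and newer releases of the action replaced the separate tag-sha / tag-semver inputs with the single tags input used above, where each line declares a tag source such as type=semver or type=sha.)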
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-FROM adoptopenjdk/openjdk8
+FROM adoptopenjdk/openjdk17

 # See: https://stackoverflow.com/questions/44331836/apt-get-install-tzdata-noninteractive
 ENV DEBIAN_FRONTEND=noninteractive
35 changes: 16 additions & 19 deletions build.gradle
@@ -20,7 +20,6 @@ apply plugin: 'java'

 repositories {
     mavenCentral()
-    jcenter()
     maven {
         url "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot/"
     }
@@ -52,12 +51,13 @@ configurations {
 mainClassName = "com.github." + rootProject.name.toLowerCase() + ".Main"

 //see this thread: https://github.com/broadinstitute/gatk/issues/2300#issuecomment-333627036
-final gatkVersion = '4.3.0.0'
-final htsjdkVersion = System.getProperty('htsjdk.version','3.0.1')
-final barclayVersion = System.getProperty('barclay.version','4.0.2')
+final gatkVersion = '4.4.0.0'
+final htsjdkVersion = System.getProperty('htsjdk.version','3.0.5')
+final barclayVersion = System.getProperty('barclay.version','5.0.0')
 final luceneVersion = System.getProperty('lucene.version','8.11.2')
 final testNGVersion = '7.0.0'
 final googleCloudNioDependency = 'com.google.cloud:google-cloud-nio:0.123.25'
+final log4j2Version = System.getProperty('log4j2Version', '2.17.1')

 final docBuildDir = "$buildDir/docs"
 logger.info(docBuildDir)
@@ -68,8 +68,8 @@ configurations.all {
     force 'com.github.samtools:htsjdk:' + htsjdkVersion
     // later versions explode Hadoop
     // TODO: this is the same in GATK, but we should check if they solve this issue in the future
-    force 'com.google.protobuf:protobuf-java:3.8.0'
-    // force testng dependency so we don't pick up a different version via GenomicsDB/GATK
+    force 'com.google.protobuf:protobuf-java:3.21.6'
+    // force testng dependency so we don't pick up a different version via GenomicsDB
     force 'org.testng:testng:' + testNGVersion
     force 'org.broadinstitute:barclay:' + barclayVersion
@@ -79,16 +79,7 @@ configurations.all {
     }
 }

-// Get the jdk files we need to run javaDoc. We need to use these during compile, testCompile,
-// test execution, and toolDoc generation, but we don't want them as part of the runtime
-// classpath and we don't want to redistribute them in the uber jar.
-final javadocJDKFiles = ToolProvider.getSystemToolClassLoader() == null ? files([]) : files(((URLClassLoader) ToolProvider.getSystemToolClassLoader()).getURLs())
-
 dependencies {
-    // javadoc utilities; compile/test only to prevent redistribution of sdk jars
-    compileOnly(javadocJDKFiles)
-    testImplementation(javadocJDKFiles)
-
     implementation 'org.broadinstitute:barclay:' + barclayVersion
     implementation 'org.apache.lucene:lucene-core:' + luceneVersion
     implementation 'org.apache.lucene:lucene-queryparser:' + luceneVersion
@@ -131,6 +122,13 @@ dependencies {
     implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.12.0'

     implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.12.7.1'
+
+    implementation 'org.apache.logging.log4j:log4j-api:' + log4j2Version
+    implementation 'org.apache.logging.log4j:log4j-core:' + log4j2Version
+    // include the apache commons-logging bridge that matches the log4j version we use so
+    // messages that originate with dependencies that use commons-logging (such as jexl)
+    // are routed to log4j
+    implementation 'org.apache.logging.log4j:log4j-jcl:' + log4j2Version
 }

 wrapper {
@@ -205,8 +203,8 @@ javadoc {

 // Generate Online Doc
 task toolDoc(type: Javadoc, dependsOn: classes ) {
-    final File baseDocDir = new File("build/docs")
-    final File toolDocDir = new File("build/docs/toolDoc")
+    final File baseDocDir = new File("$buildDir/docs")
+    final File toolDocDir = new File("$buildDir/docs/toolDoc")
     doFirst {
         // make sure the output folder exists or we can create it
         if (!toolDocDir.exists() && !toolDocDir.mkdirs()) {
@@ -243,8 +241,7 @@ task toolDoc(type: Javadoc, dependsOn: classes ) {

     // The doc process instantiates any documented feature classes, so to run it we need the entire
     // runtime classpath, as well as jdk javadoc files such as tools.jar, where com.sun.javadoc lives.
-    classpath = sourceSets.main.runtimeClasspath + javadocJDKFiles
-
+    classpath = sourceSets.main.runtimeClasspath
     options.docletpath = classpath as List
     options.doclet = "com.github.discvrseq.util.help.DISCVRSeqHelpDoclet"
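As context for the new log4j dependencies (a minimal sketch, not part of the PR; the class name and message are illustrative), the log4j-jcl bridge means that code logging through Apache commons-logging, as JEXL does, is handled by log4j2 once the bridge jar is on the classpath:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class JclBridgeDemo {
    // commons-logging API, as used by dependencies such as JEXL
    private static final Log log = LogFactory.getLog(JclBridgeDemo.class);

    public static void main(String[] args) {
        // with log4j-jcl (and log4j-core) on the classpath, this call is
        // routed to log4j2 instead of commons-logging's own implementation
        log.info("routed to log4j2 via the JCL bridge");
    }
}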
src/main/java/com/github/discvrseq/util/help/DISCVRSeqHelpDoclet.java
@@ -1,71 +1,15 @@
 package com.github.discvrseq.util.help;

-import com.sun.javadoc.ClassDoc;
-import com.sun.javadoc.RootDoc;
-import org.broadinstitute.barclay.help.DocWorkUnit;
-import org.broadinstitute.barclay.help.DocumentedFeature;
-import org.broadinstitute.barclay.help.GSONWorkUnit;
-import org.broadinstitute.hellbender.utils.help.GATKDocWorkUnit;
-import org.broadinstitute.hellbender.utils.help.GATKHelpDocWorkUnitHandler;
+import jdk.javadoc.doclet.DocletEnvironment;
 import org.broadinstitute.hellbender.utils.help.GATKHelpDoclet;

-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
+@SuppressWarnings("removal")
 public class DISCVRSeqHelpDoclet extends GATKHelpDoclet {
     public DISCVRSeqHelpDoclet() {

     }

-    /**
-     * Create a doclet of the appropriate type and generate the FreeMarker templates properties.
-     * @param rootDoc
-     * @throws IOException
-     */
-    public static boolean start(final RootDoc rootDoc) throws IOException {
-        return new DISCVRSeqHelpDoclet().startProcessDocs(rootDoc);
-    }
-
-    /**
-     * @return Create and return a DocWorkUnit-derived object to handle documentation
-     * for the target feature(s) represented by documentedFeature.
-     *
-     * @param documentedFeature DocumentedFeature annotation for the target feature
-     * @param classDoc javadoc classDoc for the target feature
-     * @param clazz class of the target feature
-     * @return DocWorkUnit to be used for this feature
-     */
-    @Override
-    protected DocWorkUnit createWorkUnit(
-            final DocumentedFeature documentedFeature,
-            final ClassDoc classDoc,
-            final Class<?> clazz)
-    {
-        return new GATKDocWorkUnit(
-                new GATKHelpDocWorkUnitHandler(this),
-                documentedFeature,
-                classDoc,
-                clazz);
-    }
-
-    /**
-     * Create a GSONWorkUnit-derived object that holds our custom data. This method should create the object, and
-     * propagate any custom javadoc tags from the template map to the newly created GSON object; specifically
-     * "walkertype", which is pulled from a custom javadoc tag.
-     *
-     * @param workUnit work unit for which a GSON object is required
-     * @param groupMaps
-     * @param featureMaps
-     * @return a GSONWorkUnit-derived object for this work unit, populated with any custom values
-     */
-    @Override
-    protected GSONWorkUnit createGSONWorkUnit(
-            final DocWorkUnit workUnit,
-            final List<Map<String, String>> groupMaps,
-            final List<Map<String, String>> featureMaps)
-    {
-        return super.createGSONWorkUnit(workUnit, groupMaps, featureMaps);
+    public static boolean processDocs(final DocletEnvironment docletEnv) {
+        return new DISCVRSeqHelpDoclet().run(docletEnv);
     }
 }
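Background for this rewrite: the legacy com.sun.javadoc API (RootDoc, ClassDoc, and the static start entry point) was deprecated in JDK 9 and removed in JDK 13, so a doclet that runs under JDK 17 must implement jdk.javadoc.doclet.Doclet, whose entry point is run(DocletEnvironment). A minimal standalone skeleton of the new API (hypothetical class, independent of Barclay/GATK) looks like:

import java.util.Locale;
import java.util.Set;
import javax.lang.model.SourceVersion;
import jdk.javadoc.doclet.Doclet;
import jdk.javadoc.doclet.DocletEnvironment;
import jdk.javadoc.doclet.Reporter;

public class SkeletonDoclet implements Doclet {
    @Override
    public void init(Locale locale, Reporter reporter) {
        // called once before run(); a place to keep the reporter for diagnostics
    }

    @Override
    public String getName() {
        return "SkeletonDoclet";
    }

    @Override
    public Set<? extends Option> getSupportedOptions() {
        return Set.of(); // no custom command-line options
    }

    @Override
    public SourceVersion getSupportedSourceVersion() {
        return SourceVersion.latest();
    }

    @Override
    public boolean run(DocletEnvironment env) {
        // DocletEnvironment replaces the old RootDoc/ClassDoc tree
        env.getIncludedElements().forEach(e ->
                System.out.println(e.getKind() + " " + e));
        return true; // true == success
    }
}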
159 changes: 159 additions & 0 deletions src/main/java/com/github/discvrseq/walkers/ExtendedFuncotator.java
@@ -0,0 +1,159 @@
+package com.github.discvrseq.walkers;
+
+import com.github.discvrseq.tools.DiscvrSeqProgramGroup;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.engine.FeatureContext;
+import org.broadinstitute.hellbender.engine.ReferenceContext;
+import org.broadinstitute.hellbender.tools.funcotator.*;
+import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer;
+import org.broadinstitute.hellbender.utils.Utils;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Create functional annotations on given variants cross-referenced by a given set of data sources.
+ *
+ * <h3>Usage example:</h3>
+ * <pre>
+ *   java -jar DISCVRseq.jar ExtendedFuncotator \
+ *       -V input.vcf.gz \
+ *       -O output.annotated.vcf.gz
+ * </pre>
+ */
+//@CommandLineProgramProperties(
+//        summary = "Create functional annotations on given variants cross-referenced by a given set of data sources.\n" +
+//                "A GATK functional annotation tool (similar functionality to Oncotator).",
+//        oneLineSummary = "Functional Annotator",
+//        programGroup = DiscvrSeqProgramGroup.class
+//)
+//@DocumentedFeature
+public class ExtendedFuncotator extends Funcotator {
+    private static final Logger logger = LogManager.getLogger(ExtendedFuncotator.class);
+
+    @Override
+    public void onTraversalStart() {
+
+        // Get our overrides for annotations:
+        final LinkedHashMap<String, String> annotationDefaultsMap = FuncotatorEngine.splitAnnotationArgsIntoMap(getArguments().annotationDefaults);
+        final LinkedHashMap<String, String> annotationOverridesMap = FuncotatorEngine.splitAnnotationArgsIntoMap(getArguments().annotationOverrides);
+
+        // Get the header for our variants:
+        final VCFHeader vcfHeader = getHeaderForVariants();
+
+        // Create our output renderer:
+        logger.info("Creating a " + getArguments().outputFormatType + " file for output: " + getArguments().outputFile.toURI());
+        outputRenderer = new ExtendedVcfOutputRenderer(
+                this.createVCFWriter(getArguments().outputFile),
+                funcotatorEngine.getFuncotationFactories(),
+                vcfHeader,
+                annotationDefaultsMap,
+                annotationOverridesMap,
+                getDefaultToolVCFHeaderLines(),
+                getArguments().excludedFields,
+                this.getVersion()
+        );
+    }
+
+    protected void enqueueAndHandleVariant(final VariantContext variant, final ReferenceContext referenceContext, final FeatureContext featureContext) {
+
+        final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(variant, referenceContext, featureContext);
+
+        // At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected
+        outputRenderer.write(variant, funcotationMap);
+    }
+
+    private static final class ExtendedVcfOutputRenderer extends VcfOutputRenderer {
+        private final VariantContextWriter vcfWriter;
+
+        public ExtendedVcfOutputRenderer(final VariantContextWriter vcfWriter,
+                                         final List<DataSourceFuncotationFactory> dataSources,
+                                         final VCFHeader existingHeader,
+                                         final LinkedHashMap<String, String> unaccountedForDefaultAnnotations,
+                                         final LinkedHashMap<String, String> unaccountedForOverrideAnnotations,
+                                         final Set<VCFHeaderLine> defaultToolVcfHeaderLines,
+                                         final Set<String> excludedOutputFields,
+                                         final String toolVersion) {
+            super(vcfWriter, dataSources, existingHeader, unaccountedForDefaultAnnotations, unaccountedForOverrideAnnotations, defaultToolVcfHeaderLines, excludedOutputFields, toolVersion);
+            this.vcfWriter = vcfWriter;
+        }
+
+        @Override
+        public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) {
+
+            // Create a new variant context builder:
+            final VariantContextBuilder variantContextOutputBuilder = new VariantContextBuilder(variant);
+
+            final StringBuilder funcotatorAnnotationStringBuilder = new StringBuilder();
+
+            // Get the old VCF Annotation field and append the new information to it:
+            final Object existingAnnotation = variant.getAttribute(FUNCOTATOR_VCF_FIELD_NAME, null);
+            final List<String> existingAlleleAnnotations;
+            if ( existingAnnotation != null) {
+                existingAlleleAnnotations = Utils.split(existingAnnotation.toString(), ',');
+            }
+            else {
+                existingAlleleAnnotations = Collections.emptyList();
+            }
+
+            // Go through each allele and add it to the writer separately:
+            final List<Allele> alternateAlleles = variant.getAlternateAlleles();
+            for ( int alleleIndex = 0; alleleIndex < alternateAlleles.size() ; ++alleleIndex ) {
+
+                final Allele altAllele = alternateAlleles.get(alleleIndex);
+
+                if ( alleleIndex < existingAlleleAnnotations.size() ) {
+                    funcotatorAnnotationStringBuilder.append( existingAlleleAnnotations.get(alleleIndex) );
+                    funcotatorAnnotationStringBuilder.append(FIELD_DELIMITER);
+                }
+
+//                for (final String txId : txToFuncotationMap.getTranscriptList()) {
+//                    funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER);
+//                    final List<Funcotation> funcotations = txToFuncotationMap.get(txId);
+//                    final Funcotation manualAnnotationFuncotation = createManualAnnotationFuncotation(altAllele);
+//
+//                    funcotatorAnnotationStringBuilder.append(
+//                            Stream.concat(funcotations.stream(), Stream.of(manualAnnotationFuncotation))
+//                                    .filter(f -> f.getAltAllele().equals(altAllele))
+//                                    .filter(f -> f.getFieldNames().size() > 0)
+//                                    .filter(f -> !f.getDataSourceName().equals(FuncotatorConstants.DATASOURCE_NAME_FOR_INPUT_VCFS))
+//                                    .map(VcfOutputRenderer::adjustIndelAlleleInformation)
+//                                    .map(f -> FuncotatorUtils.renderSanitizedFuncotationForVcf(f, finalFuncotationFieldNames))
+//                                    .collect(Collectors.joining(FIELD_DELIMITER))
+//                    );
+//
+//                    funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER);
+//                }
+//                // We have a trailing "#" - we need to remove it:
+//                funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);
+//                funcotatorAnnotationStringBuilder.append(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
+            }
+
+            // We have a trailing "," - we need to remove it:
+            funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1);
+
+            // Add our new annotation:
+            variantContextOutputBuilder.attribute(FUNCOTATOR_VCF_FIELD_NAME, funcotatorAnnotationStringBuilder.toString());
+
+            // Add the genotypes from the variant:
+            variantContextOutputBuilder.genotypes( variant.getGenotypes() );
+
+            // Render and add our VCF line:
+            vcfWriter.add( variantContextOutputBuilder.make() );
+        }
+    }
+}
@@ -85,6 +85,8 @@ public Collection<Annotation> makeVariantAnnotations() {

 public static class DiscvrAnnotationPluginDescriptor extends GATKAnnotationPluginDescriptor
 {
+    private static final long serialVersionUID = 1L;
+
     @ArgumentCollection
     public GenotypeConcordanceArgumentCollection genotypeConcordanceArgumentCollection = new GenotypeConcordanceArgumentCollection();