fnothaft · fnothaft · Sep 3, 2016 · Sep 1, 2016
diff --git a/pom.xml b/pom.xml
@@ -44,16 +44,17 @@
   </issueManagement>
 
   <properties>
-    <adam.version>0.16.1-SNAPSHOT</adam.version>
-    <avro.version>1.7.6</avro.version>
-    <hadoop.version>2.2.0</hadoop.version>
-    <java.version>1.7</java.version>
-    <parquet.version>1.6.0rc4</parquet.version>
-    <scala.version>2.10.3</scala.version>
-    <scala.version.prefix>2.10</scala.version.prefix>
-    <spark.version>1.2.0</spark.version>
+    <adam.version>0.19.1-SNAPSHOT</adam.version>
+    <avro.version>1.8.0</avro.version>
     <!-- Edit the following line to configure the Hadoop (HDFS) version. -->
-    <utils.version>0.1.3-SNAPSHOT</utils.version>
+    <hadoop.version>2.6.0</hadoop.version>
+    <java.version>1.8</java.version>
+    <parquet.version>1.8.1</parquet.version>
+    <scala.version>2.10.4</scala.version>
+    <scala.version.prefix>2.10</scala.version.prefix>
+    <spark.version>1.6.1</spark.version>
+    <bdg-utils.version>0.2.7</bdg-utils.version>
+    <bdg-formats.version>0.9.1-SNAPSHOT</bdg-formats.version>
   </properties>
 
   <build>
@@ -82,7 +83,7 @@
       <plugin>
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
-        <version>3.1.5</version>
+        <version>3.2.2</version>
         <executions>
           <execution>
             <id>scala-compile-first</id>
@@ -107,8 +108,7 @@
           </execution>
         </executions>
         <configuration>
-          <scalaVersion>${scala.version}</scalaVersion>
-          <recompileMode>incremental</recompileMode>
+          <scalaVersion>2.10.4</scalaVersion>
           <useZincServer>true</useZincServer>
           <args>
             <arg>-unchecked</arg>
@@ -131,7 +131,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-gpg-plugin</artifactId>
-        <version>1.5</version>
+        <version>1.6</version>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -145,7 +145,7 @@
       <plugin>
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest-maven-plugin</artifactId>
-        <version>1.0-RC2</version>
+        <version>1.0</version>
         <configuration>
           <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
           <junitxml>.</junitxml>
@@ -197,7 +197,7 @@
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-source-plugin</artifactId>
-            <version>2.2.1</version>
+            <version>2.4</version>
             <executions>
               <execution>
                 <id>attach-sources</id>
@@ -210,7 +210,7 @@
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-javadoc-plugin</artifactId>
-            <version>2.9.1</version>
+            <version>2.10.3</version>
             <executions>
               <execution>
                 <id>attach-javadocs</id>
@@ -223,7 +223,7 @@
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-gpg-plugin</artifactId>
-            <version>1.5</version>
+            <version>1.6</version>
             <executions>
               <execution>
                 <id>sign-artifacts</id>
@@ -241,53 +241,71 @@
 
   <dependencies>
     <dependency>
-      <groupId>com.twitter</groupId>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>${avro.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-avro</artifactId>
       <version>${parquet.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.scala-lang</groupId>
+          <artifactId>scala-library</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-misc</artifactId>
-      <version>${utils.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
+      <groupId>org.bdgenomics.bdg-formats</groupId>
+      <artifactId>bdg-formats</artifactId>
+      <version>${bdg-formats.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-misc</artifactId>
-      <version>${utils.version}</version>
+      <groupId>org.bdgenomics.utils</groupId>
+      <artifactId>utils-misc_${scala.version.prefix}</artifactId>
+      <version>${bdg-utils.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-parquet</artifactId>
-      <version>${utils.version}</version>
+      <groupId>org.bdgenomics.utils</groupId>
+      <artifactId>utils-misc_${scala.version.prefix}</artifactId>
+      <version>${bdg-utils.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.version.prefix}</artifactId>
-      <version>2.2.2</version>
+      <version>2.2.6</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>args4j</groupId>
       <artifactId>args4j</artifactId>
-      <version>2.0.23</version>
-    </dependency>
-    <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-cli</artifactId>
-      <version>${utils.version}</version>
+      <version>2.0.31</version>
     </dependency>
+    <!-- We exclude this from adam, so we need to manually include it -->
     <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-parquet</artifactId>
-      <version>${utils.version}</version>
+      <groupId>org.bdgenomics.utils</groupId>
+      <artifactId>utils-metrics_${scala.version.prefix}</artifactId>
+      <version>${bdg-utils.version}</version>
     </dependency>
-    <!-- We exclude this from adam, so we need to manually include it -->
     <dependency>
-      <groupId>org.bdgenomics.bdg-utils</groupId>
-      <artifactId>bdg-utils-metrics</artifactId>
-      <version>${utils.version}</version>
+      <groupId>org.bdgenomics.utils</groupId>
+      <artifactId>utils-cli_${scala.version.prefix}</artifactId>
+      <version>${bdg-utils.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -323,7 +341,7 @@
     </dependency>
     <dependency>
       <groupId>org.bdgenomics.adam</groupId>
-      <artifactId>adam-core</artifactId>
+      <artifactId>adam-core_${scala.version.prefix}</artifactId>
       <version>${adam.version}</version>
       <exclusions>
         <exclusion>
@@ -334,7 +352,7 @@
     </dependency>
     <dependency>
       <groupId>org.bdgenomics.adam</groupId>
-      <artifactId>adam-core</artifactId>
+      <artifactId>adam-core_${scala.version.prefix}</artifactId>
       <version>${adam.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
@@ -350,6 +368,16 @@
       <artifactId>spark-core_${scala.version.prefix}</artifactId>
       <version>${spark.version}</version>
       <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
   </dependencies>
 </project>
diff --git a/src/main/resources/avro/bdg.avdl b/src/main/resources/avro/bdg.avdl
@@ -19,44 +19,6 @@
 @namespace("org.bdgenomics.formats.avro")
 protocol BDG {
 
-/**
- Record for describing a reference assembly. Not used for storing the contents
- of said assembly.
-
- @see NucleotideContigFragment
- */
-record Contig {
-  /**
-   The name of this contig in the assembly (e.g., "chr1").
-   */
-  union { null, string } contigName = null;
-
-  /**
-   The length of this contig.
-   */
-  union { null, long }   contigLength = null;
-
-  /**
-   The MD5 checksum of the assembly for this contig.
-   */
-  union { null, string } contigMD5 = null;
-
-  /**
-   The URL at which this reference assembly can be found.
-   */
-  union { null, string } referenceURL = null;
-
-  /**
-   The name of this assembly (e.g., "hg19").
-   */
-  union { null, string } assembly = null;
-
-  /**
-   The species that this assembly is for.
-   */
-  union { null, string } species = null;
-}
-
 /**
  Descriptors for the type of a structural variant. The most specific descriptor
  should be used, if possible. E.g., duplication should be used instead of
@@ -100,47 +62,78 @@ record StructuralVariant {
   union { null, int } endWindow = null;
 }
 
+/**
+ Variant.
+ */
 record Variant {
+
   /**
-   The reference contig that this variant exists on.
+   The reference contig this variant exists on. VCF column 1 "CHROM".
    */
-  union { null, Contig } contig = null;
+  union { null, string } contigName = null;
+
   /**
-   The 0-based start position of this variant on the reference contig.
+   The zero-based start position of this variant on the reference contig.
+   VCF column 2 "POS" converted to zero-based coordinate system, closed-open intervals.
    */
   union { null, long } start = null;
+
   /**
-   The 0-based, exclusive end position of this variant on the reference contig.
+   The zero-based, exclusive end position of this variant on the reference contig.
+   Calculated by start + referenceAllele.length().
    */
   union { null, long } end = null;
 
   /**
-   A string describing the reference allele at this site.
+   Zero or more unique names or identifiers for this variant. If this is a dbSNP
+   variant it is encouraged to use the rs number(s). VCF column 3 "ID" copied for
+   multi-allelic sites.
    */
-  union { null, string } referenceAllele = null;
+  array<string> names = [];
+
   /**
-   A string describing the variant allele at this site. Should be left null if
-   the site is a structural variant.
+   A string describing the reference allele at this site. VCF column 4 "REF".
    */
-  union { null, string } alternateAllele = null;
+  union { null, string } referenceAllele = null;
+
   /**
-   The structural variant at this site, if the alternate allele is a structural
-   variant. If the site is not a structural variant, this field should be left
-   null.
+   A string describing the alternate allele at this site. VCF column 5 "ALT" split
+   for multi-allelic sites.
    */
-  union { null, StructuralVariant } svAllele = null;
+  union { null, string } alternateAllele = null;
 
   /**
-   A boolean describing whether this variant call is somatic; in this case, the
-   `referenceAllele` will have been observed in another sample.
+   True if this variant call is somatic; in this case, the referenceAllele will
+   have been observed in another sample. VCF INFO reserved key "SOMATIC" copied
+   for multi-allelic sites.
    */
-  union { boolean, null } isSomatic = false;
+  union { boolean, null } somatic = false;
 }
 
+/**
+ Strand of an alignment or feature.
+ */
 enum Strand {
-  Forward,
-  Reverse,
-  Independent
+
+  /**
+   Forward ("+") strand.
+   */
+  FORWARD,
+
+  /**
+   Reverse ("-") strand.
+   */
+  REVERSE,
+
+  /**
+   Independent or not stranded (".").
+   */
+  INDEPENDENT,
+
+  /**
+   Strandedness is relevant, but unknown ("?").
+   */
+  UNKNOWN
 }
 
 }
diff --git a/src/main/resources/avro/fig.avdl b/src/main/resources/avro/fig.avdl
@@ -24,7 +24,7 @@ record BindingSite {
   string tf;
 
   // position
-  org.bdgenomics.formats.avro.Contig contig;
+  string contigName;
   long start;
   long end;
   int shift = 0;
@@ -42,7 +42,7 @@ record ModifiedBindingSite {
   string tf;
 
   // position
-  org.bdgenomics.formats.avro.Contig contig;
+  string contigName;
   long start;
   long end;
   int shift;