Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated versions and avro formats to work with most ADAM 0.19.1-SNAPS… #5

Merged
merged 1 commit into from
Sep 3, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 73 additions & 45 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,17 @@
</issueManagement>

<properties>
<adam.version>0.16.1-SNAPSHOT</adam.version>
<avro.version>1.7.6</avro.version>
<hadoop.version>2.2.0</hadoop.version>
<java.version>1.7</java.version>
<parquet.version>1.6.0rc4</parquet.version>
<scala.version>2.10.3</scala.version>
<scala.version.prefix>2.10</scala.version.prefix>
<spark.version>1.2.0</spark.version>
<adam.version>0.19.1-SNAPSHOT</adam.version>
<avro.version>1.8.0</avro.version>
<!-- Edit the following line to configure the Hadoop (HDFS) version. -->
<utils.version>0.1.3-SNAPSHOT</utils.version>
<hadoop.version>2.6.0</hadoop.version>
<java.version>1.8</java.version>
<parquet.version>1.8.1</parquet.version>
<scala.version>2.10</scala.version>
<scala.version.prefix>2.10</scala.version.prefix>
<spark.version>1.6.1</spark.version>
<bdg-utils.version>0.2.7</bdg-utils.version>
<bdg-formats.version>0.9.1-SNAPSHOT</bdg-formats.version>
</properties>

<build>
Expand Down Expand Up @@ -82,7 +83,7 @@
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.1.5</version>
<version>3.2.2</version>
<executions>
<execution>
<id>scala-compile-first</id>
Expand All @@ -107,8 +108,7 @@
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
<recompileMode>incremental</recompileMode>
<scalaVersion>2.10.4</scalaVersion>
<useZincServer>true</useZincServer>
<args>
<arg>-unchecked</arg>
Expand All @@ -131,7 +131,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<version>1.6</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand All @@ -145,7 +145,7 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0-RC2</version>
<version>1.0</version>
<configuration>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
Expand Down Expand Up @@ -197,7 +197,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<version>2.4</version>
<executions>
<execution>
<id>attach-sources</id>
Expand All @@ -210,7 +210,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
<version>2.10.3</version>
<executions>
<execution>
<id>attach-javadocs</id>
Expand All @@ -223,7 +223,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<version>1.6</version>
<executions>
<execution>
<id>sign-artifacts</id>
Expand All @@ -241,53 +241,71 @@

<dependencies>
<dependency>
<groupId>com.twitter</groupId>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>${parquet.version}</version>
<exclusions>
<exclusion>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-misc</artifactId>
<version>${utils.version}</version>
<type>test-jar</type>
<scope>test</scope>
<groupId>org.bdgenomics.bdg-formats</groupId>
<artifactId>bdg-formats</artifactId>
<version>${bdg-formats.version}</version>
</dependency>
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-misc</artifactId>
<version>${utils.version}</version>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-misc_2.10</artifactId>
<version>${bdg-utils.version}</version>
<type>test-jar</type>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-parquet</artifactId>
<version>${utils.version}</version>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-misc_2.10</artifactId>
<version>${bdg-utils.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.version.prefix}</artifactId>
<version>2.2.2</version>
<version>2.2.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>args4j</groupId>
<artifactId>args4j</artifactId>
<version>2.0.23</version>
</dependency>
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-cli</artifactId>
<version>${utils.version}</version>
<version>2.0.31</version>
</dependency>
<!-- We exclude this from adam, so we need to manually include it -->
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-parquet</artifactId>
<version>${utils.version}</version>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-metrics_2.10</artifactId>
<version>${bdg-utils.version}</version>
</dependency>
<!-- We exclude this from adam, so we need to manually include it -->
<dependency>
<groupId>org.bdgenomics.bdg-utils</groupId>
<artifactId>bdg-utils-metrics</artifactId>
<version>${utils.version}</version>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-cli_2.10</artifactId>
<version>${bdg-utils.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
Expand Down Expand Up @@ -323,7 +341,7 @@
</dependency>
<dependency>
<groupId>org.bdgenomics.adam</groupId>
<artifactId>adam-core</artifactId>
<artifactId>adam-core_${scala.version.prefix}</artifactId>
<version>${adam.version}</version>
<exclusions>
<exclusion>
Expand All @@ -334,7 +352,7 @@
</dependency>
<dependency>
<groupId>org.bdgenomics.adam</groupId>
<artifactId>adam-core</artifactId>
<artifactId>adam-core_${scala.version.prefix}</artifactId>
<version>${adam.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand All @@ -350,6 +368,16 @@
<artifactId>spark-core_${scala.version.prefix}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
107 changes: 50 additions & 57 deletions src/main/resources/avro/bdg.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,44 +19,6 @@
@namespace("org.bdgenomics.formats.avro")
protocol BDG {

/**
Record for describing a reference assembly. Not used for storing the contents
of said assembly.

@see NucleotideContigFragment
*/
record Contig {
/**
The name of this contig in the assembly (e.g., "chr1").
*/
union { null, string } contigName = null;

/**
The length of this contig.
*/
union { null, long } contigLength = null;

/**
The MD5 checksum of the assembly for this contig.
*/
union { null, string } contigMD5 = null;

/**
The URL at which this reference assembly can be found.
*/
union { null, string } referenceURL = null;

/**
The name of this assembly (e.g., "hg19").
*/
union { null, string } assembly = null;

/**
The species that this assembly is for.
*/
union { null, string } species = null;
}

/**
Descriptors for the type of a structural variant. The most specific descriptor
should be used, if possible. E.g., duplication should be used instead of
Expand Down Expand Up @@ -100,47 +62,78 @@ record StructuralVariant {
union { null, int } endWindow = null;
}

/**
Variant.
*/
record Variant {

/**
The reference contig that this variant exists on.
The reference contig this variant exists on. VCF column 1 "CHROM".
*/
union { null, Contig } contig = null;
union { null, string } contigName = null;

/**
The 0-based start position of this variant on the reference contig.
The zero-based start position of this variant on the reference contig.
VCF column 2 "POS" converted to zero-based coordinate system, closed-open intervals.
*/
union { null, long } start = null;

/**
The 0-based, exclusive end position of this variant on the reference contig.
The zero-based, exclusive end position of this variant on the reference contig.
Calculated by start + referenceAllele.length().
*/
union { null, long } end = null;

/**
A string describing the reference allele at this site.
Zero or more unique names or identifiers for this variant. If this is a dbSNP
variant, using the rs number(s) is encouraged. VCF column 3 "ID", copied for
multi-allelic sites.
*/
union { null, string } referenceAllele = null;
array<string> names = [];

/**
A string describing the variant allele at this site. Should be left null if
the site is a structural variant.
A string describing the reference allele at this site. VCF column 4 "REF".
*/
union { null, string } alternateAllele = null;
union { null, string } referenceAllele = null;

/**
The structural variant at this site, if the alternate allele is a structural
variant. If the site is not a structural variant, this field should be left
null.
A string describing the alternate allele at this site. VCF column 5 "ALT", split
for multi-allelic sites.
*/
union { null, StructuralVariant } svAllele = null;
union { null, string } alternateAllele = null;

/**
A boolean describing whether this variant call is somatic; in this case, the
`referenceAllele` will have been observed in another sample.
True if this variant call is somatic; in this case, the referenceAllele will
have been observed in another sample. VCF INFO reserved key "SOMATIC", copied
for multi-allelic sites.
*/
union { boolean, null } isSomatic = false;
union { boolean, null } somatic = false;
}

/**
Strand of an alignment or feature.
*/
enum Strand {
Forward,
Reverse,
Independent

/**
Forward ("+") strand.
*/
FORWARD,

/**
Reverse ("-") strand.
*/
REVERSE,

/**
Independent or not stranded (".").
*/
INDEPENDENT,

/**
Strandedness is relevant, but unknown ("?").
*/
UNKNOWN
}

}
4 changes: 2 additions & 2 deletions src/main/resources/avro/fig.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ record BindingSite {
string tf;

// position
org.bdgenomics.formats.avro.Contig contig;
string contigName;
long start;
long end;
int shift = 0;
Expand All @@ -42,7 +42,7 @@ record ModifiedBindingSite {
string tf;

// position
org.bdgenomics.formats.avro.Contig contig;
string contigName;
long start;
long end;
int shift;
Expand Down
Loading