Resolves bigdatagenomics#882. Adds an R API that wraps the ADAM GenomicRDD APIs. Supports Spark 2.x and onwards, since the SparkR functionality needed for binding to Java libraries only became accessible in Spark 2.0.0.
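For orientation, here is a minimal usage sketch of the new bindings, put together from the ADAMContext constructor and loadAlignments method added below. The jar path and input file name are placeholders, and the toDF call assumes the DataFrame conversion exported from rdd.R.

    library(SparkR)
    library(bdg.adam)

    # Start a SparkR session with the ADAM assembly jar on the classpath
    # (the jar path here is a placeholder).
    ss <- sparkR.session(sparkJars = "adam-assembly.jar")

    # Wrap the session's SparkContext in an ADAMContext.
    ac <- ADAMContext(ss)

    # Load alignments; the format is chosen by file extension (SAM here).
    reads <- loadAlignments(ac, "sample.sam")

    # Inspect the reads as a SparkR DataFrame.
    head(toDF(reads))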
Showing 20 changed files with 1,539 additions and 24 deletions.
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
DESCRIPTION
@@ -0,0 +1,25 @@
Package: bdg.adam
Type: Package
Version: 0.23.0
Title: R Frontend for Big Data Genomics/ADAM
Description: The bdg.adam package provides an R frontend for Big Data Genomics/ADAM.
Author: Big Data Genomics
Maintainer: Frank Austin Nothaft <fnothaft@alumni.stanford.edu>
Authors@R: c(person("Frank", "Nothaft", role = c("aut", "cre"),
                    email = "fnothaft@alumni.stanford.edu"),
             person(family = "Big Data Genomics", role = c("aut", "cph")))
License: Apache License (== 2.0)
URL: http://www.bdgenomics.org https://github.com/bigdatagenomics/adam
BugReports: https://github.com/bigdatagenomics/adam/issues
Imports:
    methods
Depends:
    R (>= 3.0),
    SparkR (>= 2.1.0)
Suggests:
    testthat
Collate:
    'generics.R'
    'adam-context.R'
    'rdd.R'
RoxygenNote: 6.0.1
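As a usage note, the DESCRIPTION above is consumed by the standard R tooling; a sketch of installing and loading the package from a built source tarball follows. The tarball name is derived from the Package and Version fields, and the build path is a placeholder rather than a path taken from this commit.

    # Build the tarball first with `R CMD build <path-to-package>` (placeholder path),
    # then install it. SparkR (>= 2.1.0) must already be installed from a matching
    # Spark 2.x distribution.
    install.packages("bdg.adam_0.23.0.tar.gz", repos = NULL, type = "source")
    library(SparkR)
    library(bdg.adam)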
NAMESPACE
@@ -0,0 +1,33 @@
# Generated by roxygen2: do not edit by hand

export(ADAMContext)
exportClasses(ADAMContext)
exportClasses(AlignmentRecordRDD)
exportClasses(CoverageRDD)
exportClasses(FeatureRDD)
exportClasses(FragmentRDD)
exportClasses(GenotypeRDD)
exportClasses(NucleotideContigFragmentRDD)
exportClasses(VariantRDD)
exportMethods(aggregatedCoverage)
exportMethods(countKmers)
exportMethods(coverage)
exportMethods(flankAdjacentFragments)
exportMethods(flatten)
exportMethods(loadAlignments)
exportMethods(loadContigFragments)
exportMethods(loadFeatures)
exportMethods(loadFragments)
exportMethods(loadGenotypes)
exportMethods(loadVariants)
exportMethods(markDuplicates)
exportMethods(realignIndels)
exportMethods(recalibrateBaseQualities)
exportMethods(save)
exportMethods(sortReadsByReferencePosition)
exportMethods(sortReadsByReferencePositionAndIndex)
exportMethods(toCoverage)
exportMethods(toDF)
exportMethods(toFeatureRDD)
exportMethods(toFragments)
exportMethods(toReads)
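Beyond the loaders, the exports above cover the GenomicRDD transformations; a sketch of chaining a few of them is below. The exact signatures live in rdd.R, which is not part of this excerpt, so this assumes each transformation takes the RDD wrapper as its only argument, and the input path plus the `ac` context from the earlier sketch are placeholders.

    # Assuming an ADAMContext `ac` as in the earlier sketch.
    reads <- loadAlignments(ac, "sample.bam")

    # Mark duplicate reads, then sort by reference position.
    dedup <- markDuplicates(reads)
    sorted <- sortReadsByReferencePosition(dedup)

    # Inspect the transformed reads as a SparkR DataFrame.
    head(toDF(sorted))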
adam-context.R
@@ -0,0 +1,149 @@
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
library(SparkR)

setOldClass("jobj")

#' @title Class that represents an ADAMContext.
#' @description The ADAMContext provides helper methods for loading genomic
#' data into Spark RDDs and DataFrames.
#' @slot jac Java object reference to the backing JavaADAMContext.
#' @export
setClass("ADAMContext",
         slots = list(jac = "jobj"))

#' @export
ADAMContext <- function(ss) {
    ssc <- sparkR.callJMethod(ss, "sparkContext")
    ac <- sparkR.newJObject("org.bdgenomics.adam.rdd.ADAMContext", ssc)
    jac <- sparkR.newJObject("org.bdgenomics.adam.api.java.JavaADAMContext", ac)

    new("ADAMContext", jac = jac)
}

#' Loads in alignment records. This method can load SAM, BAM, CRAM, FASTA,
#' FASTQ, interleaved FASTQ, and ADAM (Parquet) files.
#'
#' Loads path names ending in:
#' * .bam/.cram/.sam as BAM/CRAM/SAM format,
#' * .fa/.fasta as FASTA format,
#' * .fq/.fastq as FASTQ format, and
#' * .ifq as interleaved FASTQ format.
#'
#' If none of these match, fall back to Parquet + Avro.
#'
#' For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are supported
#' through compression codecs configured in Hadoop, which by default include .gz and .bz2,
#' but can include more.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing reads.
#'
#' @export
setMethod("loadAlignments",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadAlignments", filePath)
              AlignmentRecordRDD(jrdd)
          })

#' Loads in sequence fragments.
#'
#' Can load from FASTA or from Parquet encoded NucleotideContigFragments.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing sequence fragments.
#'
#' @export
setMethod("loadContigFragments",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadContigFragments", filePath)
              NucleotideContigFragmentRDD(jrdd)
          })

#' Loads in read pairs as fragments.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing sequence fragments.
#'
#' @export
setMethod("loadFragments",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadFragments", filePath)
              FragmentRDD(jrdd)
          })

#' Loads in genomic features.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing features.
#'
#' @export
setMethod("loadFeatures",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadFeatures", filePath)
              FeatureRDD(jrdd)
          })

#' Loads in genomic features as coverage counts.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing coverage.
#'
#' @export
setMethod("loadCoverage",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadCoverage", filePath)
              CoverageRDD(jrdd)
          })

#' Loads in genotypes.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing genotypes.
#'
#' @export
setMethod("loadGenotypes",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadGenotypes", filePath)
              GenotypeRDD(jrdd)
          })

#' Loads in variants.
#'
#' @param ac The ADAMContext.
#' @param filePath The path to load the file from.
#' @return Returns an RDD containing variants.
#'
#' @export
setMethod("loadVariants",
          signature(ac = "ADAMContext", filePath = "character"),
          function(ac, filePath) {
              jrdd <- sparkR.callJMethod(ac@jac, "loadVariants", filePath)
              VariantRDD(jrdd)
          })