Skip to content

Commit

Permalink
Merge branch 'develop'. Version update to 1.0.1. Close #28
Browse files Browse the repository at this point in the history
  • Loading branch information
mikessh committed Jun 30, 2015
2 parents 588316e + e83c2e5 commit 89a9568
Show file tree
Hide file tree
Showing 32 changed files with 1,080 additions and 27 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

A comprehensive framework for post-analysis of immune repertoire sequencing data.
Compiled binaries are available from [here](https://github.com/mikessh/vdjtools/releases/latest).
The software is cross-platform and requires Java v1.7+ to run and R to generate publication-ready graphics.
The software is cross-platform and requires Java v1.8 (it can still be compiled from source under
JRE 1.7 though) to run and R to generate publication-ready graphics.

List of features and detailed documentation can be found at [ReadTheDocs](http://vdjtools-doc.readthedocs.org/en/latest/).

Expand Down
5 changes: 2 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

<groupId>com.antigenomics</groupId>
<artifactId>vdjtools</artifactId>
<version>1.0.0</version>
<version>1.0.1</version>
<packaging>jar</packaging>

<name>vdjtools</name>
Expand All @@ -35,9 +35,8 @@
<resource>
<directory>src/main/resources</directory>
<includes>
<include>segments/*</include>
<include>rscripts/*</include>
<include>pwm/*</include>
<include>profile/*</include>
</includes>
</resource>
</resources>
Expand Down
6 changes: 5 additions & 1 deletion src/main/groovy/com/antigenomics/vdjtools/VdjTools.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import com.antigenomics.vdjtools.operate.JoinSamples
import com.antigenomics.vdjtools.operate.PoolSamples
import com.antigenomics.vdjtools.overlap.*
import com.antigenomics.vdjtools.preprocess.*
import com.antigenomics.vdjtools.profile.CalcCdrAAProfile
import com.antigenomics.vdjtools.util.Convert
import com.antigenomics.vdjtools.util.ExecUtil
import com.antigenomics.vdjtools.util.RInstall
Expand Down Expand Up @@ -75,6 +76,7 @@ def printHelp = {
println ""
println "[Annotation]"
println "ScanDatabase"
println "CalcCdrAAProfile"
println ""
println "[Util]"
println "Convert"
Expand Down Expand Up @@ -134,6 +136,8 @@ def getScript = { String scriptName ->

case "SCANDATABASE":
return new ScanDatabase()
case "CALCCDRAAPROFILE":
return new CalcCdrAAProfile()

case "CONVERT":
return new Convert()
Expand Down Expand Up @@ -164,7 +168,7 @@ else {
try {
ExecUtil.run(script, args.length > 1 ? args[1..-1] : [""])
} catch (Exception e) {
println "[ERROR] $e.message, see _vdjtools_error.log for details"
println "[ERROR] ${e.toString()}, see _vdjtools_error.log for details"
new File("_vdjtools_error.log").withWriterAppend { writer ->
writer.println("[${new Date()} BEGIN]")
writer.println("[Script]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public class MiXcrParser extends ClonotypeStreamParser {
boolean initialized = false;
int countColumn, freqColumn, cdr3ntColumn, cdr3aaColumn,
vHitsColumn, dHitsColumn, jHitsColumn,
vAlignmentsColumn, dAlignmentsColumn, jAlignmentsColumn
vAlignmentsColumn, dAlignmentsColumn, jAlignmentsColumn,
numberOfColumns

/**
* {@inheritDoc}
Expand Down Expand Up @@ -67,6 +68,8 @@ public class MiXcrParser extends ClonotypeStreamParser {
vAlignmentsColumn == -1 || dAlignmentsColumn == -1 || jAlignmentsColumn == -1)
throw new RuntimeException("Some mandatory columns are absent in the input file.");

numberOfColumns = splitHeaderLine.size()

// Initialized
initialized = true;
}
Expand All @@ -78,7 +81,7 @@ public class MiXcrParser extends ClonotypeStreamParser {
protected Clonotype innerParse(String clonotypeString) {
ensureInitialized()

def splitString = clonotypeString.split(software.delimiter)
def splitString = clonotypeString.split(software.delimiter, numberOfColumns)

def count = splitString[countColumn].toInteger()
def freq = splitString[freqColumn].toDouble()
Expand All @@ -87,18 +90,21 @@ public class MiXcrParser extends ClonotypeStreamParser {

def cdr3aa = splitString[cdr3aaColumn] // no need to unify, MiXCR is based on milib


String v, d, j
(v, d, j) = extractVDJ(splitString[[vHitsColumn, dHitsColumn, jHitsColumn]])

List<Alignment> vAlignemtns = parseAlignments(splitString[vAlignmentsColumn])
List<Alignment> dAlignemtns = parseAlignments(splitString[dAlignmentsColumn])
List<Alignment> jAlignemtns = parseAlignments(splitString[jAlignmentsColumn])

def segmPoints = [vAlignemtns[0].seq2End - 1,
dAlignemtns.size() > 0 ? dAlignemtns[0].seq2Begin : -1,
dAlignemtns.size() > 0 ? dAlignemtns[0].seq2End - 1 : -1,
jAlignemtns[0].seq2Begin] as int[]
def segmPoints = [vAlignemtns.size() > 0 && vAlignemtns[0] != null ?
vAlignemtns[0].seq2End - 1 : 0,
dAlignemtns.size() > 0 && dAlignemtns[0] != null ?
dAlignemtns[0].seq2Begin : -1,
dAlignemtns.size() > 0 && dAlignemtns[0] != null ?
dAlignemtns[0].seq2End - 1 : -1,
jAlignemtns.size() > 0 && jAlignemtns[0] != null ?
jAlignemtns[0].seq2Begin : cdr3nt.size() - 1] as int[]

boolean inFrame = inFrame(cdr3aa),
noStop = noStop(cdr3aa),
Expand All @@ -113,12 +119,16 @@ public class MiXcrParser extends ClonotypeStreamParser {
if (alignmentsLine.isEmpty())
return Collections.EMPTY_LIST

String[] splitByAlignments = alignmentsLine.split(",")
String[] splitByAlignments = alignmentsLine.split(";", -1)
List<Alignment> ret = new ArrayList<>()
for (String alignmentString : splitByAlignments) {
String[] splitByFields = alignmentString.split("\\|")
ret.add(new Alignment(splitByFields[0].toInteger(), splitByFields[1].toInteger(),
splitByFields[3].toInteger(), splitByFields[4].toInteger()));
if (alignmentString.trim().empty) {
ret.add(null)
} else {
String[] splitByFields = alignmentString.split("\\|")
ret.add(new Alignment(splitByFields[0].toInteger(), splitByFields[1].toInteger(),
splitByFields[3].toInteger(), splitByFields[4].toInteger()));
}
}
return ret;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2013-2015 Mikhail Shugay (mikhail.shugay@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.antigenomics.vdjtools.profile

import com.antigenomics.vdjtools.util.CommonUtil

class BasicAminoAcidProperties {
static final BasicAminoAcidProperties INSTANCE = new BasicAminoAcidProperties()

private final AminoAcidProperty[] aminoAcidProperties

private BasicAminoAcidProperties() {
aminoAcidProperties = AminoAcidProperty.fromInput(CommonUtil.resourceStream("profile/aa_property_table.txt"))
}

List<String> getPropertyNames() {
aminoAcidProperties.collect { it.name }
}

AminoAcidProperty[] getProperties(List<String> propertyNames) {
if (propertyNames.isEmpty())
return getProperties()

aminoAcidProperties.findAll { propertyNames.contains(it.name) } as AminoAcidProperty[]
}

AminoAcidProperty[] getProperties() {
Arrays.copyOf(aminoAcidProperties, aminoAcidProperties.length)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright 2013-2015 Mikhail Shugay (mikhail.shugay@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.antigenomics.vdjtools.profile

import com.antigenomics.vdjtools.sample.Sample
import com.antigenomics.vdjtools.sample.SampleCollection
import com.antigenomics.vdjtools.sample.metadata.MetadataTable
import com.antigenomics.vdjtools.util.RUtil

import static com.antigenomics.vdjtools.util.ExecUtil.formOutputPath
import static com.antigenomics.vdjtools.util.ExecUtil.toPlotPath


def DEFAULT_AA_PROPERTIES = BasicAminoAcidProperties.INSTANCE.propertyNames.join(","),
DEFAULT_BINNING = "V-germ:5,VJ-junc:3,J-germ:5"

def cli = new CliBuilder(usage: "CalcCdrAAProfile [options] " +
"[sample1 sample2 sample3 ... if -m is not specified] output_prefix")
cli.h("display help message")
cli.m(longOpt: "metadata", argName: "filename", args: 1,
"Metadata file. First and second columns should contain file name and sample id. " +
"Header is mandatory and will be used to assign column names for metadata.")
cli.u(longOpt: "unweighted", "Will count each clonotype only once, apart from conventional frequency-weighted histogram.")
cli.o(longOpt: "property-list", argName: "group1,...", args: 1,
"Comma-separated list of amino-acid properties to analyze. " +
"Allowed values: $DEFAULT_AA_PROPERTIES. " +
"[default = use all]")
cli.r(longOpt: "region-list", argName: "segment1:nbins1,...", args: 1,
"List of segments to analyze and corresponding bin counts. " +
"Allowed segments: ${KnownCdr3Regions.INSTANCE.regionNames.join(",")}. " +
"[default = $DEFAULT_BINNING]")
cli.p(longOpt: "plot", "Plot amino acid property distributions for a specified list of segments.")
cli.f(longOpt: "factor", argName: "string", args: 1, "Metadata entry used to group samples in plot.")
cli._(longOpt: "plot-type", argName: "<pdf|png>", args: 1, "Plot output format [default=pdf]")
cli._(longOpt: "include-cfw", "Consider first and last AAs of CDR3, which are normally conserved C and F/W")


def opt = cli.parse(args)

if (opt == null)
System.exit(-1)

if (opt.h || opt.arguments().size() == 0) {
cli.usage()
System.exit(-1)
}

// Check if metadata is provided

def metadataFileName = opt.m

if (metadataFileName ? opt.arguments().size() != 1 : opt.arguments().size() < 2) {
if (metadataFileName)
println "Only output prefix should be provided in case of -m"
else
println "At least 1 sample files should be provided if not using -m"
cli.usage()
System.exit(-1)
}

// Remaining arguments

def outputFilePrefix = opt.arguments()[-1],
unweighted = (boolean) opt.u,
binning = (opt.r ?: DEFAULT_BINNING).split(",").collectEntries {
def split2 = it.split(":")
[(KnownCdr3Regions.INSTANCE.getByName(split2[0])): split2[1].toInteger()]
},
properties = (opt.o ?: DEFAULT_AA_PROPERTIES).split(","),
plot = (boolean) opt.p,
plotType = (opt.'plot-type' ?: "pdf").toString(),
includeCFW = (boolean) opt.'include-cfw'

def badProperties = properties.findAll { !BasicAminoAcidProperties.INSTANCE.propertyNames.contains(it) }

if (badProperties.size() > 0) {
println "[ERROR] Unknown amino acid properties: ${badProperties.join(",")}. " +
"Allowed values are $DEFAULT_AA_PROPERTIES"
System.exit(-1)
}

def scriptName = getClass().canonicalName.split("\\.")[-1]

//
// Batch load all samples (lazy)
//

println "[${new Date()} $scriptName] Reading sample(s)"

def sampleCollection = metadataFileName ?
new SampleCollection((String) metadataFileName) :
new SampleCollection(opt.arguments()[0..-2])

println "[${new Date()} $scriptName] ${sampleCollection.size()} sample(s) prepared"

//
// Compute and output diversity measures, spectratype, etc
//

def profileBuilder = new Cdr3AAProfileBuilder(binning, !unweighted, !includeCFW, properties)

def outputFileName = formOutputPath(outputFilePrefix, "cdr3aa", "profile", (unweighted ? "unwt" : "wt"))

new File(outputFileName).withPrintWriter { pw ->
def header = "#$MetadataTable.SAMPLE_ID_COLUMN\t" +
sampleCollection.metadataTable.columnHeader + "\t" +
"cdr3.segment\tbin\tproperty\tvalue\ttotal"

pw.println(header)

def sampleCounter = 0

sampleCollection.each { Sample sample ->
def profiles = profileBuilder.create(sample)

println "[${new Date()} $scriptName] ${++sampleCounter} sample(s) processed"

profiles.each { profileEntry ->
def segmentName = profileEntry.key.name
profileEntry.value.bins.each { bin ->
bin.summary.each {
pw.println([sample.sampleMetadata.sampleId, sample.sampleMetadata,
segmentName, bin.index, it.key,
it.value, bin.total].join("\t"))
}
}
}
}
}
if (plot) {
RUtil.execute("cdr3aa_profile.r",
outputFileName,
toPlotPath(outputFileName, plotType),
opt.f ? (sampleCollection.metadataTable.getColumnIndex(opt.f) + 2).toString() : "0",
)
}

println "[${new Date()} $scriptName] Finished"
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright 2013-2015 Mikhail Shugay (mikhail.shugay@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.antigenomics.vdjtools.profile

import com.antigenomics.vdjtools.sample.Clonotype

class Cdr3AAProfileBuilder {
private final Map<Cdr3Region, Integer> binning
private final boolean weighted, excludeCysPhe
private final List<String> groups

Cdr3AAProfileBuilder(Map<Cdr3Region, Integer> binning, boolean weighted,
boolean excludeCysPhe,
String... groups) {
this.binning = binning
this.weighted = weighted
this.excludeCysPhe = excludeCysPhe
this.groups = groups
}

Map<Cdr3Region, AminoAcidProfile> create(Iterable<Clonotype> clonotypes) {
def profiles = new HashMap<Cdr3Region, AminoAcidProfile>()

binning.each {
profiles.put(it.key, new AminoAcidProfile(it.value,
BasicAminoAcidProperties.INSTANCE.getProperties(groups)))
}

clonotypes.each { clonotype ->
if (clonotype.isCoding()) {
profiles.each {
def aaSeq = it.key.extractAminoAcid(clonotype, excludeCysPhe)
if (aaSeq.size() > 0) {
it.value.update(aaSeq, weighted ? clonotype.freq : 1.0d)
}
}
}
}

profiles
}
}
Loading

0 comments on commit 89a9568

Please sign in to comment.