Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
pedrofeijao committed Nov 20, 2017
2 parents 4ebd4b6 + 54ba887 commit d4df9f5
Show file tree
Hide file tree
Showing 12 changed files with 1,861 additions and 237 deletions.
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ We introduce MentaLiST, a new MLST caller, based on a k-mer counting algorithm a

## Installation

### Linux

The easiest way of installing MentaLiST is by creating a new environment with [Conda](https://conda.io/docs/). To create a new conda environment that includes MentaLiST, run:
```
conda create -n mentalist -c bioconda mentalist
Expand All @@ -33,14 +35,32 @@ source activate mentalist

Once the mentalist conda environment is active, you should be able to run MentaLiST. Typing:
```
MentaLiST.jl -h
mentalist -h
```
produces the help output.

The conda environment can be deactivated by running:
```
source deactivate
```
### macOS (10.8+)

There is currently no conda recipe for julia on macOS, so a more manual installation process is required.

1. Download the [julia-0.5.2.dmg](https://julialang-s3.julialang.org/bin/mac/x64/0.5/julia-0.5.2-mac64.dmg) file from julialang.org and install it by dragging the `Julia-0.5.app` bundle into your Applications folder.
2. Launch the `Julia-0.5` application and install all of the dependencies listed in the [REQUIRE](REQUIRE) file:

```julia
julia> Pkg.update()
julia> Pkg.add("Bio")
julia> Pkg.add("OpenGene")
julia> Pkg.add("Logging")
julia> Pkg.add("ArgParse")
julia> Pkg.add("Lumberjack")
julia> Pkg.add("Suppressor")
```
3. Add `/Applications/Julia-0.5.app/Contents/Resources/julia/bin/julia` to your `PATH`
4. Clone the MentaLiST git repository (https://github.com/WGS-TB/MentaLiST.git). MentaLiST can be run directly from the repository: `src/mentalist -h`.

## Quick Start

Expand Down
2 changes: 1 addition & 1 deletion conda/build.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

cp -r $SRC_DIR/src/mentalist $PREFIX/bin
cp -r $SRC_DIR/src/*.jl $PREFIX/bin
ln -s $PREFIX/bin/MentaLiST.jl $PREFIX/bin/mentalist
chmod +x $PREFIX/bin/mentalist

julia -e 'Pkg.init()'
Expand Down
25 changes: 18 additions & 7 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
{% set name = "MentaLiST" %}
{% set version = "0.1.3" %}

package:
name: mentalist
version: 0.1.3
name: {{ name|lower }}
version: {{ version }}

source:
git_url: https://github.com/WGS-TB/MentaLiST.git
git_rev: v0.1.3
folder: MentaLiST
fn: {{ name|lower }}-{{ version }}.tar.gz
url: https://github.com/WGS-TB/MentaLiST/archive/v{{ version }}.tar.gz

build:
number: 0
skip: True # [osx]

requirements:
build:
- julia 0.5.2
- hdf5
- mpfr
run:
- julia 0.5.2
- hdf5
- mpfr

build:
number: 0
test:
commands:
- mentalist -h

about:
home: https://github.com/WGS-TB/MentaLiST
summary: The MLST pipeline developed by the PathOGiST research group.
license: MIT
license_file: LICENSE
6 changes: 3 additions & 3 deletions galaxy/data_managers/data_manager_conf.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
<data_managers>
<data_manager tool_file="data_manager_mentalist_build_db/data_manager/mentalist_build_db.xml" id="mentalist_build_db" version="0.1.2">
<data_manager tool_file="data_manager_mentalist_build_db/data_manager/mentalist_build_db.xml" id="mentalist_build_db" version="0.1.3">
<data_table name="mentalist_databases">
<output>
<column name="value" />
Expand All @@ -15,7 +15,7 @@
</output>
</data_table>
</data_manager>
<data_manager tool_file="data_manager_mentalist_download_cgmlst/data_manager/mentalist_download_cgmlst.xml" id="mentalist_download_cgmlst" version="0.1.2">
<data_manager tool_file="data_manager_mentalist_download_cgmlst/data_manager/mentalist_download_cgmlst.xml" id="mentalist_download_cgmlst" version="0.1.3">
<data_table name="mentalist_databases">
<output>
<column name="value" />
Expand All @@ -30,7 +30,7 @@
</output>
</data_table>
</data_manager>
<data_manager tool_file="data_manager_mentalist_download_pubmlst/data_manager/mentalist_download_pubmlst.xml" id="mentalist_download_pubmlst" version="0.1.2">
<data_manager tool_file="data_manager_mentalist_download_pubmlst/data_manager/mentalist_download_pubmlst.xml" id="mentalist_download_pubmlst" version="0.1.3">
<data_table name="mentalist_databases">
<output>
<column name="value" />
Expand Down
8 changes: 5 additions & 3 deletions homebrew/mentalist.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
class Mentalist < Formula
desc "The MLST pipeline developed by the PathOGiST research group"
homepage "https://github.com/WGS-TB/MentaLiST"
url "https://github.com/WGS-TB/MentaLiST/archive/v0.1.2.tar.gz"
sha256 "0121110f87264423d9ffd18a6262753b1d643a84cbaabf0842c0d2f909d7ccf6"
url "https://github.com/WGS-TB/MentaLiST/archive/v0.1.3.tar.gz"
sha256 ""
# doi "10.1101/172858"
# tag "bioinformatics"

depends_on "julia"

Expand All @@ -11,6 +13,6 @@ def install
end

test do

system "#{bin}/mentalist", "-h"
end
end
210 changes: 210 additions & 0 deletions src/MentaLiST.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#!/usr/bin/env julia

using Suppressor
@suppress_err begin
using Lumberjack
using ArgParse
end

"""
    parse_commandline()

Declare the command-line interface and parse the program arguments.

Six sub-commands are registered: `call`, `build_db`, `list_pubmlst`,
`download_pubmlst`, `list_cgmlst` and `download_cgmlst`, each with its own
argument table. Returns the value produced by `parse_args` for these settings;
`main` reads the selected command name from the "%COMMAND%" key and the
command's own options from the key named after the command.
"""
function parse_commandline()
    s = ArgParseSettings()

    # Top-level sub-commands.
    @add_arg_table s begin
        "call"
            help = "MLST caller, given a sample and a k-mer database."
            action = :command
        "build_db"
            help = "Build a MLST k-mer database, given a list of FASTA files."
            action = :command
        "list_pubmlst"
            help = "List all available MLST schemes from www.pubmlst.org. "
            action = :command
        "download_pubmlst"
            help = "Download a MLST scheme from pubmlst and build a MLST k-mer database."
            action = :command
        "list_cgmlst"
            help = "List all available cgMLST schemes from www.cgmlst.org."
            action = :command
        "download_cgmlst"
            help = "Download a MLST scheme from cgmlst.org and build a MLST k-mer database."
            action = :command
    end

    # Calling MLST options:
    @add_arg_table s["call"] begin
        "-o"
            help = "Output file with MLST call"
            arg_type = String
            required = true
        "-s"
            help = "Sample name"
            arg_type = String
            required = true
        "--db"
            help = "Kmer database"
            required = true
            arg_type = String
        "files"
            nargs = '*'
            help = "FastQ input files"
            required = true
            arg_type = String
    end

    # Build DB from FASTA, options:
    @add_arg_table s["build_db"] begin
        "--db"
            help = "Output file (kmer database)"
            arg_type = String
            required = true
        "-k"
            help = "Kmer size"
            required = true
            arg_type = Int8
        "-f", "--fasta_files"
            nargs = '+'
            arg_type = String
            help = "Fasta files with the MLST scheme"
            required = true
        "-p", "--profile"
            arg_type = String
            help = "Profile file for known genotypes."
        "-c", "--disable_compression"
            help = "Disables the default compression of the database, that stores only the most informative kmers. Not recommended unless for debugging."
            action = :store_true
    end

    # Scheme listing options (pubmlst):
    @add_arg_table s["list_pubmlst"] begin
        "-p", "--prefix"
            help = "Only list schemes that start with this prefix."
            arg_type = String
    end

    # Scheme listing options (cgmlst):
    @add_arg_table s["list_cgmlst"] begin
        "-p", "--prefix"
            help = "Only list schemes that start with this prefix."
            arg_type = String
    end

    # Download a pubmlst scheme and build its database, options:
    @add_arg_table s["download_pubmlst"] begin
        "-o", "--output"
            help = "Output folder for the scheme files."
            arg_type = String
            required = true
        "-s", "--scheme"
            help = "Species name or ID of the scheme."
            arg_type = String
            required = true
        "-k"
            help = "K-mer size"
            required = true
            arg_type = Int8
        "--db"
            help = "Output file for the kmer database."
            arg_type = String
            required = true
        "-c", "--disable_compression"
            help = "Disables the default compression of the database, that stores only the most informative kmers. Not recommended unless for debugging."
            action = :store_true
    end

    # Download a cgmlst scheme and build its database, options:
    @add_arg_table s["download_cgmlst"] begin
        "-o", "--output"
            help = "Output folder for the scheme files."
            arg_type = String
            required = true
        "-s", "--scheme"
            help = "Species name or ID of the scheme"
            arg_type = String
            required = true
        "-k"
            help = "K-mer size"
            required = true
            arg_type = Int8
        "--db"
            help = "Output file for the kmer database."
            arg_type = String
            required = true
        "-c", "--disable_compression"
            help = "Disables the default compression of the database, that stores only the most informative kmers. Not recommended unless for debugging."
            action = :store_true
    end

    return parse_args(s)
end


#### Main COMMAND functions:
"""
    call_mlst(args)

Run the `call` command: open the k-mer database named by `args["db"]`, count
k-mers in the input files listed in `args["files"]`, and write the resulting
calls to `args["o"]` under the sample name `args["s"]`.

The helper functions used here are loaded from `build_db_functions.jl` on
entry. `check_files` is called first on the database and input paths.
"""
function call_mlst(args)
    include("build_db_functions.jl")

    # Validate the database path together with every input file.
    db_path = args["db"]
    input_files = args["files"]
    check_files(vcat(db_path, input_files))

    info("Opening kmer database ... ")
    db, locus_names, alleles_by_locus, kmer_size, profile_data = open_db(db_path)

    info("Opening fastq file(s) ... ")
    allele_votes, locus_votes = count_kmers_and_vote(
        DNAKmer{kmer_size}, input_files, db, alleles_by_locus
    )

    info("Writing output ...")
    write_calls(
        allele_votes, locus_votes, locus_names, alleles_by_locus,
        args["s"], args["o"], profile_data
    )
    info("Done.")
end

"""
    list_pubmlst(args)

Run the `list_pubmlst` command: load `mlst_download_functions.jl` and pass the
optional `args["prefix"]` value to `list_pubmlst_schema`.
"""
function list_pubmlst(args)
    include("mlst_download_functions.jl")
    prefix = args["prefix"]
    return list_pubmlst_schema(prefix)
end

"""
    download_pubmlst(args)

Run the `download_pubmlst` command: download the scheme `args["scheme"]` into
the folder `args["output"]`, then build a k-mer database from the result.

`args` is modified in place: the two values returned by
`download_pubmlst_scheme` are stored under the "fasta_files" and "profile"
keys so that `args` can be handed to `build_db`.
"""
function download_pubmlst(args)
    include("mlst_download_functions.jl")
    scheme_fastas, scheme_profile = download_pubmlst_scheme(args["scheme"], args["output"])
    info("Building the k-mer database ...")
    # Fill in the entries that build_db reads but this command does not take itself.
    args["fasta_files"] = scheme_fastas
    args["profile"] = scheme_profile
    return build_db(args)
end

"""
    list_cgmlst(args)

Run the `list_cgmlst` command: load `mlst_download_functions.jl` and pass the
optional `args["prefix"]` value to `list_cgmlst_schema`.
"""
function list_cgmlst(args)
    include("mlst_download_functions.jl")
    prefix = args["prefix"]
    return list_cgmlst_schema(prefix)
end

"""
    download_cgmlst(args)

Run the `download_cgmlst` command: download the scheme `args["scheme"]` into
the folder `args["output"]`, then build a k-mer database from the result.

`args` is modified in place: the value returned by `download_cgmlst_scheme` is
stored under "fasta_files" and "profile" is set to `nothing` before `args` is
handed to `build_db`.
"""
function download_cgmlst(args)
    include("mlst_download_functions.jl")
    scheme_fastas = download_cgmlst_scheme(args["scheme"], args["output"])
    info("Building the k-mer database ...")
    # No profile file is produced by this download; build_db receives `nothing`.
    args["profile"] = nothing
    args["fasta_files"] = scheme_fastas
    return build_db(args)
end

"""
    build_db(args)

Run the `build_db` command: build a k-mer database from the FASTA files in
`args["fasta_files"]` using k-mer size `args["k"]`, and save it to `args["db"]`
together with `args["profile"]`.

The third argument passed to `kmer_class_for_each_locus` is the negation of
`args["disable_compression"]`. Helper functions are loaded from
`build_db_functions.jl` on entry.
"""
function build_db(args)
    include("build_db_functions.jl")

    fasta_paths = args["fasta_files"]
    check_files(fasta_paths)

    kmer_size = convert(Int8, args["k"])::Int8
    use_compression = !args["disable_compression"]

    info("Opening FASTA files ... ")
    per_locus, locus_names = kmer_class_for_each_locus(kmer_size, fasta_paths, use_compression)

    # Combine results:
    info("Combining results for each locus ...")
    classification = combine_loci_classification(kmer_size, per_locus, locus_names)

    info("Saving DB ...")
    save_db(kmer_size, classification, locus_names, args["db"], args["profile"])
    info("Done!")
end

##### Main function: just calls the appropriate commands, with arguments:
"""
    main()

Parse the command line and run the function that implements the selected
sub-command, passing it that sub-command's own argument dictionary. Does
nothing if the command name is not one of the six known commands.
"""
function main()
    args = parse_commandline()

    # Each command's options are stored under the key equal to the command name.
    handlers = Dict(
        "call" => call_mlst,
        "build_db" => build_db,
        "list_pubmlst" => list_pubmlst,
        "download_pubmlst" => download_pubmlst,
        "list_cgmlst" => list_cgmlst,
        "download_cgmlst" => download_cgmlst,
    )

    command = args["%COMMAND%"]
    haskey(handlers, command) || return nothing
    return handlers[command](args[command])
end

# Script entry point: parse the command line and run the selected command.
main()
8 changes: 8 additions & 0 deletions src/build_db_functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ using OpenGene
end
include("db_graph.jl")

"""
    check_files(files)

Verify that every path in `files` refers to an existing file.

Returns `nothing` when all files exist. Otherwise logs the comma-separated list
of missing paths through `Lumberjack.warn` and terminates the process with
`exit(-1)`.
"""
function check_files(files)
    absent = filter(path -> !isfile(path), files)
    isempty(absent) && return nothing

    listing = join(absent, ',')
    Lumberjack.warn("The following input file(s) could not be found: $(listing), aborting ...")
    exit(-1)
end

function complement_alleles(vector, m)
comp_vector = Int16[]
expected::Int16 = 1
Expand Down
Loading

0 comments on commit d4df9f5

Please sign in to comment.