Skip to content

Commit

Permalink
Specify version number for installed software (#32)
Browse files Browse the repository at this point in the history
* Specify version number for R packages, git software, and the basespace CLI

* Install mamba packages using an environment file

After installation, we save an `environment_versioned.yml` file that contains all the installed versions of our software

* Don't install `libgfortran4`

This was causing building errors (#38)

* Clean installation docs, adding instructions on reproducible builds, and update changelog
  • Loading branch information
arisp99 authored May 2, 2022
1 parent 5ebaf36 commit c4001d2
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 108 deletions.
131 changes: 49 additions & 82 deletions MIPTools.def
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,10 @@ From: amd64/ubuntu:20.04

# set build environment
export DEBIAN_FRONTEND=noninteractive \
CONDA_DIR=/opt/conda \
SHELL=/bin/bash \
LANG=en_US.UTF-8 \
LANGUAGE=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
MINICONDA_VERSION=4.8.3
export PATH=$CONDA_DIR/bin:$PATH
LC_ALL=en_US.UTF-8

# install system packages
apt-get update \
Expand Down Expand Up @@ -81,108 +78,76 @@ From: amd64/ubuntu:20.04

# install msa2vcf
cd /opt/programs
git clone https://github.com/lindenb/jvarkit.git
git clone --branch 2021.10.13 https://github.com/lindenb/jvarkit.git
cd jvarkit
./gradlew msa2vcf

# install conda
# install conda and python via Miniconda3
# PYTHON_VERSION is written as major.minor (e.g. 3.8) and then reduced to the
# dot-less tag embedded in the Miniconda installer file name (3.8 -> 38).
# Only the major and minor fields are kept: a patch suffix is dropped
# (3.8.5 -> 38) and a two-digit minor is preserved (3.10 -> 310) instead of
# being truncated to two characters.
PYTHON_VERSION=3.8
PYTHON_VERSION=$(printf '%s\n' "${PYTHON_VERSION}" \
  | sed 's/[^0-9.]//g' \
  | cut -d. -f1,2 \
  | tr -d '.')
MINICONDA_VERSION=4.8.3
# MD5 checksum the downloaded installer is verified against; it has to be
# updated together with PYTHON_VERSION / MINICONDA_VERSION.
MINICONDA_MD5=d63adf39f2c220950a063e0529d4ff74
CONDA_DIR=/opt/conda
# Quoted so an inherited PATH containing whitespace is passed through intact.
export PATH="${CONDA_DIR}/bin:${PATH}"
cd /tmp && \
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
echo "d63adf39f2c220950a063e0529d4ff74 *Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
/bin/bash Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
rm Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
$CONDA_DIR/bin/conda config --add channels defaults && \
$CONDA_DIR/bin/conda config --add channels bioconda && \
$CONDA_DIR/bin/conda config --add channels conda-forge && \
$CONDA_DIR/bin/conda config --add channels r && \
$CONDA_DIR/bin/conda config --system --set show_channel_urls true && \
$CONDA_DIR/bin/conda install --quiet --yes conda="${MINICONDA_VERSION%.*}.*" && \
conda clean --all -f -y

# install mamba
conda install mamba -c conda-forge

# install conda packages using mamba
mamba install -qy\
"r" \
"r-epitools" \
"rpy2" \
"r-irkernel" \
"r-plotly" \
"r-knitr" \
"r-shiny" \
"r-ggplot2" \
"r-devtools" \
"r-dplyr" \
"r-dt" \
"r-pkgbuild" \
"gxx_linux-64" \
"python" \
"notebook" \
"nbconvert" \
"jupyter_contrib_nbextensions" \
"xlrd" \
"bcftools" \
"samtools" \
"vcftools" \
"htslib" \
"bwa" \
"bowtie2" \
"primer3" \
"primer3-py" \
"numpy" \
"scipy" \
"biopython" \
"pysam" \
"pandas" \
"matplotlib" \
"seaborn" \
"scikit-learn" \
"scandir" \
"openpyxl" \
"simplegeneric" \
"matplotlib-venn" \
"tblib" \
"parallel" \
"scikit-allel" \
"bioconductor-dnacopy" \
"basemap-data-hires" \
"seqtk=1.3" \
"gatk4" \
"freebayes" \
"lastz" \
"plotly" \
"texlive-core"
wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-py${PYTHON_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh" && \
echo "${MINICONDA_MD5} *Miniconda3-py${PYTHON_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
/bin/bash Miniconda3-py${PYTHON_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh -bfp ${CONDA_DIR} && \
rm Miniconda3-py${PYTHON_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \
${CONDA_DIR}/bin/conda config --system --set show_channel_urls true && \
conda clean --all --force-pkgs-dirs --yes

# Install mamba
# Note that the mamba installation will update conda to the latest version
conda install mamba --channel conda-forge

# Install mamba packages using an environment file.
# If the versioned file exists, use it as a template to ensure version
# numbers are fixed. Otherwise, install packages with the latest versions
# and export the resulting environment so it can be reused for later builds.
# Note that instead of creating a new environment, we update the base
# environment, which is activated by default.
if [ -f "/opt/environment_versioned.yml" ]; then
  mamba env update --prefix "${CONDA_DIR}" --file /opt/environment_versioned.yml
else
  # Update environment and save information to a file
  mamba env update --prefix "${CONDA_DIR}" --file /opt/environment.yml
  mamba env export --prefix "${CONDA_DIR}" > /opt/environment_versioned.yml
fi

# install vt variant tool set
cd /opt/programs
git clone https://github.com/atks/vt.git
git clone --branch 0.577 https://github.com/atks/vt.git
cd vt
git checkout 0.577
make -j $CPU_COUNT
scp vt /opt/bin

# install magrittr
Rscript -e 'install.packages("magrittr", repos = "https://cloud.r-project.org")'
Rscript -e 'devtools::install_version(
package = "magrittr",
version = "2.0.2",
repos = "https://cloud.r-project.org"
)'

# install RealMcCoil
Rscript -e 'devtools::install_github("OJWatson/McCOILR")'
Rscript -e 'devtools::install_github("OJWatson/McCOILR", ref = "v1.3.1")'

# install rehh
Rscript -e 'install.packages("rehh", repos="https://cloud.r-project.org")'
Rscript -e 'devtools::install_version(
package = "rehh",
version = "3.2.2",
repos = "https://cloud.r-project.org"
)'

# install MIPWrangler
cd /opt/programs
git clone https://github.com/bailey-lab/MIPWrangler
git clone --branch v1.2.0 https://github.com/bailey-lab/MIPWrangler
cd MIPWrangler
git checkout v1.2.0
./install.sh $CPU_COUNT

# install elucidator
cd /opt/programs
git clone https://github.com/nickjhathaway/elucidator
git clone --branch develop https://github.com/nickjhathaway/elucidator
cd elucidator
git checkout develop
./install.sh $CPU_COUNT

# install parasight
Expand All @@ -196,7 +161,8 @@ From: amd64/ubuntu:20.04
scp mipscripts/mipscripts.py /opt/bin/mipscripts.py

# install basespace cli
BS_PATH="https://launch.basespace.illumina.com/CLI/latest/amd64-linux/bs"
BS_VERSION=1.5.1
BS_PATH="https://launch.basespace.illumina.com/CLI/${BS_VERSION}/amd64-linux/bs"
wget $BS_PATH -O /opt/bin/bs

# add executable flag to executables
Expand All @@ -216,6 +182,7 @@ From: amd64/ubuntu:20.04
## Files Section ##
#################################################################
%files
environment* /opt
programs /opt
bin /opt
src /opt
Expand Down
4 changes: 4 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ MIPTools (development version)
New Features
------------

- Add the capability to freeze software version numbers when building the
container. Additionally, the version number for key software tools has been
fixed (:github:user:`arisp99`,
:github:pull:`32`).
- Install :github:repo:`mipscripts <bailey-lab/mipscripts>`, which contains
additional tools for analysis pipelines.
- Perform additional argument parsing to ensure arguments are formatted
Expand Down
95 changes: 69 additions & 26 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,30 +45,9 @@ You can download the development version or any previous release:

Install From Source
===================
MIPTools can also be built from source code using the definition file provided
in this `GitHub repository <https://github.com/bailey-lab/MIPTools>`_.

The process can take about 10-30 minutes to build, depending on the number of
CPU cores available. By default, the build process will use 6 CPU cores. This
should pose no problems with most modern computers, but if the computer used
for building the container has fewer than 6 CPU cores available, change the
:code:`"CPU_COUNT=6"` value at the top of the :code:`MIPTools.def` file to a
suitable number before running the following code. On the other hand, if
you have access to more CPU power, by all means, use them by setting the
same parameter to a higher value.

You must have **sudo** privilege to *build* the image. You do not need sudo to
*use* the image. So if you want to run the container on an environment without
sudo, either download a prebuilt image (see above) or build the container on
your own machine where you *do* have sudo privilege and copy the image file to
the computer without sudo. Note that the Singularity program itself must have
been installed with sudo.

If you plan to use MIPTools to demultiplex bcl files, you should download
:code:`bcl2fastq` separately. Currently, you can download it from `here <https://support.illumina.com/downloads/bcl2fastq-conversion-software-v2-20.html>`_.
You must download the file: :code:`bcl2fastq2 Conversion Software v2.20 Installer (Linux rpm)` and place it in the :code:`MIPTools/programs` directory.

You can install the most recent release using the following:
MIPTools can also be built from source using the definition file provided in
the `GitHub repository <https://github.com/bailey-lab/MIPTools>`_. You can
install the most recent release using the following:

.. code-block:: shell
Expand All @@ -92,5 +71,69 @@ MIPTools!
sudo singularity build miptools.sif MIPTools.def
:code:`miptools.sif` is a single **portable** file which has all the programs
needed for MIP design, data analysis, and a lot more. More information
about the extra programs and their uses will be added over time.
needed for MIP design, data analysis, and a lot more.

Sudo Privileges
---------------

.. warning::

You must have ``sudo`` privileges to *build* the image. You do not need
``sudo`` to *use* the image.

If you want to run the container on an environment without ``sudo``, either
download a prebuilt image (see above) or build the container on your own
machine where you *do* have ``sudo`` privilege and copy the image file to the
computer without ``sudo``. Note that the Singularity program itself must have
been installed with ``sudo``.

Software Versioning
-------------------

MIPTools installs several software tools together into the final built
container. Software packages are installed in the ``%post`` section of the
definition file, ``MIPTools.def`` (for more information on the definition file,
consult the `Singularity documentation <https://sylabs.io/docs>`_). Programs in
MIPTools are installed in a variety of ways including via ``wget``,
``apt-get``, building source code for programs downloaded via ``git``, and even
via ``mamba``.

In order to ensure reproducible builds, the version number has been fixed for
many of the key programs MIPTools uses. The exceptions to this rule include
software installed via ``apt-get`` and ``mamba``. Software installed via
``mamba`` is defined in an ``environment.yml`` file in the root of the MIPTools
directory. This ``environment.yml`` file does not contain package versions as
in many cases dependency conflicts may arise. It is, however, possible to
specify the version number of installed packages by defining an
``environment_versioned.yml`` file in the root of the MIPTools directory.
During the build process if this file exists it will be used to install
``mamba`` packages. If no ``environment_versioned.yml`` file exists, it will be
generated during the build process and saved within the MIPTools container.
Users may then save this file to the root of the MIPTools directory to ensure
package versions of software installed with ``mamba`` do not change. To save
this file locally you may use ``singularity exec``:

.. code-block:: shell

    singularity exec <container> cat /opt/environment_versioned.yml > environment_versioned.yml

Demultiplexing
--------------

If you plan to use MIPTools to demultiplex bcl files, you must download
:code:`bcl2fastq` separately. Currently, you can download it from `here
<https://support.illumina.com/downloads/bcl2fastq-conversion-software-v2-20.html>`_.
You must download the file: :code:`bcl2fastq2 Conversion Software v2.20
Installer (Linux rpm)` and place it in the :code:`MIPTools/programs` directory.

CPU Usage
---------

The build process can take about 30-60 minutes, depending on the
number of CPU cores available. By default, the build process will use 20 CPU
cores. If the computer used for building the container has fewer than 20 CPU
cores available, change the :code:`CPU_COUNT=20` value at the top of the
:code:`MIPTools.def` file to a suitable number before building the container.
On the other hand, if the computer has additional CPUs, by all means, use them
by setting the same parameter to a higher value.
57 changes: 57 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Conda/mamba environment specification for the MIPTools container.
# MIPTools.def applies this file to the existing base environment with
# `mamba env update --prefix ${CONDA_DIR} --file /opt/environment.yml`
# when no environment_versioned.yml is available.
name: base
# Channels from which the dependencies below are resolved.
channels:
- r
- bioconda
- conda-forge
- defaults
# Packages are intentionally left unpinned (seqtk is the only exception):
# the installation docs note that fixing versions here often leads to
# dependency conflicts. Reproducible builds rely on the exported
# environment_versioned.yml instead.
dependencies:
- r-base
- r-epitools
- rpy2
- r-irkernel
- r-plotly
- r-knitr
- r-shiny
- r-ggplot2
- r-devtools
- r-dplyr
- r-dt
- r-pkgbuild
- gxx_linux-64
- python
- notebook
- nbconvert
- jupyter_contrib_nbextensions
- xlrd
- bcftools
- samtools
- vcftools
- htslib
- bwa
- bowtie2
- primer3
- primer3-py
- numpy
- scipy
- biopython
- pysam
- pandas
- matplotlib
- seaborn
- scikit-learn
- scandir
- openpyxl
- simplegeneric
- matplotlib-venn
- tblib
- parallel
- scikit-allel
- bioconductor-dnacopy
- basemap-data-hires
- seqtk=1.3
- gatk4
- freebayes
- lastz
- plotly
- texlive-core
# Same location as CONDA_DIR (/opt/conda) in MIPTools.def.
prefix: /opt/conda

0 comments on commit c4001d2

Please sign in to comment.