diff --git a/Snakefile b/Snakefile index 58d479d..79127f5 100755 --- a/Snakefile +++ b/Snakefile @@ -50,6 +50,8 @@ def get_tag(): except subprocess.CalledProcessError: raise RuntimeError('[ERROR] Unable to get version number from git tags.') if '-' in version: + if hasattr(workflow, 'use_singularity') and workflow.use_singularity: + print("[WARNING] You're using an untagged version of Nanopype with the Singularity backend. Make sure to also update the pipeline repository to avoid inconsistency between code and container.", file=sys.stderr) return 'latest' else: return version @@ -90,14 +92,13 @@ if 'references' in nanopype_env: genome = values['genome'] chr_sizes = values['chr_sizes'] if 'chr_sizes' in values else '' if not os.path.isfile(genome): - print("[WARNING] Genome for {name} not found in {genome}, skipping entry".format( + print("[WARNING] Genome for {name} not found in {genome}, skipping entry.".format( name=name, genome=genome), file=sys.stderr) continue if chr_sizes and not os.path.isfile(chr_sizes): - print("[WARNING] Chromosome sizes for {name} not found in {chr_sizes}, skipping entry".format( + print("[WARNING] Chromosome sizes for {name} not found in {chr_sizes}, skipping entry.".format( name=name, chr_sizes=chr_sizes), file=sys.stderr) continue - print('add ', name) config['references'][name] = {"genome":genome, "chr_sizes":chr_sizes} diff --git a/docs/installation/singularity.md b/docs/installation/singularity.md old mode 100644 new mode 100755 index 1028058..ff7596a --- a/docs/installation/singularity.md +++ b/docs/installation/singularity.md @@ -51,6 +51,17 @@ cd nanopype pip3 install -r requirements.txt cd .. ``` + +It is recommended to install a tagged version of Nanopype. Using the 'latest' from master will always pull the most recent Singularity images. If the remaining pipeline is then not regularly updated via ``` git pull ```, pipeline code and container code can diverge. 
To install a specific version modify the above commands to: + +``` +git clone --recursive https://github.com/giesselmann/nanopype +cd nanopype +git fetch --tags && git checkout v0.6.0 +pip3 install -r requirements.txt +cd .. +``` + To deactivate a virtual python environment after installation or usage of the pipeline just type: ``` diff --git a/docs/installation/src.md b/docs/installation/src.md old mode 100644 new mode 100755 index 4f9b07d..779fad3 --- a/docs/installation/src.md +++ b/docs/installation/src.md @@ -129,3 +129,6 @@ There are some common errors that could arise during the installation process. I **terminated by signal 4** : Nanopype is mostly compiling integrated tools from source. In heterogeneous cluster environments this can lead to errors if the compilation takes place on a machine supporting modern vector instructions (SSE, AVX, etc.) but execution also uses less recent computers. The error message *terminated by signal 4* indicates an instruction in the software not supported by the underlying hardware. Please re-compile and install the tools from a machine with a common subset of vector instructions in this case. + +**The TensorFlow library was compiled to use AVX instructions, but these aren't available on your machine** +: This error can occur while running tools which are using tensorflow in the backend (e.g. Deepbinner). The PyPI version of tensorflow installed with Nanopype is pre-compiled to use AVX (Advanced Vector Extensions). Either run the pipeline for these workflows on a different node with AVX (Intel since Haswell, AMD since Excavator) or build tensorflow from source on a computer without AVX. Install tensorflow afterwards into the same python as Nanopype. 
diff --git a/docs/release-notes.md b/docs/release-notes.md index 46937b1..ddecfc3 100755 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -1,5 +1,14 @@ # Release notes +#### v0.7.0 - 2019-05-16 +Development release: + +: * Update guppy_basecaller to version v3.0.3 + * Move guppy installation into separate directory + * Fix demultiplexing rule raw data fetching + * Add doc section on updating the pipeline + * Minor documentation edits + #### v0.6.0 - 2019-04-30 Development release: diff --git a/docs/update.md b/docs/update.md new file mode 100755 index 0000000..b972946 --- /dev/null +++ b/docs/update.md @@ -0,0 +1,56 @@ +# Updates + +We deploy new versions of Nanopype for both important updates in the included tools and for additional features of the pipeline. We use semantic versioning in the format v1.0.3 (MAJOR.MINOR.PATCH). The major version is increased on significant changes in the pipeline. Any version update of included tools forces a new minor version. Pipeline releases with the same major and minor version are expected to produce the same results across installations. + +Before starting the update process, please make sure to backup your configuration files. Changes to the Nanopype repository (e.g. *env.yaml*) will be lost after the update and have to be restored manually. + +Independent of the installation method, the repository of the pipeline needs to be updated. The examples illustrate an update to version v0.7.0. + +``` +cd /path/to/nanopype +git fetch --tags +git checkout -f v0.7.0 && git clean -dfx && git gc --auto +``` + +The pipeline repository is now in a default state, changes made to the environment configuration in **env.yaml** need to be restored. Please also note that workflow configurations might change between pipeline releases. Compare the default **nanopype.yaml** in the repository with the one in the working directory to identify additional options. + + +## Singularity + +No specific steps are needed. 
You might consider deleting old Singularity images before running the pipeline. These are either in the hidden *.snakemake* folder of each working directory or at a common location when using *--singularity-prefix* + + +## Source + +When running the pipeline with tools built from source, the update includes re-building those. Depending on the extent of the patch, only a subset of tools is affected. Assuming your pipeline binaries are located in an isolated folder it is easiest to delete all, keep the sources from the first installation and run the same commands as described in the [installation](installation/src.md) section. + +According to the versions listed in the [release notes](release-notes.md) tools can be individually upgraded. The following example shows how to update the basecaller guppy. + +``` +rm ~/bin/guppy_basecaller +cd /path/to/nanopype +snakemake --snakefile rules/install.smk --directory ~/ guppy +``` + +The build rules are implemented as Snakemake workflows, thus deleting the output file (the binary) triggers a re-run of the build/download process. + + +## Docker + +The docker update is straightforward, just pull any updated version with e.g.: + +``` +docker pull giesselmann/nanopype:v0.7.0 +``` + +You can remove previous installations by running + +``` +docker images # list all images +docker rmi giesselmann/nanopype:v0.6.0 # remove tagged version +``` + + +## Configuration + +The pipeline configuration on [environment](installation/configuration.md) and [workflow](usage/general.md) level might be affected by an update. Please compare the default keys in the **env.yaml** and **nanopype.yaml** in the repository with the copies in your system and workflow directories. Start new workflows with the template from the repository. 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index e2898fe..0513df8 100755 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -96,6 +96,7 @@ nav: - Docker: installation/docker.md - Configuration: installation/configuration.md - Tests: installation/test.md + - Update: update.md - Workflow: - General: usage/general.md - Cluster computing: usage/cluster.md diff --git a/rules/demux.smk b/rules/demux.smk index cb961ac..68f5456 100755 --- a/rules/demux.smk +++ b/rules/demux.smk @@ -61,7 +61,7 @@ rule deepbinner: shell: """ mkdir -p raw - {config[sbin_singularity][storage_batch2fast5.sh]} {input.signal} raw/ {config[sbin_singularity][base]} {config[bin_singularity][python]} + {config[bin][python]} {config[sbin][storage_fast5Index.py]} extract {input.signal} raw/ --output_format single {config[bin_singularity][python]} {config[bin_singularity][deepbinner]} classify raw -s {input.model} --intra_op_parallelism_threads {threads} --omp_num_threads {threads} --inter_op_parallelism_threads {threads} | tail -n +2 > {output} """ @@ -75,7 +75,7 @@ checkpoint demux_split_barcodes: "docker://nanopype/demux:{tag}".format(tag=config['version']['tag']) run: import os, itertools, collections - os.makedirs(output.barcodes) + os.makedirs(output.barcodes, exist_ok=True) barcode_ids = collections.defaultdict(list) for f in input.batches: read_barcodes = [] diff --git a/rules/install.smk b/rules/install.smk index 1e60a52..38f9da7 100755 --- a/rules/install.smk +++ b/rules/install.smk @@ -351,9 +351,12 @@ rule guppy: """ # wget https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_2.3.1_linux64.tar.gz && # wget https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_2.3.5_linux64.tar.gz && - wget https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_2.3.7_linux64.tar.gz && \ - tar --skip-old-files -xzf ont-guppy-cpu_2.3.7_linux64.tar.gz -C ./ --strip 1 && \ - rm ont-guppy-cpu_2.3.7_linux64.tar.gz + # wget 
https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_2.3.7_linux64.tar.gz && + mkdir -p src/guppy && cd src/guppy + wget https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_3.0.3_linux64.tar.gz && \ + tar -xzf ont-guppy-cpu_3.0.3_linux64.tar.gz -C ./ --strip 1 && \ + rm ont-guppy-cpu_3.0.3_linux64.tar.gz + ln -s $(pwd)/bin/guppy_basecaller ../../bin/guppy_basecaller """ rule pychopper: diff --git a/rules/sv.smk b/rules/sv.smk index 67069a0..5466eab 100755 --- a/rules/sv.smk +++ b/rules/sv.smk @@ -98,7 +98,7 @@ rule strique: model = config['sv_STRique_model'] if 'sv_STRique_model' in config else '', mod_model = '--mod_model {}'.format(config['sv_STRique_mod_model']) if 'sv_STRique_mod_model' in config else '' resources: - mem_mb = lambda wildcards, threads, attempt: int((1.0 + (0.1 * (attempt - 1))) * (16000 + 2000 * threads)), + mem_mb = lambda wildcards, threads, attempt: int((1.0 + (0.1 * (attempt - 1))) * (32000 + 4000 * threads)), time_min = lambda wildcards, threads, attempt: int((3840 / threads) * attempt) # 240 min / 16 threads singularity: "docker://nanopype/sv:{tag}".format(tag=config['version']['tag']) diff --git a/rules/utils/basecalling_fastx_stats.py b/rules/utils/basecalling_fastx_stats.py index 86e0bb9..cce7f4c 100755 --- a/rules/utils/basecalling_fastx_stats.py +++ b/rules/utils/basecalling_fastx_stats.py @@ -69,7 +69,7 @@ def fastqIter(iterable): quality = next(it).decode('utf-8').strip() yield name, sequence, comment, quality, attrs line = next(it).decode('utf-8').strip() - if line[0] == '>': # fasta + elif line[0] == '>': # fasta name, attrs = parse_name(line) sequence = next(it).decode('utf-8').strip() try: diff --git a/setup.py b/setup.py index 6ef9a02..fa902b4 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ def run(self): setup( name='nanopype', - version='0.6.0', + version='0.7.0', author='Pay Giesselmann', author_email='giesselmann@molgen.mpg.de', description='Nanopore data processing workflows',