-
Notifications
You must be signed in to change notification settings - Fork 193
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1090 from KevinMenden/refgenie
initial working version of refgenie integration
- Loading branch information
Showing
4 changed files
with
223 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Update a nextflow.config file with refgenie genomes | ||
""" | ||
|
||
import logging | ||
import os | ||
import re | ||
from pathlib import Path | ||
from textwrap import dedent | ||
|
||
# import refgenconf | ||
from warnings import warn | ||
|
||
import rich | ||
from rich.logging import RichHandler | ||
|
||
import nf_core.utils | ||
|
||
# Set up logging | ||
log = logging.getLogger(__name__) | ||
log.setLevel(logging.INFO) | ||
|
||
# # Setup rich traceback | ||
stderr = rich.console.Console(stderr=True, force_terminal=nf_core.utils.rich_force_colors()) | ||
rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1) | ||
|
||
NF_CFG_TEMPLATE = """ | ||
// This is a read-only config file managed by refgenie. Manual changes to this file will be overwritten | ||
// To make changes here, use refgenie to update the reference genome data | ||
params {{ | ||
genomes {{ | ||
{content} | ||
}} | ||
}} | ||
""" | ||
|
||
|
||
def _print_nf_config(rgc): | ||
""" | ||
Generate a nextflow config file with the genomes | ||
from the refgenie config file | ||
Adapted from: https://github.com/refgenie/refgenie_nfcore | ||
Takes a RefGenConf object as argument | ||
""" | ||
abg = rgc.list_assets_by_genome() | ||
genomes_str = "" | ||
for genome, asset_list in abg.items(): | ||
genomes_str += f" '{genome}' {{\n" | ||
for asset in asset_list: | ||
try: | ||
pth = rgc.seek(genome, asset) | ||
# Catch general exception instead of refgencof exception --> no refgenconf import needed | ||
except Exception as e: | ||
log.warn(f"{genome}/{asset} is incomplete, ignoring...") | ||
else: | ||
genomes_str += f' {asset.ljust(20, " ")} = "{pth}"\n' | ||
genomes_str += " }\n" | ||
|
||
return NF_CFG_TEMPLATE.format(content=genomes_str) | ||
|
||
|
||
def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home): | ||
""" | ||
Update the $NXF_HOME/config file by adding a includeConfig statement to it | ||
for the 'refgenie_genomes_config_file' if not already defined | ||
""" | ||
# Check if NXF_HOME/config exists and has a | ||
include_config_string = dedent( | ||
f""" | ||
///// >>> nf-core + RefGenie >>> ///// | ||
// !! Contents within this block are managed by 'nf-core/tools' !! | ||
// Includes auto-generated config file with RefGenie genome assets | ||
includeConfig '{os.path.abspath(refgenie_genomes_config_file)}' | ||
///// <<< nf-core + RefGenie <<< ///// | ||
""" | ||
) | ||
nxf_home_config = Path(nxf_home) / "config" | ||
if os.path.exists(nxf_home_config): | ||
# look for include statement in config | ||
has_include_statement = False | ||
with open(nxf_home_config, "r") as fh: | ||
lines = fh.readlines() | ||
for line in lines: | ||
if re.match(f"\s*includeConfig\s*'{os.path.abspath(refgenie_genomes_config_file)}'", line): | ||
has_include_statement = True | ||
break | ||
|
||
# if include statement is missing, add it to the last line | ||
if not has_include_statement: | ||
with open(nxf_home_config, "a") as fh: | ||
fh.write(include_config_string) | ||
|
||
log.info(f"Included refgenie_genomes.config to {nxf_home_config}") | ||
|
||
else: | ||
# create new config and add include statement | ||
with open(nxf_home_config, "w") as fh: | ||
fh.write(include_config_string) | ||
log.info(f"Created new nextflow config file: {nxf_home_config}") | ||
|
||
|
||
def update_config(rgc): | ||
""" | ||
Update the genomes.config file after a local refgenie database has been updated | ||
This function is executed after running 'refgenie pull <genome>/<asset>' | ||
The refgenie config file is transformed into a nextflow.config file, which is used to | ||
overwrited the 'refgenie_genomes.config' file. | ||
The path to the target config file is inferred from the following options, in order: | ||
- the 'nextflow_config' attribute in the refgenie config file | ||
- the NXF_REFGENIE_PATH environment variable | ||
- otherwise defaults to: $NXF_HOME/nf-core/refgenie_genomes.config | ||
Additionaly, an 'includeConfig' statement is added to the file $NXF_HOME/config | ||
""" | ||
|
||
# Compile nextflow refgenie_genomes.config from refgenie config | ||
refgenie_genomes = _print_nf_config(rgc) | ||
|
||
# Get the path to NXF_HOME | ||
# If NXF_HOME is not set, create it at $HOME/.nextflow | ||
# If $HOME is not set, set nxf_home to false | ||
nxf_home = os.environ.get("NXF_HOME") | ||
if not nxf_home: | ||
try: | ||
nxf_home = Path.home() / ".nextflow" | ||
if not os.path.exists(nxf_home): | ||
log.info(f"Creating NXF_HOME directory at {nxf_home}") | ||
os.makedirs(nxf_home, exist_ok=True) | ||
except RuntimeError: | ||
nxf_home = False | ||
|
||
# Get the path for storing the updated refgenie_genomes.config | ||
if hasattr(rgc, "nextflow_config"): | ||
refgenie_genomes_config_file = rgc.nextflow_config | ||
elif "NXF_REFGENIE_PATH" in os.environ: | ||
refgenie_genomes_config_file = os.environ.get("NXF_REFGENIE_PATH") | ||
elif nxf_home: | ||
refgenie_genomes_config_file = Path(nxf_home) / "nf-core/refgenie_genomes.config" | ||
else: | ||
log.info("Could not determine path to 'refgenie_genomes.config' file.") | ||
return False | ||
|
||
# Save the updated genome config | ||
try: | ||
with open(refgenie_genomes_config_file, "w") as fh: | ||
fh.write(refgenie_genomes) | ||
log.info(f"Updated nf-core genomes config: {refgenie_genomes_config_file}") | ||
except FileNotFoundError as e: | ||
log.warn(f"Could not write to {refgenie_genomes_config_file}") | ||
return False | ||
|
||
# Add include statement to NXF_HOME/config | ||
if nxf_home: | ||
_update_nextflow_home_config(refgenie_genomes_config_file, nxf_home) | ||
|
||
return True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ pytest-datafiles | |
pytest-cov | ||
mock | ||
black | ||
refgenie | ||
Sphinx | ||
sphinx_rtd_theme | ||
isort | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/env python | ||
""" Tests covering the refgenie integration code | ||
""" | ||
|
||
import os | ||
import shlex | ||
import subprocess | ||
import tempfile | ||
import unittest | ||
|
||
import nf_core.refgenie | ||
|
||
|
||
class TestRefgenie(unittest.TestCase): | ||
"""Class for refgenie tests""" | ||
|
||
def setUp(self): | ||
""" | ||
Prepare a refgenie config file | ||
""" | ||
self.tmp_dir = tempfile.mkdtemp() | ||
self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow") | ||
self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config") | ||
self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml") | ||
# Set NXF_HOME environment variable | ||
# avoids adding includeConfig statement to config file outside the current tmpdir | ||
try: | ||
self.NXF_HOME_ORIGINAL = os.environ["NXF_HOME"] | ||
except: | ||
self.NXF_HOME_ORIGINAL = None | ||
os.environ["NXF_HOME"] = self.NXF_HOME | ||
|
||
# create NXF_HOME and nf-core directories | ||
os.makedirs(os.path.join(self.NXF_HOME, "nf-core"), exist_ok=True) | ||
|
||
# Initialize a refgenie config | ||
os.system(f"refgenie init -c {self.REFGENIE}") | ||
|
||
# Add NXF_REFGENIE_PATH to refgenie config | ||
with open(self.REFGENIE, "a") as fh: | ||
fh.write(f"nextflow_config: {os.path.join(self.NXF_REFGENIE_PATH)}\n") | ||
|
||
def tearDown(self) -> None: | ||
# Remove the tempdir again | ||
os.system(f"rm -rf {self.tmp_dir}") | ||
# Reset NXF_HOME environment variable | ||
if self.NXF_HOME_ORIGINAL is None: | ||
del os.environ["NXF_HOME"] | ||
else: | ||
os.environ["NXF_HOME"] = self.NXF_HOME_ORIGINAL | ||
|
||
def test_update_refgenie_genomes_config(self): | ||
"""Test that listing pipelines works""" | ||
# Populate the config with a genome | ||
cmd = f"refgenie pull t7/fasta -c {self.REFGENIE}" | ||
out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) | ||
|
||
assert "Updated nf-core genomes config" in str(out) |