Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial working version of refgenie integration #1090

Merged
merged 21 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions nf_core/refgenie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python
"""
Update a nextflow.config file with refgenie genomes
"""

import logging
import os
import re
from pathlib import Path
from textwrap import dedent

# import refgenconf
from warnings import warn

import rich
from rich.logging import RichHandler

import nf_core.utils

# Set up logging
# Module-level logger named after this module; its level is pinned to INFO here.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# Set up rich tracebacks, rendered on a console that writes to stderr.
# Whether terminal colours are forced is decided by nf_core.utils.rich_force_colors().
stderr = rich.console.Console(stderr=True, force_terminal=nf_core.utils.rich_force_colors())
rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)

# Template for the generated nextflow config file.
# It is filled in with str.format(), so literal braces are doubled ({{ and }})
# and {content} is the only placeholder.
NF_CFG_TEMPLATE = """
// This is a read-only config file managed by refgenie. Manual changes to this file will be overwritten
// To make changes here, use refgenie to update the reference genome data
params {{
genomes {{
{content}
}}
}}
"""


def _print_nf_config(rgc):
    """
    Generate the contents of a nextflow config file with the genomes
    from the refgenie config file.

    Adapted from: https://github.com/refgenie/refgenie_nfcore

    Args:
        rgc: A RefGenConf object. Only ``list_assets_by_genome()`` and
            ``seek(genome, asset)`` are called on it.

    Returns:
        str: ``NF_CFG_TEMPLATE`` filled in with one block per genome, each
        holding one ``asset = "path"`` line for every asset whose path could
        be resolved. Assets for which ``seek`` raises are skipped with a
        warning.
    """
    chunks = []
    for genome, asset_list in rgc.list_assets_by_genome().items():
        chunks.append(f" '{genome}' {{\n")
        for asset in asset_list:
            try:
                pth = rgc.seek(genome, asset)
            # Catch general exception instead of refgenconf exception --> no refgenconf import needed
            except Exception:
                log.warning(f"{genome}/{asset} is incomplete, ignoring...")
            else:
                chunks.append(f' {asset.ljust(20, " ")} = "{pth}"\n')
        chunks.append(" }\n")

    return NF_CFG_TEMPLATE.format(content="".join(chunks))


def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home):
    """
    Update the $NXF_HOME/config file by adding an includeConfig statement to it
    for the 'refgenie_genomes_config_file' if not already defined.

    Args:
        refgenie_genomes_config_file: Path to the generated refgenie genomes
            config file. It is converted to an absolute path before use.
        nxf_home: Path to the NXF_HOME directory; the file ``config`` inside
            it is read and, if necessary, appended to or created.
    """
    include_target = os.path.abspath(refgenie_genomes_config_file)
    include_config_string = dedent(
        f"""
        ///// >>> nf-core + RefGenie >>> /////
        // !! Contents within this block are managed by 'nf-core/tools' !!
        // Includes auto-generated config file with RefGenie genome assets
        includeConfig '{include_target}'
        ///// <<< nf-core + RefGenie <<< /////
        """
    )
    nxf_home_config = Path(nxf_home) / "config"

    if not os.path.exists(nxf_home_config):
        # create new config and add include statement
        with open(nxf_home_config, "w") as fh:
            fh.write(include_config_string)
        log.info(f"Created new nextflow config file: {nxf_home_config}")
        return

    # Look for an existing include statement for this exact path.
    # The path is escaped so that regex metacharacters in it ('.', '+', '\\', ...)
    # are matched literally.
    include_pattern = re.compile(rf"\s*includeConfig\s*'{re.escape(include_target)}'")
    with open(nxf_home_config, "r") as fh:
        has_include_statement = any(include_pattern.match(line) for line in fh)

    # if include statement is missing, append it to the end of the file
    if not has_include_statement:
        with open(nxf_home_config, "a") as fh:
            fh.write(include_config_string)

        log.info(f"Included refgenie_genomes.config to {nxf_home_config}")


def update_config(rgc):
    """
    Update the genomes.config file after a local refgenie database has been updated

    This function is executed after running 'refgenie pull <genome>/<asset>'
    The refgenie config file is transformed into a nextflow.config file, which is used to
    overwrite the 'refgenie_genomes.config' file.
    The path to the target config file is inferred from the following options, in order:

    - the 'nextflow_config' attribute in the refgenie config file
    - the NXF_REFGENIE_PATH environment variable
    - otherwise defaults to: $NXF_HOME/nf-core/refgenie_genomes.config

    Additionally, an 'includeConfig' statement is added to the file $NXF_HOME/config

    Args:
        rgc: A RefGenConf object.

    Returns:
        bool: True if the genomes config was written, False if the target path
        could not be determined or the file could not be written.
    """

    # Compile nextflow refgenie_genomes.config from refgenie config
    refgenie_genomes = _print_nf_config(rgc)

    # Get the path to NXF_HOME
    # If NXF_HOME is not set, create it at $HOME/.nextflow
    # If $HOME is not set, set nxf_home to false
    nxf_home = os.environ.get("NXF_HOME")
    if not nxf_home:
        try:
            nxf_home = Path.home() / ".nextflow"
            if not os.path.exists(nxf_home):
                log.info(f"Creating NXF_HOME directory at {nxf_home}")
                os.makedirs(nxf_home, exist_ok=True)
        except RuntimeError:
            nxf_home = False

    # Get the path for storing the updated refgenie_genomes.config
    if hasattr(rgc, "nextflow_config"):
        refgenie_genomes_config_file = rgc.nextflow_config
    elif "NXF_REFGENIE_PATH" in os.environ:
        refgenie_genomes_config_file = os.environ.get("NXF_REFGENIE_PATH")
    elif nxf_home:
        refgenie_genomes_config_file = Path(nxf_home) / "nf-core/refgenie_genomes.config"
        # The default location is managed by this tool, so make sure its
        # directory exists; otherwise the write below can never succeed on a
        # fresh setup. A failure here is left for the write below to report.
        try:
            os.makedirs(refgenie_genomes_config_file.parent, exist_ok=True)
        except OSError:
            log.debug(f"Could not create directory {refgenie_genomes_config_file.parent}")
    else:
        log.info("Could not determine path to 'refgenie_genomes.config' file.")
        return False

    # Save the updated genome config
    try:
        with open(refgenie_genomes_config_file, "w") as fh:
            fh.write(refgenie_genomes)
        log.info(f"Updated nf-core genomes config: {refgenie_genomes_config_file}")
    except FileNotFoundError:
        log.warning(f"Could not write to {refgenie_genomes_config_file}")
        return False

    # Add include statement to NXF_HOME/config
    if nxf_home:
        _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home)

    return True
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pytest-datafiles
pytest-cov
mock
black
refgenie
Sphinx
sphinx_rtd_theme
isort
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
author_email="phil.ewels@scilifelab.se",
url="https://github.com/nf-core/tools",
license="MIT",
entry_points={"console_scripts": ["nf-core=nf_core.__main__:run_nf_core"]},
entry_points={
"console_scripts": ["nf-core=nf_core.__main__:run_nf_core"],
"refgenie.hooks.post_update": ["nf-core-refgenie=nf_core.refgenie:update_config"],
},
install_requires=required,
packages=find_packages(exclude=("docs")),
include_package_data=True,
Expand Down
58 changes: 58 additions & 0 deletions tests/test_refgenie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python
""" Tests covering the refgenie integration code
"""

import os
import shlex
import subprocess
import tempfile
import unittest

import nf_core.refgenie


class TestRefgenie(unittest.TestCase):
    """Class for refgenie tests"""

    def setUp(self):
        """
        Prepare a refgenie config file inside a temporary directory and point
        NXF_HOME at that directory for the duration of the test.
        """
        # The temporary directory is removed by a registered cleanup, which
        # also runs when a later step of setUp fails.
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.tmp_dir = tmp_dir.name

        self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow")
        self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config")
        self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml")

        # Set NXF_HOME environment variable
        # avoids adding includeConfig statement to config file outside the current tmpdir
        self.NXF_HOME_ORIGINAL = os.environ.get("NXF_HOME")
        os.environ["NXF_HOME"] = self.NXF_HOME
        self.addCleanup(self._restore_nxf_home)

        # create NXF_HOME and nf-core directories
        os.makedirs(os.path.join(self.NXF_HOME, "nf-core"), exist_ok=True)

        # Initialize a refgenie config; fail early if refgenie cannot create it
        subprocess.run(["refgenie", "init", "-c", self.REFGENIE], check=True)

        # Add NXF_REFGENIE_PATH to refgenie config
        with open(self.REFGENIE, "a") as fh:
            fh.write(f"nextflow_config: {self.NXF_REFGENIE_PATH}\n")

    def _restore_nxf_home(self):
        """Reset the NXF_HOME environment variable to its value before setUp."""
        if self.NXF_HOME_ORIGINAL is None:
            os.environ.pop("NXF_HOME", None)
        else:
            os.environ["NXF_HOME"] = self.NXF_HOME_ORIGINAL

    def test_update_refgenie_genomes_config(self):
        """Test that pulling an asset with refgenie updates the nf-core genomes config"""
        # Populate the config with a genome
        cmd = f"refgenie pull t7/fasta -c {self.REFGENIE}"
        out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT)

        self.assertIn("Updated nf-core genomes config", out.decode(errors="replace"))