From 6093ae926a0dc2372b2f6fa27fe9fb4b0d2415ad Mon Sep 17 00:00:00 2001 From: Lucas Czech Date: Mon, 15 Jul 2024 23:07:51 +0200 Subject: [PATCH] Adapt to new Snakemake profiles for slurm --- profiles/README.md | 6 - profiles/memex/cluster_config.yaml | 32 --- profiles/memex/slurm-jobscript.sh | 1 - profiles/memex/slurm-status.py | 1 - profiles/memex/slurm-submit.py | 1 - profiles/memex/slurm_utils.py | 1 - profiles/slurm/LICENSE | 21 -- profiles/slurm/config.yaml | 16 -- profiles/slurm/slurm-jobscript.sh | 3 - profiles/slurm/slurm-status.py | 69 ----- profiles/slurm/slurm-submit.py | 130 --------- profiles/slurm/slurm_utils.py | 268 ------------------ workflow/envs/grenepipe.yaml | 1 + workflow/profiles/README.md | 6 + .../profiles/default}/config.yaml | 0 .../profiles}/slurm/cluster_config.yaml | 7 + .../profiles/slurm}/config.yaml | 3 - 17 files changed, 14 insertions(+), 552 deletions(-) delete mode 100644 profiles/README.md delete mode 100644 profiles/memex/cluster_config.yaml delete mode 120000 profiles/memex/slurm-jobscript.sh delete mode 120000 profiles/memex/slurm-status.py delete mode 120000 profiles/memex/slurm-submit.py delete mode 120000 profiles/memex/slurm_utils.py delete mode 100644 profiles/slurm/LICENSE delete mode 100644 profiles/slurm/config.yaml delete mode 100755 profiles/slurm/slurm-jobscript.sh delete mode 100755 profiles/slurm/slurm-status.py delete mode 100755 profiles/slurm/slurm-submit.py delete mode 100644 profiles/slurm/slurm_utils.py create mode 100644 workflow/profiles/README.md rename {profiles/local => workflow/profiles/default}/config.yaml (100%) rename {profiles => workflow/profiles}/slurm/cluster_config.yaml (84%) rename {profiles/memex => workflow/profiles/slurm}/config.yaml (75%) diff --git a/profiles/README.md b/profiles/README.md deleted file mode 100644 index 9d09718..0000000 --- a/profiles/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Overview -============ - -Profiles that might come in handy when running the pipeline in a 
cluster setting. - -See the [Cluster and Profiles](https://github.com/lczech/grenepipe/wiki/Cluster-and-Profiles) wiki page for details. diff --git a/profiles/memex/cluster_config.yaml b/profiles/memex/cluster_config.yaml deleted file mode 100644 index c499a1a..0000000 --- a/profiles/memex/cluster_config.yaml +++ /dev/null @@ -1,32 +0,0 @@ -__default__: - account: lczech # Your hpc account - mail-user: lczech@carnegiescience.edu # Your email (optional) - partition: DPB,SHARED # The partition you use - time: 120 # Default time (minutes). A time limit of zero requests that no time limit be imposed - mem: 10G # Default memory. A memory size specification of zero grants the job access to all of the memory on each node. - cpus-per-task: 1 - nodes: 1 - ntasks: 1 - -trim_reads_se: - mem: 5G - cpus-per-task: 4 - -trim_reads_pe: - mem: 5G - cpus-per-task: 4 - -map_reads: - cpus-per-task: 12 - -call_variants: - partition: DPB # Use DPB only, as it as inf time limit - time: 1-0 - cpus-per-task: 8 - -combine_calls: - time: 1-0 - -gatk_calls_combine: - time: 1-0 - cpus-per-task: 8 diff --git a/profiles/memex/slurm-jobscript.sh b/profiles/memex/slurm-jobscript.sh deleted file mode 120000 index ebd421a..0000000 --- a/profiles/memex/slurm-jobscript.sh +++ /dev/null @@ -1 +0,0 @@ -../slurm/slurm-jobscript.sh \ No newline at end of file diff --git a/profiles/memex/slurm-status.py b/profiles/memex/slurm-status.py deleted file mode 120000 index 6352e75..0000000 --- a/profiles/memex/slurm-status.py +++ /dev/null @@ -1 +0,0 @@ -../slurm/slurm-status.py \ No newline at end of file diff --git a/profiles/memex/slurm-submit.py b/profiles/memex/slurm-submit.py deleted file mode 120000 index 01c6cc3..0000000 --- a/profiles/memex/slurm-submit.py +++ /dev/null @@ -1 +0,0 @@ -../slurm/slurm-submit.py \ No newline at end of file diff --git a/profiles/memex/slurm_utils.py b/profiles/memex/slurm_utils.py deleted file mode 120000 index e1e9196..0000000 --- a/profiles/memex/slurm_utils.py +++ 
/dev/null @@ -1 +0,0 @@ -../slurm/slurm_utils.py \ No newline at end of file diff --git a/profiles/slurm/LICENSE b/profiles/slurm/LICENSE deleted file mode 100644 index e23284b..0000000 --- a/profiles/slurm/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Snakemake-Profiles - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/profiles/slurm/config.yaml b/profiles/slurm/config.yaml deleted file mode 100644 index 56067f3..0000000 --- a/profiles/slurm/config.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Default settings of the cookiecutter setup -restart-times: 3 -jobscript: "slurm-jobscript.sh" -cluster: "slurm-submit.py" -cluster-status: "slurm-status.py" -max-jobs-per-second: 1 -max-status-checks-per-second: 10 -local-cores: 1 -latency-wait: 60 - -# Additional settings used for our purposes -use-conda: True -jobs: 100 -keep-going: True -rerun-incomplete: True -printshellcmds: True diff --git a/profiles/slurm/slurm-jobscript.sh b/profiles/slurm/slurm-jobscript.sh deleted file mode 100755 index 391741e..0000000 --- a/profiles/slurm/slurm-jobscript.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -# properties = {properties} -{exec_job} diff --git a/profiles/slurm/slurm-status.py b/profiles/slurm/slurm-status.py deleted file mode 100755 index 1929b78..0000000 --- a/profiles/slurm/slurm-status.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python3 -import re -import subprocess as sp -import shlex -import sys -import time -import logging - -logger = logging.getLogger("__name__") - -STATUS_ATTEMPTS = 20 - -jobid = sys.argv[1] - -for i in range(STATUS_ATTEMPTS): - try: - sacct_res = sp.check_output(shlex.split("sacct -P -b -j {} -n".format(jobid))) - res = { - x.split("|")[0]: x.split("|")[1] - for x in sacct_res.decode().strip().split("\n") - } - break - except sp.CalledProcessError as e: - logger.error("sacct process error") - logger.error(e) - except IndexError as e: - pass - # Try getting job with scontrol instead in case sacct is misconfigured - try: - sctrl_res = sp.check_output( - shlex.split("scontrol -o show job {}".format(jobid)) - ) - m = re.search("JobState=(\w+)", sctrl_res.decode()) - res = {jobid: m.group(1)} - break - except sp.CalledProcessError as e: - logger.error("scontrol process error") - logger.error(e) - if i >= STATUS_ATTEMPTS - 1: - print("failed") - exit(0) - 
else: - time.sleep(1) - -status = res[jobid] - -if status == "BOOT_FAIL": - print("failed") -elif status == "OUT_OF_MEMORY": - print("failed") -elif status.startswith("CANCELLED"): - print("failed") -elif status == "COMPLETED": - print("success") -elif status == "DEADLINE": - print("failed") -elif status == "FAILED": - print("failed") -elif status == "NODE_FAIL": - print("failed") -elif status == "PREEMPTED": - print("failed") -elif status == "TIMEOUT": - print("failed") -# Unclear whether SUSPENDED should be treated as running or failed -elif status == "SUSPENDED": - print("failed") -else: - print("running") diff --git a/profiles/slurm/slurm-submit.py b/profiles/slurm/slurm-submit.py deleted file mode 100755 index 10ab601..0000000 --- a/profiles/slurm/slurm-submit.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python -""" -Snakemake SLURM submit script. -""" -import warnings # use warnings.warn() rather than print() to output info in this script -import os -import datetime -import socket - -from snakemake.utils import read_job_properties -import slurm_utils - -# Prepare directory and params for slurm log files -workingdir = os.getcwd() -extra_params = {} -extra_params["log_base"] = os.path.join(workingdir, "slurm-logs") -os.makedirs(extra_params["log_base"], exist_ok=True) - -def write_debug_log(msg): - with open( os.path.join(extra_params["log_base"], "slurm-debug.log"), "a") as debug: - now = datetime.datetime.now() - debug.write(now.strftime("%Y-%m-%d %H:%M:%S") + "\t" + str(msg) + "\n") - # pass - -# cookiecutter arguments -SBATCH_DEFAULTS = """""" -ADVANCED_ARGUMENT_CONVERSION = {"yes": True, "no": False}["no"] - -# Try to find a cluster config in the dir where the current script is located at; -# this supposedly also works if we use a symlink to the script. -# If no config file found, reset to empty config file. 
-CLUSTER_CONFIG = os.path.join( os.path.dirname(__file__), "cluster_config.yaml" ) -if not os.path.exists(CLUSTER_CONFIG): - CLUSTER_CONFIG = "" - -RESOURCE_MAPPING = { - "time": ("time", "runtime", "walltime", "time_min"), - "mem": ("mem", "mem_mb", "ram", "memory"), - "mem-per-cpu": ("mem-per-cpu", "mem_per_cpu", "mem_per_thread"), - "nodes": ("nodes", "nnodes") -} - -# parse job -jobscript = slurm_utils.parse_jobscript() -job_properties = read_job_properties(jobscript) - -sbatch_options = {} -cluster_config = slurm_utils.load_cluster_config(CLUSTER_CONFIG) -# write_debug_log( "c\t" + str(cluster_config)) - -# 1) sbatch default arguments -sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS)) -write_debug_log( "1\t" + str(sbatch_options)) - -# 2) cluster_config defaults -sbatch_options.update(cluster_config["__default__"]) -write_debug_log( "2\t" + str(sbatch_options)) - -# 3) Convert resources (no unit conversion!) and threads -sbatch_options.update( - slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING) -) -write_debug_log( "3\t" + str(sbatch_options)) - -# 4) cluster_config for particular rule or group -if job_properties["type"] == "single": - sbatch_options.update(cluster_config.get(job_properties.get("rule"), {})) -elif job_properties["type"] == "group": - sbatch_options.update(cluster_config.get(job_properties.get("groupid"), {})) -else: - print("Error: slurm-submit.py doesn't support job type {} yet!".format(job_properties["type"])) - sys.exit(1) -write_debug_log( "4\t" + str(sbatch_options)) - -# 5) cluster_config options -sbatch_options.update(job_properties.get("cluster", {})) -write_debug_log( "5\t" + str(sbatch_options)) - -# 6) Advanced conversion of parameters -if ADVANCED_ARGUMENT_CONVERSION: - sbatch_options = slurm_utils.advanced_argument_conversion(sbatch_options) -write_debug_log( "6\t" + str(sbatch_options)) - -# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 
-# Additional features that we want and need. -# Inspiration from: https://github.com/bnprks/snakemake-slurm-profile - -def file_escape(string): - return string.replace("/", "_").replace(" ", "_") - -# Prepare job name for log script -if job_properties["type"] == "single": - extra_params["job_name"] = "snakejob." + job_properties["rule"] - extra_params["log_dir"] = os.path.join(workingdir, "slurm-logs", job_properties["rule"]) -elif job_properties["type"] == "group": - extra_params["job_name"] = "snakejob." + job_properties["groupid"] - extra_params["log_dir"] = os.path.join(workingdir, "slurm-logs", job_properties["groupid"]) -else: - print("Error: slurm-submit.py doesn't support job type {} yet!".format(job_properties["type"])) - sys.exit(1) -if "wildcards" in job_properties and len(job_properties["wildcards"]) > 0: - extra_params["job_name"] += "." + ".".join([key + "=" + file_escape(value) for key,value in job_properties["wildcards"].items()]) -os.makedirs(extra_params["log_dir"], exist_ok=True) - -# Set job name and out and err slurm log files -sbatch_options["job-name"] = extra_params["job_name"] -sbatch_options["output"] = "{log_dir}/{job_name}.%j.out".format(**extra_params) -sbatch_options["error"] = "{log_dir}/{job_name}.%j.err".format(**extra_params) -write_debug_log( "S\t" + str(sbatch_options) + "\n") - -# Write out the submission string. We do not want to rewire too much of this script as of now, -# so instead we do something ugly and duplicate the code from slurm_utils.submit_job() to re-create -# the submission string here. Can beautify in the future. 
-with open( os.path.join(extra_params["log_base"], "slurm-submissions.log"), "a") as slurmlog: - now = datetime.datetime.now() - opt = ["--"+str(k)+"="+str(v) for k, v in sbatch_options.items()] - cmd = ["sbatch"] + opt + [jobscript] - slurmlog.write(now.strftime("%Y-%m-%d %H:%M:%S") + "\t" + extra_params["job_name"] + "\t" + ' '.join(cmd) + "\n") - -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -#7) Format pattern in snakemake style -sbatch_options = slurm_utils.format_values(sbatch_options, job_properties) - -# ensure sbatch output dirs exist -for o in ("output", "error"): - slurm_utils.ensure_dirs_exist(sbatch_options[o]) if o in sbatch_options else None - -# submit job and echo id back to Snakemake (must be the only stdout) -print(slurm_utils.submit_job(jobscript, **sbatch_options)) diff --git a/profiles/slurm/slurm_utils.py b/profiles/slurm/slurm_utils.py deleted file mode 100644 index 7e31a07..0000000 --- a/profiles/slurm/slurm_utils.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python3 -import os -import re -import math -import argparse -import subprocess - -from snakemake import io -from snakemake.io import Wildcards -from snakemake.utils import SequenceFormatter, AlwaysQuotedFormatter, QuotedFormatter -from snakemake.exceptions import WorkflowError - -def parse_jobscript(): - """Minimal CLI to require/only accept single positional argument.""" - p = argparse.ArgumentParser(description="SLURM snakemake submit script") - p.add_argument("jobscript", help="Snakemake jobscript with job properties.") - return p.parse_args().jobscript - - -def parse_sbatch_defaults(parsed): - """Unpack SBATCH_DEFAULTS.""" - d = parsed.split() if type(parsed) == str else parsed - args = {k.strip().strip("-"): v.strip() for k, v in [a.split("=") for a in d]} - return args - - -def load_cluster_config(path): - """Load config to dict either from absolute path or relative to profile dir.""" - if path: - path = 
os.path.join(os.path.dirname(__file__), os.path.expandvars(path)) - dcc = io.load_configfile(path) - else: - dcc = {} - if "__default__" not in dcc: - dcc["__default__"] = {} - return dcc - -# adapted from format function in snakemake.utils -def format(_pattern, _quote_all=False, **kwargs): - """Format a pattern in Snakemake style. - This means that keywords embedded in braces are replaced by any variable - values that are available in the current namespace. - """ - fmt = SequenceFormatter(separator=" ") - if _quote_all: - fmt.element_formatter = AlwaysQuotedFormatter() - else: - fmt.element_formatter = QuotedFormatter() - try: - return fmt.format(_pattern, **kwargs) - except KeyError as ex: - raise NameError( - "The name {} is unknown in this context. Please " - "make sure that you defined that variable. " - "Also note that braces not used for variable access " - "have to be escaped by repeating them " - ) - -# adapted from Job.format_wildcards in snakemake.jobs -def format_wildcards(string, job_properties): - """ Format a string with variables from the job. 
""" - - class Job(object): - def __init__(self, job_properties): - for key in job_properties: - setattr(self, key, job_properties[key]) - job = Job(job_properties) - if "params" in job_properties: - job._format_params = Wildcards(fromdict=job_properties['params']) - else: - job._format_params = Wildcards() - if "wildcards" in job_properties: - job._format_wildcards = Wildcards(fromdict=job_properties['wildcards']) - else: - job._format_wildcards = Wildcards() - if not hasattr(job, 'rule'): - job.rule = Wildcards() - _variables = dict() - _variables.update( - dict( - params=job._format_params, - wildcards=job._format_wildcards, - rule=job.rule - ) - ) - try: - return format(string, **_variables) - except NameError as ex: - raise WorkflowError( - "NameError with group job {}: {}".format(job.jobid, str(ex)) - ) - except IndexError as ex: - raise WorkflowError( - "IndexError with group job {}: {}".format(job.jobid, str(ex)) - ) - -# adapted from ClusterExecutor.cluster_params function in snakemake.executor -def format_values(dictionary, job_properties): - formatted = dictionary.copy() - for key, value in list(formatted.items()): - if isinstance(value, str): - try: - formatted[key] = format_wildcards(value, job_properties) - except NameError as e: - msg = ( - "Failed to format cluster config " - "entry for job {}.".format(job_properties['rule']) - ) - raise WorkflowError(msg, e) - return formatted - -def convert_job_properties(job_properties, resource_mapping={}): - options = {} - resources = job_properties.get("resources", {}) - for k, v in resource_mapping.items(): - options.update({k: resources[i] for i in v if i in resources}) - - if "threads" in job_properties: - options["cpus-per-task"] = job_properties["threads"] - return options - - -def ensure_dirs_exist(path): - """Ensure output folder for Slurm log files exist.""" - di = os.path.dirname(path) - if di == "": - return - if not os.path.exists(di): - os.makedirs(di, exist_ok=True) - return - - -def 
submit_job(jobscript, **sbatch_options): - """Submit jobscript and return jobid.""" - optsbatch_options = [f"--{k}={v}" for k, v in sbatch_options.items()] - try: - res = subprocess.check_output(["sbatch"] + optsbatch_options + [jobscript]) - except subprocess.CalledProcessError as e: - raise e - # Get jobid - res = res.decode() - try: - jobid = re.search(r"Submitted batch job (\d+)", res).group(1) - except Exception as e: - raise e - return jobid - - -def advanced_argument_conversion(arg_dict): - """Experimental adjustment of sbatch arguments to the given or default partition. - """ - adjusted_args = {} - - partition = arg_dict.get("partition", None) or _get_default_partition() - constraint = arg_dict.get("constraint", None) - ncpus = int(arg_dict.get("cpus-per-task", 1)) - nodes = int(arg_dict.get("nodes", 1)) - mem = arg_dict.get("mem", None) - # Determine partition with features. If no constraints have been set, - # select the partition with lowest memory - try: - config = _get_cluster_configuration(partition) - mem_feat = _get_features_and_memory(partition) - MEMORY_PER_PARTITION = _get_available_memory(mem_feat, constraint) - MEMORY_PER_CPU = MEMORY_PER_PARTITION / int(config["cpus"]) - except Exception as e: - print(e) - raise e - - # Adjust memory in the single-node case only; getting the - # functionality right for multi-node multi-cpu jobs requires more - # development - if "nodes" not in arg_dict or nodes == 1: - if mem: - adjusted_args["mem"] = min(int(mem), MEMORY_PER_PARTITION) - AVAILABLE_MEM = ncpus * MEMORY_PER_CPU - if adjusted_args["mem"] > AVAILABLE_MEM: - adjusted_args["cpus-per-task"] = int( - math.ceil(int(mem) / MEMORY_PER_CPU) - ) - adjusted_args["cpus-per-task"] = min(int(config["cpus"]), ncpus) - else: - if nodes == 1: - # Allocate at least as many tasks as requested nodes - adjusted_args["cpus-per-task"] = nodes - # Update time. 
If requested time is larger than maximum allowed time, reset - try: - if "time" in arg_dict: - adjusted_args["time"] = min(int(config["time"]), int(arg_dict["time"])) - except Exception as e: - print(e) - raise e - # update and return - arg_dict.update(adjusted_args) - return arg_dict - - -def _get_default_partition(): - """Retrieve default partition for cluster""" - res = subprocess.check_output(["sinfo", "-O", "partition"]) - m = re.search(r"(?P<partition>\S+)\*", res.decode(), re.M) - partition = m.group("partition") - return partition - - -def _get_cluster_configuration(partition): - """Retrieve cluster configuration for a partition.""" - # Retrieve partition info; we tacitly assume we only get one response - cmd = " ".join( - [ - 'sinfo -e -O "partition,cpus,memory,time,size,maxcpuspernode"', - "-h -p {}".format(partition), - ] - ) - res = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE) - m = re.search( - r"(?P<partition>\S+)\s+(?P<cpus>\d+)\s+(?P<memory>\S+)\s+((?P<days>\d+)-)?(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\s+(?P<size>\S+)\s+(?P<maxcpus>\S+)", - res.stdout.decode(), - ) - d = m.groupdict() - if not "days" in d or not d["days"]: - d["days"] = 0 - d["time"] = ( - int(d["days"]) * 24 * 60 - + int(d["hours"]) * 60 - + int(d["minutes"]) - + math.ceil(int(d["seconds"]) / 60) - ) - return d - - -def _get_features_and_memory(partition): - """Retrieve features and memory for a partition in the cluster - configuration.
""" - cmd = " ".join(['sinfo -e -O "memory,features_act"', "-h -p {}".format(partition)]) - res = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE) - mem_feat = [] - for x in res.stdout.decode().split("\n"): - if not re.search(r"^\d+", x): - continue - m = re.search(r"^(?P\d+)\s+(?P\S+)", x) - mem_feat.append( - {"mem": m.groupdict()["mem"], "features": m.groupdict()["feat"].split(",")} - ) - return mem_feat - - -def _get_available_memory(mem_feat, constraints=None): - """Get available memory - - If constraints are given, parse constraint string into array of - constraints and compare them to active features. Currently only - handles comma-separated strings and not the more advanced - constructs described in the slurm manual. - - Else, the minimum memory for a given partition is returned. - - """ - if constraints is None: - return min([int(x["mem"]) for x in mem_feat]) - try: - constraint_set = set(constraints.split(",")) - for x in mem_feat: - if constraint_set.intersection(x["features"]) == constraint_set: - return int(x["mem"]) - except Exception as e: - print(e) - raise diff --git a/workflow/envs/grenepipe.yaml b/workflow/envs/grenepipe.yaml index 3a86300..25894f1 100644 --- a/workflow/envs/grenepipe.yaml +++ b/workflow/envs/grenepipe.yaml @@ -15,6 +15,7 @@ dependencies: # Snakemake - snakemake ==8.15.2 - snakemake-wrapper-utils ==0.6.2 + - snakemake-executor-plugin-slurm # Python - python ==3.12 #==3.7.10 diff --git a/workflow/profiles/README.md b/workflow/profiles/README.md new file mode 100644 index 0000000..8cdfa22 --- /dev/null +++ b/workflow/profiles/README.md @@ -0,0 +1,6 @@ +Overview +============ + +Profiles that might come in handy when running the pipeline in a cluster setting. The profile in `slurm` also contains a basic slurm configuration for some of the rule time and memory requirements that have worked for us for variant calling on normal-sized fastq inputs. 
+ +See the [Cluster and Profiles](https://github.com/lczech/grenepipe/wiki/Cluster-and-Profiles) wiki page for details on how those can be used with grenepipe. We also highly recommend getting familiar with the general Snakemake [Profiles](https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles) as well as the Snakemake [SLURM Plugin](https://snakemake.github.io/snakemake-plugin-catalog/plugins/executor/slurm.html) if you want to run grenepipe on a cluster. diff --git a/profiles/local/config.yaml b/workflow/profiles/default/config.yaml similarity index 100% rename from profiles/local/config.yaml rename to workflow/profiles/default/config.yaml diff --git a/profiles/slurm/cluster_config.yaml b/workflow/profiles/slurm/cluster_config.yaml similarity index 84% rename from profiles/slurm/cluster_config.yaml rename to workflow/profiles/slurm/cluster_config.yaml index db64c93..773b3d4 100644 --- a/profiles/slurm/cluster_config.yaml +++ b/workflow/profiles/slurm/cluster_config.yaml @@ -19,3 +19,10 @@ map_reads: call_variants: time: 1-0 cpus-per-task: 4 + +combine_calls: + time: 1-0 + +gatk_calls_combine: + time: 1-0 + cpus-per-task: 8 diff --git a/profiles/memex/config.yaml b/workflow/profiles/slurm/config.yaml similarity index 75% rename from profiles/memex/config.yaml rename to workflow/profiles/slurm/config.yaml index 56067f3..5d009dd 100644 --- a/profiles/memex/config.yaml +++ b/workflow/profiles/slurm/config.yaml @@ -1,8 +1,5 @@ # Default settings of the cookiecutter setup restart-times: 3 -jobscript: "slurm-jobscript.sh" -cluster: "slurm-submit.py" -cluster-status: "slurm-status.py" max-jobs-per-second: 1 max-status-checks-per-second: 10 local-cores: 1