Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.0.0 #148

Merged
merged 33 commits into from
Oct 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b933bd4
add description/troubleshooting for aws backend (about limit for numb…
leepc12 Jul 2, 2021
899b6b7
Merge pull request #137 from ENCODE-DCC/hotfix_update_doc_aws_increas…
leepc12 Aug 6, 2021
e23a5bb
bump ver
leepc12 Aug 6, 2021
9fc82d7
upgrade cromwell 59->65
leepc12 Aug 6, 2021
005aaf5
use modified cromwell for aws backend only (to fix localization error…
leepc12 Aug 6, 2021
db14353
use modified cromwell in the init script
leepc12 Aug 6, 2021
08922f1
upgrade circleci ubuntu image 1604->2004
leepc12 Aug 6, 2021
1399c24
doc: java requirement 8 -> 11
leepc12 Aug 6, 2021
32f66e5
ci: fix singularity installation
leepc12 Aug 6, 2021
86719a1
ci: singularity key server
leepc12 Aug 6, 2021
6a0da57
revert ubuntu upgrade and install java11 on ubuntu16
leepc12 Aug 6, 2021
4c9554d
ci: install openjdk11
leepc12 Aug 6, 2021
9e8eceb
ci: remove old java
leepc12 Aug 6, 2021
1723cfa
ci: java 11
leepc12 Aug 6, 2021
27f2118
ci: set default java as 11
leepc12 Aug 6, 2021
a71f559
Merge pull request #139 from ENCODE-DCC/modified_cromwell_for_aws_bac…
leepc12 Aug 16, 2021
8b876a6
change key name for docker, singularity, conda (added) in WDL meta
leepc12 Oct 6, 2021
a015fc6
refactor for conda, singularity, HPCs(slurm,sge,pbs,lsf), added lsf s…
leepc12 Oct 21, 2021
f621d12
fix singularity test
leepc12 Oct 22, 2021
44eac0a
fix lsf error
leepc12 Oct 22, 2021
eff2457
fix broken test (workflow opts error due to lsf)
leepc12 Oct 22, 2021
368fd76
fix slurm path remap problem
leepc12 Oct 22, 2021
85ea37f
fix bash nullglob issue
leepc12 Oct 22, 2021
8ec46a9
fix sed issue in slurm backend
leepc12 Oct 22, 2021
dd51f11
fix conda and singularity path remap (inside docker->outside)
leepc12 Oct 22, 2021
3a9fa11
fix hpc errors (typo)
leepc12 Oct 22, 2021
c1d2d37
update readme
leepc12 Oct 22, 2021
4df1285
update readme
leepc12 Oct 22, 2021
4e8a8d7
update doc
leepc12 Oct 22, 2021
6d15fa4
bump ver to 2.0.0
leepc12 Oct 22, 2021
d2ec26b
Merge pull request #147 from ENCODE-DCC/PIP-1626_docker_and_conda_in_…
leepc12 Oct 25, 2021
8eb80a4
fix typo
leepc12 Oct 25, 2021
a1a05a6
replace dra with default_runtime_attributes to make it clearer
leepc12 Oct 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ update_apt: &update_apt
sudo apt-get update


install_python3: &install_python3_java
name: Install python3, pip3, java
install_python3: &install_python3
name: Install python3, pip3
command: |
sudo apt-get install software-properties-common git wget curl default-jre -y
sudo apt-get install software-properties-common git wget curl -y
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt-get update && sudo apt-get install python3.6 -y
sudo wget --no-check-certificate https://bootstrap.pypa.io/get-pip.py
Expand Down Expand Up @@ -61,17 +61,26 @@ install_aws_lib: &install_aws_lib
sudo pip3 install boto3 awscli


install_java: &install_java
name: Install openjdk-11
command: |
sudo add-apt-repository ppa:openjdk-r/ppa -y
sudo apt-get update && sudo apt-get install openjdk-11-jdk -y
# automatically set 11 as default java
sudo update-java-alternatives -a

jobs:
pytest:
<<: *machine_defaults
steps:
- checkout
- run: *update_apt
- run: *install_python3_java
- run: *install_python3
- run: *install_singularity
- run: *install_py3_packages
- run: *install_gcs_lib
- run: *install_aws_lib
- run: *install_java
- run:
no_output_timeout: 60m
command: |
Expand Down
384 changes: 380 additions & 4 deletions DETAILS.md

Large diffs are not rendered by default.

564 changes: 59 additions & 505 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion caper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .caper_runner import CaperRunner

__all__ = ['CaperClient', 'CaperClientSubmit', 'CaperRunner']
__version__ = '1.6.3'
__version__ = '2.0.0'
63 changes: 37 additions & 26 deletions caper/caper_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
LOCAL_HASH_STRAT_FILE,
LOCAL_HASH_STRAT_PATH,
LOCAL_HASH_STRAT_PATH_MTIME,
CromwellBackendAWS,
CromwellBackendAws,
CromwellBackendBase,
CromwellBackendCommon,
CromwellBackendDatabase,
CromwellBackendGCP,
CromwellBackendGcp,
CromwellBackendLocal,
CromwellBackendSlurm,
)
from .cromwell_rest_api import CromwellRestAPI
from .resource_analysis import ResourceAnalysis
from .server_heartbeat import ServerHeartbeat
from .singularity import Singularity

DEFAULT_CAPER_CONF = '~/.caper/default.conf'
DEFAULT_LIST_FORMAT = 'id,status,name,str_label,user,parent,submission'
Expand Down Expand Up @@ -115,7 +115,7 @@ def get_parser_and_defaults(conf_file=None):
'localized input JSON files due to deepcopying (recursive localization). '
'Cromwell\'s MySQL/PostgreSQL DB password can be exposed on backend.conf '
'on this directory. Therefore, DO NOT USE /tmp HERE. This directory is '
'also used for storing cached files for local/slurm/sge/pbs backends.',
'also used for storing cached files for local/slurm/sge/pbs/lsf backends.',
)
group_loc.add_argument(
'--gcp-loc-dir',
Expand Down Expand Up @@ -278,7 +278,7 @@ def get_parser_and_defaults(conf_file=None):
'does not allow hard-linking. e.g. beeGFS. '
'This flag does not work with backends based on a Docker container. '
'i.e. gcp and aws. Also, '
'it does not work with local backends (local/slurm/sge/pbs) '
'it does not work with local backends (local/slurm/sge/pbs/lsf) '
'with --docker. However, it works fine with --singularity.',
)
group_cromwell.add_argument(
Expand All @@ -290,7 +290,7 @@ def get_parser_and_defaults(conf_file=None):
LOCAL_HASH_STRAT_PATH_MTIME,
],
help='File hashing strategy for call caching. '
'For local backends (local/slurm/sge/pbs) only. '
'For local backends (local/slurm/sge/pbs/lsf) only. '
'file: use md5sum hash (slow), path: use path only, '
'path+modtime (default): use path + mtime.',
)
Expand All @@ -305,6 +305,14 @@ def get_parser_and_defaults(conf_file=None):
'For gcp, define --gcp-out-dir. '
'For aws, define --aws-out-dir.',
)
group_local.add_argument(
'--slurm-resource-param',
help='SLURM resource parameters to be passed to sbatch. '
'You can customize this to fit your cluster\'s configuration. '
'You can use WDL syntax in ${} notation with Cromwell\'s built-in resource '
'variables. See documentation for details. ',
default=CromwellBackendSlurm.DEFAULT_SLURM_RESOURCE_PARAM,
)

group_gc_all = parent_backend.add_argument_group(
title='GCP backend arguments for server/runner/client'
Expand All @@ -325,7 +333,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_gc.add_argument(
'--gcp-region',
default=CromwellBackendGCP.DEFAULT_REGION,
default=CromwellBackendGcp.DEFAULT_REGION,
help='GCP region for Google Cloud Life Sciences API. '
'This is used only when --use-google-cloud-life-sciences is defined.',
)
Expand All @@ -338,7 +346,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_gc.add_argument(
'--gcp-call-caching-dup-strat',
default=CromwellBackendGCP.DEFAULT_CALL_CACHING_DUP_STRAT,
default=CromwellBackendGcp.DEFAULT_CALL_CACHING_DUP_STRAT,
choices=[CALL_CACHING_DUP_STRAT_REFERENCE, CALL_CACHING_DUP_STRAT_COPY],
help='Duplication strategy for call-cached outputs for GCP backend: '
'copy: make a copy, reference: refer to old output in metadata.json.',
Expand All @@ -360,7 +368,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_aws.add_argument(
'--aws-call-caching-dup-strat',
default=CromwellBackendAWS.DEFAULT_CALL_CACHING_DUP_STRAT,
default=CromwellBackendAws.DEFAULT_CALL_CACHING_DUP_STRAT,
choices=[CALL_CACHING_DUP_STRAT_REFERENCE, CALL_CACHING_DUP_STRAT_COPY],
help='Duplication strategy for call-cached outputs for AWS backend: '
'copy: make a copy, reference: refer to old output in metadata.json.',
Expand Down Expand Up @@ -397,14 +405,6 @@ def get_parser_and_defaults(conf_file=None):
action='store_true',
help='Put a hold on a workflow when submitted to a Cromwell server.',
)
parent_submit.add_argument(
'--singularity-cachedir',
default=Singularity.DEFAULT_SINGULARITY_CACHEDIR,
help='Singularity cache directory. Equivalent to exporting an '
'environment variable SINGULARITY_CACHEDIR. '
'Define it to prevent repeatedly building a singularity image '
'for every pipeline task',
)
parent_submit.add_argument(
'--use-gsutil-for-s3',
action='store_true',
Expand Down Expand Up @@ -477,7 +477,7 @@ def get_parser_and_defaults(conf_file=None):
description='Cloud-based backends (gc and aws) will only use Docker '
'so that "--docker URI_FOR_DOCKER_IMG" must be specified '
'in the command line argument or as a comment "#CAPER '
'docker URI_FOR_DOCKER_IMG" or value for "workflow.meta.caper_docker"'
'docker URI_FOR_DOCKER_IMG" or value for "workflow.meta.default_docker" '
'in a WDL file',
)
group_dep.add_argument(
Expand All @@ -486,9 +486,9 @@ def get_parser_and_defaults(conf_file=None):
const='',
default=None,
help='URI for Docker image (e.g. ubuntu:latest). '
'This can also be used as a flag to use Docker image address '
'This can also be used as a flag to use Docker image URI '
'defined in your WDL file as a comment ("#CAPER docker") or '
'as "workflow.meta.caper_docker" in WDL.',
'as "workflow.meta.default_docker" in WDL.',
)
group_dep_local = parent_submit.add_argument_group(
title='dependency resolver for local backend',
Expand All @@ -505,21 +505,26 @@ def get_parser_and_defaults(conf_file=None):
help='URI or path for Singularity image '
'(e.g. ~/.singularity/ubuntu-latest.simg, '
'docker://ubuntu:latest, shub://vsoch/hello-world). '
'This can also be used as a flag to use Docker image address '
'This can also be used as a flag to use Singularity image URI '
'defined in your WDL file as a comment ("#CAPER singularity") or '
'as "workflow.meta.caper_singularity" in WDL.',
'as "workflow.meta.default_singularity" in WDL.',
)
group_dep_local.add_argument(
'--no-build-singularity',
action='store_true',
help='Do not build singularity image before running a workflow. ',
'--conda',
nargs='?',
const='',
default=None,
help='Default Conda environment\'s name. '
'If defined, each task in WDL will be called with conda run -n ENV_NAME. '
'This can also be used as a flag to use Conda environment '
'defined in your WDL file under "workflow.meta.default_conda".',
)

group_slurm = parent_submit.add_argument_group('SLURM arguments')
group_slurm.add_argument('--slurm-partition', help='SLURM partition')
group_slurm.add_argument('--slurm-account', help='SLURM account')
group_slurm.add_argument(
'--slurm-extra-param', help='SLURM extra parameters. Must be double-quoted'
'--slurm-extra-param', help='SLURM extra parameters to be passed to sbatch. '
)

group_sge = parent_submit.add_argument_group('SGE arguments')
Expand All @@ -537,6 +542,12 @@ def get_parser_and_defaults(conf_file=None):
'--pbs-extra-param', help='PBS extra parameters. Must be double-quoted'
)

group_lsf = parent_submit.add_argument_group('LSF arguments')
group_lsf.add_argument('--lsf-queue', help='LSF queue')
group_lsf.add_argument(
'--lsf-extra-param', help='LSF extra parameters. Must be double-quoted'
)

# server
parent_server = argparse.ArgumentParser(add_help=False)
parent_server.add_argument(
Expand Down
58 changes: 44 additions & 14 deletions caper/caper_backend_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
BACKEND_AWS,
BACKEND_GCP,
BACKEND_SGE,
CromwellBackendAWS,
CromwellBackendAws,
CromwellBackendBase,
CromwellBackendCommon,
CromwellBackendDatabase,
CromwellBackendGCP,
CromwellBackendGcp,
CromwellBackendLocal,
CromwellBackendPBS,
CromwellBackendSGE,
CromwellBackendSLURM,
CromwellBackendLsf,
CromwellBackendPbs,
CromwellBackendSge,
CromwellBackendSlurm,
)
from .dict_tool import merge_dict
from .hocon_string import HOCONString
Expand Down Expand Up @@ -53,23 +54,29 @@ def __init__(
file_db=None,
gcp_prj=None,
gcp_out_dir=None,
gcp_call_caching_dup_strat=CromwellBackendGCP.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_call_caching_dup_strat=CromwellBackendGcp.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_service_account_key_json=None,
use_google_cloud_life_sciences=False,
gcp_region=CromwellBackendGCP.DEFAULT_REGION,
gcp_region=CromwellBackendGcp.DEFAULT_REGION,
aws_batch_arn=None,
aws_region=None,
aws_out_dir=None,
aws_call_caching_dup_strat=CromwellBackendAWS.DEFAULT_CALL_CACHING_DUP_STRAT,
aws_call_caching_dup_strat=CromwellBackendAws.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_zones=None,
slurm_partition=None,
slurm_account=None,
slurm_extra_param=None,
slurm_resource_param=CromwellBackendSlurm.DEFAULT_SLURM_RESOURCE_PARAM,
sge_pe=None,
sge_queue=None,
sge_extra_param=None,
sge_resource_param=CromwellBackendSge.DEFAULT_SGE_RESOURCE_PARAM,
pbs_queue=None,
pbs_extra_param=None,
pbs_resource_param=CromwellBackendPbs.DEFAULT_PBS_RESOURCE_PARAM,
lsf_queue=None,
lsf_extra_param=None,
lsf_resource_param=CromwellBackendLsf.DEFAULT_LSF_RESOURCE_PARAM,
):
"""Initializes the backend conf's stanzas.

Expand All @@ -93,7 +100,7 @@ def __init__(
max_concurrent_tasks:
Limit for concurrent number of tasks for each workflow.
soft_glob_output:
Local backends only (Local, sge, pbs, slurm).
Local backends only (Local, sge, pbs, slurm, lsf).
Glob with ln -s instead of hard-linking (ln alone).
Useful for file-system like beeGFS, which does not allow hard-linking.
local_hash_strat:
Expand Down Expand Up @@ -166,11 +173,21 @@ def __init__(
slurm_partition:
slurm_account:
slurm_extra_param:
slurm_resource_param:
For slurm backend only.
Resource parameters to be passed to sbatch.
You can use WDL syntax and Cromwell's built-in variables in ${} notation.
e.g. cpu, time, memory_mb
sge_pe:
sge_queue:
sge_extra_param:
sge_resource_param:
pbs_queue:
pbs_extra_param:
pbs_resource_param:
lsf_queue:
lsf_extra_param:
lsf_resource_param:
"""
self._template = {}

Expand Down Expand Up @@ -216,20 +233,21 @@ def __init__(

merge_dict(
self._template,
CromwellBackendSLURM(
CromwellBackendSlurm(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
local_hash_strat=local_hash_strat,
slurm_partition=slurm_partition,
slurm_account=slurm_account,
slurm_extra_param=slurm_extra_param,
slurm_resource_param=slurm_resource_param,
),
)

merge_dict(
self._template,
CromwellBackendSGE(
CromwellBackendSge(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
Expand All @@ -242,7 +260,7 @@ def __init__(

merge_dict(
self._template,
CromwellBackendPBS(
CromwellBackendPbs(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
Expand All @@ -252,6 +270,18 @@ def __init__(
),
)

merge_dict(
self._template,
CromwellBackendLsf(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
local_hash_strat=local_hash_strat,
lsf_queue=lsf_queue,
lsf_extra_param=lsf_extra_param,
),
)

# cloud backends
if gcp_prj and gcp_out_dir:
if gcp_service_account_key_json:
Expand All @@ -267,7 +297,7 @@ def __init__(

merge_dict(
self._template,
CromwellBackendGCP(
CromwellBackendGcp(
max_concurrent_tasks=max_concurrent_tasks,
gcp_prj=gcp_prj,
gcp_out_dir=gcp_out_dir,
Expand All @@ -282,7 +312,7 @@ def __init__(
if aws_batch_arn and aws_region and aws_out_dir:
merge_dict(
self._template,
CromwellBackendAWS(
CromwellBackendAws(
max_concurrent_tasks=max_concurrent_tasks,
aws_batch_arn=aws_batch_arn,
aws_region=aws_region,
Expand Down
Loading