From 992e3b18069a9591a4b0408c1aa14cc69b965c00 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 27 May 2016 17:51:27 -0400 Subject: [PATCH 01/38] Pathmapper now has list of file objects and not just file names. Job.py now stages files so that each file is in a separate directory (along with any secondary files). Building block for Directory type and filename munging. Most but not all conformance tests pass. --- cwltool/draft2tool.py | 23 ++++----- cwltool/job.py | 19 +++++-- cwltool/pathmapper.py | 105 +++++++++++++++++---------------------- cwltool/process.py | 22 +++----- tests/test_pathmapper.py | 9 ++-- 5 files changed, 85 insertions(+), 93 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index b74b04cc1..f08a54bfb 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -4,7 +4,7 @@ from .flatten import flatten from functools import partial import os -from .pathmapper import PathMapper, DockerPathMapper +from .pathmapper import PathMapper from .job import CommandLineJob import yaml import glob @@ -119,14 +119,10 @@ def __init__(self, toolpath_object, **kwargs): def makeJobRunner(self): # type: () -> CommandLineJob return CommandLineJob() - def makePathMapper(self, reffiles, **kwargs): + def makePathMapper(self, reffiles, stagedir, **kwargs): # type: (Set[str], str, **Any) -> PathMapper - dockerReq, _ = self.get_requirement("DockerRequirement") try: - if dockerReq and kwargs.get("use_container"): - return DockerPathMapper(reffiles, kwargs["basedir"]) - else: - return PathMapper(reffiles, kwargs["basedir"]) + return PathMapper(reffiles, kwargs["basedir"], stagedir) except OSError as e: if e.errno == errno.ENOENT: raise WorkflowException(u"Missing input file %s" % e) @@ -140,6 +136,7 @@ def job(self, joborder, output_callback, **kwargs): cacheargs = kwargs.copy() cacheargs["outdir"] = "/out" cacheargs["tmpdir"] = "/tmp" + cacheargs["stagedir"] = "/stage" cachebuilder = self._init_job(joborder, **cacheargs) 
cachebuilder.pathmapper = PathMapper(set((f["path"] for f in cachebuilder.files)), kwargs["basedir"]) @@ -200,7 +197,7 @@ def rm_pending_output_callback(output_callback, jobcachepending, builder = self._init_job(joborder, **kwargs) - reffiles = set((f["path"] for f in builder.files)) + reffiles = copy.deepcopy(builder.files) j = self.makeJobRunner() j.builder = builder @@ -225,14 +222,14 @@ def rm_pending_output_callback(output_callback, jobcachepending, if self.tool.get("stdin"): j.stdin = builder.do_eval(self.tool["stdin"]) - reffiles.add(j.stdin) + reffiles.append({"class": "File", "path": j.stdin}) if self.tool.get("stdout"): j.stdout = builder.do_eval(self.tool["stdout"]) - if os.path.isabs(j.stdout) or ".." in j.stdout: + if os.path.isabs(j.stdout) or ".." in j.stdout or not j.stdout: raise validate.ValidationException("stdout must be a relative path") - builder.pathmapper = self.makePathMapper(reffiles, **kwargs) + builder.pathmapper = self.makePathMapper(reffiles, builder.stagedir, **kwargs) builder.requirements = j.requirements # map files to assigned path inside a container. 
We need to also explicitly @@ -250,15 +247,17 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4)) - dockerReq = self.get_requirement("DockerRequirement")[0] + dockerReq, _ = self.get_requirement("DockerRequirement") if dockerReq and kwargs.get("use_container"): out_prefix = kwargs.get("tmp_outdir_prefix") j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix) tmpdir_prefix = kwargs.get('tmpdir_prefix') j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix) + j.stagedir = None else: j.outdir = builder.outdir j.tmpdir = builder.tmpdir + j.stagedir = builder.stagedir createFiles = self.get_requirement("CreateFileRequirement")[0] j.generatefiles = {} diff --git a/cwltool/job.py b/cwltool/job.py index 4d44c0f48..bcd9c153c 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -79,8 +79,9 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, (docker_req, docker_is_req) = get_feature(self, "DockerRequirement") for f in self.pathmapper.files(): - if not os.path.isfile(self.pathmapper.mapper(f)[0]): - raise WorkflowException(u"Required input file %s not found or is not a regular file." % self.pathmapper.mapper(f)[0]) + p = self.pathmapper.mapper(f) + if not os.path.isfile(p[0]): + raise WorkflowException(u"Input file %s (at %s) not found or is not a regular file." 
% (f, self.pathmapper.mapper(f)[0])) img_id = None if docker_req and kwargs.get("use_container") is not False: @@ -136,6 +137,14 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, if key in vars_to_preserve and key not in env: env[key] = value + for f in self.pathmapper.files(): + p = self.pathmapper.mapper(f) + if not os.path.exists(os.path.dirname(p[1])): + os.makedirs(os.path.dirname(p[1]), 0755) + if not os.path.exists(p[1]): + os.symlink(p[0], p[1]) + + stdin = None # type: Union[IO[Any],int] stdout = None # type: IO[Any] @@ -150,7 +159,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, self.name, self.outdir, " \\\n ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]), - u' < %s' % (self.stdin) if self.stdin else '', + u' < %s' % self.pathmapper.mapper(self.stdin)[1] if self.stdin else '', u' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '') if dry_run: @@ -251,6 +260,10 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, self.output_callback(outputs, processStatus) + if self.stagedir and os.path.exists(self.stagedir): + _logger.debug(u"[job %s] Removing input staging directory %s", self.name, self.stagedir) + shutil.rmtree(self.stagedir, True) + if rm_tmpdir: _logger.debug(u"[job %s] Removing temporary directory %s", self.name, self.tmpdir) shutil.rmtree(self.tmpdir, True) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 33e6f591a..08818808c 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -6,6 +6,18 @@ _logger = logging.getLogger("cwltool") +def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None + """Apply a mapping function to each File path in the object `rec`.""" + + if isinstance(rec, dict): + if rec.get("class") == "File": + rec["path"] = op(rec["path"]) + for d in rec: + adjustFiles(rec[d], op) + if isinstance(rec, list): + for d in rec: + adjustFiles(d, op) + def 
abspath(src, basedir): # type: (str,str) -> str if src.startswith("file://"): @@ -20,16 +32,46 @@ class PathMapper(object): """Mapping of files from relative path provided in the file to a tuple of (absolute local path, absolute container path)""" - def __init__(self, referenced_files, basedir): + def __init__(self, referenced_files, basedir, stagedir, scramble=False): # type: (Set[str], str) -> None self._pathmap = {} # type: Dict[str, Tuple[str, str]] + self.stagedir = stagedir + self.scramble = scramble self.setup(referenced_files, basedir) def setup(self, referenced_files, basedir): # type: (Set[str], str) -> None - for src in referenced_files: - ab = abspath(src, basedir) - self._pathmap[src] = (ab, ab) + + # Go through each file and set the target to its own directory along + # with any secondary files. + for fob in referenced_files: + stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) + + def visit(path): + if path in self._pathmap: + return path + ab = abspath(path, basedir) + if self.scramble: + tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 1000000000)) + else: + tgt = os.path.join(stagedir, os.path.basename(path)) + self._pathmap[path] = (ab, tgt) + return path + + adjustFiles(fob, visit) + + # Dereference symbolic links + for path, (ab, tgt) in self._pathmap.items(): + deref = ab + st = os.lstat(deref) + while stat.S_ISLNK(st.st_mode): + rl = os.readlink(deref) + deref = rl if os.path.isabs(rl) else os.path.join( + os.path.dirname(deref), rl) + st = os.lstat(deref) + + self._pathmap[path] = (deref, tgt) + def mapper(self, src): # type: (str) -> Tuple[str,str] if "#" in src: @@ -50,58 +92,3 @@ def reversemap(self, target): # type: (str) -> Tuple[str, str] if v[1] == target: return (k, v[0]) return None - - -class DockerPathMapper(PathMapper): - - def __init__(self, referenced_files, basedir): - # type: (Set[str], str) -> None - self.dirs = {} # type: Dict[str, Union[bool, str]] - super(DockerPathMapper, 
self).__init__(referenced_files, basedir) - - def setup(self, referenced_files, basedir): - for src in referenced_files: - ab = abspath(src, basedir) - dirn, fn = os.path.split(ab) - - subdir = False - for d in self.dirs: - if dirn.startswith(d): - subdir = True - break - - if not subdir: - for d in list(self.dirs): - if d.startswith(dirn): - # 'dirn' is a parent of 'd' - del self.dirs[d] - self.dirs[dirn] = True - - prefix = "job" + str(random.randint(1, 1000000000)) + "_" - - names = set() # type: Set[str] - for d in self.dirs: - name = os.path.join("/var/lib/cwl", prefix + os.path.basename(d)) - i = 1 - while name in names: - i += 1 - name = os.path.join("/var/lib/cwl", - prefix + os.path.basename(d) + str(i)) - names.add(name) - self.dirs[d] = name - - for src in referenced_files: - ab = abspath(src, basedir) - - deref = ab - st = os.lstat(deref) - while stat.S_ISLNK(st.st_mode): - rl = os.readlink(deref) - deref = rl if os.path.isabs(rl) else os.path.join( - os.path.dirname(deref), rl) - st = os.lstat(deref) - - for d in self.dirs: - if ab.startswith(d): - self._pathmap[src] = (deref, os.path.join( - self.dirs[d], ab[len(d)+1:])) diff --git a/cwltool/process.py b/cwltool/process.py index 6cb32327b..8fa40c7ba 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -19,7 +19,7 @@ import tempfile import glob from .errors import WorkflowException -from .pathmapper import abspath +from .pathmapper import abspath, adjustFiles from typing import Any, Callable, Generator, Union, IO, AnyStr, Tuple from collections import Iterable from rdflib import URIRef @@ -137,18 +137,6 @@ def checkRequirements(rec, supportedProcessRequirements): for d in rec: checkRequirements(d, supportedProcessRequirements) -def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None - """Apply a mapping function to each File path in the object `rec`.""" - - if isinstance(rec, dict): - if rec.get("class") == "File": - rec["path"] = op(rec["path"]) - for d in rec: - adjustFiles(rec[d], 
op) - if isinstance(rec, list): - for d in rec: - adjustFiles(d, op) - def adjustFilesWithSecondary(rec, op, primary=None): """Apply a mapping function to each File path in the object `rec`, propagating the primary file associated with a group of secondary files. @@ -318,13 +306,19 @@ def _init_job(self, joborder, **kwargs): builder.resources = {} builder.timeout = kwargs.get("eval_timeout") - dockerReq, _ = self.get_requirement("DockerRequirement") + dockerReq, is_req = self.get_requirement("DockerRequirement") + + if dockerReq and is_req and not kwargs.get("use_container"): + raise WorkflowException("Document has DockerRequirement under 'requirements' but use_container is false. DockerRequirement must be under 'hints' or use_container must be true.") + if dockerReq and kwargs.get("use_container"): builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl" builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp" + builder.stagedir = kwargs.get("docker_stagedir") or "/var/lib/cwl" else: builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp() builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp() + builder.stagedir = kwargs.get("stagedir") or tempfile.mkdtemp() builder.fs_access = kwargs.get("fs_access") or StdFsAccess(kwargs["basedir"]) diff --git a/tests/test_pathmapper.py b/tests/test_pathmapper.py index 7e4af9e5b..b54c89a3f 100644 --- a/tests/test_pathmapper.py +++ b/tests/test_pathmapper.py @@ -9,13 +9,12 @@ class TestPathMapper(unittest.TestCase): def test_subclass(self): - + class SubPathMapper(PathMapper): - def __init__(self, referenced_files, basedir, new): - super(SubPathMapper, self).__init__(referenced_files, basedir) + def __init__(self, referenced_files, basedir, stagedir, new): + super(SubPathMapper, self).__init__(referenced_files, basedir, stagedir) self.new = new - a = SubPathMapper([], '', "new") + a = SubPathMapper([], '', '', "new") self.assertTrue(a.new, "new") - From 7f979d8dc6d664048fe585eeffc1080bd644cb34 Mon Sep 17 00:00:00 
2001 From: Peter Amstutz Date: Fri, 10 Jun 2016 17:45:14 -0400 Subject: [PATCH 02/38] Squashed 'cwltool/schemas/' changes from 3e1b02c..a2263a9 a2263a9 Make path/listing optional (sort of). de7cb76 Add listing to directory type 57a5bc8 Fix last test using "args" 3cc79d2 Port draft-3 changes to command line generation tests to draft-4. f2a18c3 Add missing args.py used by draft-3 tests. ac17f5d Port draft-2 changes to command line generation tests to draft-3. ddf417b Command line generation tests now work by executing a small script that copies the command line to the output. Allows cwl-runner implementations to drop the --conformance-test code path. c4cecad Merge pull request #228 from common-workflow-language/scoped-ref-and-typedsl d1d53b3 Merge pull request #227 from common-workflow-language/stderr 0b987d3 add test 71e9b3d add stderr object 347c28e Merge pull request #212 from common-workflow-language/cwl-runner-interface 28f64f0 clarify description d791500 add output stanza, fix syntax ee0bea3 add descriptions 04dfbf0 toolfile is optional b1e740e bind and order the tool and jobfiles 81bd8de description c7a2bf1 move to a better location 979453a initial description of cwl-runner interface git-subtree-dir: cwltool/schemas git-subtree-split: a2263a9bf013e9813e4f55a89a2c70be5c0ff7bc --- draft-2/conformance_test_draft-2.yaml | 67 +++++++--------- draft-2/draft-2/args.py | 7 ++ draft-2/draft-2/binding-test.cwl | 11 ++- draft-2/draft-2/bwa-mem-job.json | 6 +- draft-2/draft-2/bwa-mem-tool.cwl | 23 ++++-- draft-2/draft-2/cat1-testcli.cwl | 34 ++++++++ draft-2/draft-2/cat2-tool.cwl | 23 ------ draft-2/draft-2/chr20.fa | 0 .../draft-2/example_human_Illumina.pe_1.fastq | 0 .../draft-2/example_human_Illumina.pe_2.fastq | 0 draft-2/draft-2/reads.fastq | 0 draft-2/draft-2/template-tool.cwl | 9 ++- draft-2/draft-2/tmap-tool.cwl | 21 +++-- draft-3/conformance_test_draft-3.yaml | 70 +++++++---------- draft-3/cwl-runner.cwl | 78 +++++++++++++++++++ draft-3/draft-3/args.py | 7 ++ 
draft-3/draft-3/binding-test.cwl | 11 ++- draft-3/draft-3/bwa-mem-job.json | 6 +- draft-3/draft-3/bwa-mem-tool.cwl | 22 ++++-- draft-3/draft-3/cat1-testcli.cwl | 35 +++++++++ draft-3/draft-3/chr20.fa | 0 .../draft-3/example_human_Illumina.pe_1.fastq | 0 .../draft-3/example_human_Illumina.pe_2.fastq | 0 draft-3/draft-3/reads.fastq | 0 draft-3/draft-3/template-tool.cwl | 9 ++- draft-3/draft-3/tmap-tool.cwl | 18 ++++- draft-4/CommandLineTool.yml | 12 +++ draft-4/Process.yml | 35 ++++++--- draft-4/conformance_test_draft-4.yaml | 70 +++++++---------- draft-4/draft-4/args.py | 7 ++ draft-4/draft-4/binding-test.cwl | 15 +++- draft-4/draft-4/bwa-mem-job.json | 6 +- draft-4/draft-4/bwa-mem-tool.cwl | 36 +++++---- draft-4/draft-4/cat1-testcli.cwl | 35 +++++++++ draft-4/draft-4/chr20.fa | 0 .../draft-4/example_human_Illumina.pe_1.fastq | 0 .../draft-4/example_human_Illumina.pe_2.fastq | 0 draft-4/draft-4/reads.fastq | 0 draft-4/draft-4/template-tool.cwl | 9 ++- draft-4/draft-4/tmap-tool.cwl | 18 ++++- 40 files changed, 486 insertions(+), 214 deletions(-) create mode 100755 draft-2/draft-2/args.py create mode 100755 draft-2/draft-2/cat1-testcli.cwl delete mode 100755 draft-2/draft-2/cat2-tool.cwl create mode 100644 draft-2/draft-2/chr20.fa create mode 100644 draft-2/draft-2/example_human_Illumina.pe_1.fastq create mode 100644 draft-2/draft-2/example_human_Illumina.pe_2.fastq create mode 100644 draft-2/draft-2/reads.fastq create mode 100644 draft-3/cwl-runner.cwl create mode 100755 draft-3/draft-3/args.py create mode 100755 draft-3/draft-3/cat1-testcli.cwl create mode 100644 draft-3/draft-3/chr20.fa create mode 100644 draft-3/draft-3/example_human_Illumina.pe_1.fastq create mode 100644 draft-3/draft-3/example_human_Illumina.pe_2.fastq create mode 100644 draft-3/draft-3/reads.fastq create mode 100755 draft-4/draft-4/args.py create mode 100755 draft-4/draft-4/cat1-testcli.cwl create mode 100644 draft-4/draft-4/chr20.fa create mode 100644 
draft-4/draft-4/example_human_Illumina.pe_1.fastq create mode 100644 draft-4/draft-4/example_human_Illumina.pe_2.fastq create mode 100644 draft-4/draft-4/reads.fastq diff --git a/draft-2/conformance_test_draft-2.yaml b/draft-2/conformance_test_draft-2.yaml index 1a6ca998b..a0ffecef1 100644 --- a/draft-2/conformance_test_draft-2.yaml +++ b/draft-2/conformance_test_draft-2.yaml @@ -1,63 +1,52 @@ -- args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', draft-2/rabix/tests/test-files/chr20.fa, - draft-2/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, draft-2/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] +- args: job: draft-2/bwa-mem-job.json - stdout: output.sam tool: draft-2/bwa-mem-tool.cwl + output: + args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', + chr20.fa, + example_human_Illumina.pe_1.fastq, + example_human_Illumina.pe_2.fastq] doc: General test of command line generation -- args: [bwa, mem, draft-2/rabix/tests/test-files/chr20.fa, - "-XXX", - "-YYY", draft-2/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, - "-YYY", draft-2/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] +- output: + args: [bwa, mem, chr20.fa, + "-XXX", + "-YYY", example_human_Illumina.pe_1.fastq, + "-YYY", example_human_Illumina.pe_2.fastq] job: draft-2/bwa-mem-job.json tool: draft-2/binding-test.cwl doc: Test nested prefixes with arrays -- args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, +- output: + args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, '20', stage2, map1, --max-seq-length, '20', --min-seq-length, '10', --seed-length, '16', map2, --max-seed-hits, '-1', --max-seq-length, '20', --min-seq-length, '10'] job: draft-2/tmap-job.json - stdin: draft-2/reads.fastq - stdout: output.sam tool: draft-2/tmap-tool.cwl - doc: Test nested command line bindings and stdin/stdout redirection + doc: Test nested command line bindings -- args: [cat, draft-2/hello.txt] +- output: + args: [cat, 
hello.txt] job: draft-2/cat-job.json - tool: draft-2/cat1-tool.cwl + tool: draft-2/cat1-testcli.cwl doc: Test command line with optional input (missing) -- args: [cat, -n, draft-2/hello.txt] +- output: + args: [cat, -n, hello.txt] job: draft-2/cat-n-job.json - tool: draft-2/cat1-tool.cwl + tool: draft-2/cat1-testcli.cwl doc: Test command line with optional input (provided) -- args: [cat] - job: draft-2/cat-job.json - stdin: draft-2/hello.txt - tool: draft-2/cat2-tool.cwl - doc: Test command line with stdin redirection - -- args: [cat, draft-2/hello.txt] - job: draft-2/cat-job.json - stdout: output.txt - tool: draft-2/cat3-tool.cwl - doc: Test command line with stdout redirection - -- args: [cat] - job: draft-2/cat-job.json - stdin: draft-2/hello.txt - stdout: output.txt - tool: draft-2/cat4-tool.cwl - doc: Test command line with stdin and stdout redirection - -- args: [cat, foo.txt] - createfiles: {foo.txt: 'The file is draft-2/hello.txt - - '} +- output: + "foo": { + "checksum": "sha1$63da67422622fbf9251a046d7a34b7ea0fd4fead", + "class": "File", + "path": "foo.txt", + "size": 22 + } job: draft-2/cat-job.json tool: draft-2/template-tool.cwl - doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig features + doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig feature - job: draft-2/cat-job.json output: diff --git a/draft-2/draft-2/args.py b/draft-2/draft-2/args.py new file mode 100755 index 000000000..1baf6a090 --- /dev/null +++ b/draft-2/draft-2/args.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +import sys +import json +import os +args = [os.path.basename(a) for a in sys.argv[1:]] +with open("cwl.output.json", "w") as f: + json.dump({"args": args}, f) diff --git a/draft-2/draft-2/binding-test.cwl b/draft-2/draft-2/binding-test.cwl index 9f3914e98..cd3857861 100755 --- a/draft-2/draft-2/binding-test.cwl +++ b/draft-2/draft-2/binding-test.cwl @@ -14,6 +14,15 @@ inputs: inputBinding: { prefix: "-YYY" } inputBinding: { position: 3, 
prefix: "-XXX" } + - id: "#args.py" + type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + outputs: [] -baseCommand: ["bwa", "mem"] +baseCommand: python +arguments: ["bwa", "mem"] diff --git a/draft-2/draft-2/bwa-mem-job.json b/draft-2/draft-2/bwa-mem-job.json index 331cc256b..299f75b31 100644 --- a/draft-2/draft-2/bwa-mem-job.json +++ b/draft-2/draft-2/bwa-mem-job.json @@ -1,18 +1,18 @@ { "reference": { "class": "File", - "path": "rabix/tests/test-files/chr20.fa", + "path": "chr20.fa", "size": 123, "checksum": "sha1$hash" }, "reads": [ { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_1.fastq" + "path": "example_human_Illumina.pe_1.fastq" }, { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_2.fastq" + "path": "example_human_Illumina.pe_2.fastq" } ], "min_std_max_min": [ diff --git a/draft-2/draft-2/bwa-mem-tool.cwl b/draft-2/draft-2/bwa-mem-tool.cwl index c6cdba102..c875ed2eb 100755 --- a/draft-2/draft-2/bwa-mem-tool.cwl +++ b/draft-2/draft-2/bwa-mem-tool.cwl @@ -4,11 +4,6 @@ class: CommandLineTool requirements: - import: node-engine.cwl -hints: - - class: DockerRequirement - dockerPull: images.sbgenomics.com/rabix/bwa - dockerImageId: 9d3b9b0359cf - inputs: - id: "#reference" type: File @@ -31,14 +26,28 @@ inputs: prefix: "-I" itemSeparator: "," + - id: "#args.py" + type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + outputs: - id: "#sam" - type: "File" + type: ["null", "File"] outputBinding: { "glob": "output.sam" } + - id: "#args" + type: + type: array + items: string -baseCommand: ["bwa", "mem"] +baseCommand: python arguments: + - "bwa" + - "mem" - valueFrom: engine: "node-engine.cwl" script: "$job.allocatedResources.cpu" diff --git a/draft-2/draft-2/cat1-testcli.cwl b/draft-2/draft-2/cat1-testcli.cwl new file mode 100755 index 000000000..410833193 --- /dev/null +++ b/draft-2/draft-2/cat1-testcli.cwl @@ -0,0 +1,34 @@ +#!/usr/bin/env 
cwl-runner +{ + "class": "CommandLineTool", + "description": "Print the contents of a file to stdout using 'cat' running in a docker container.", + "inputs": [ + { + "id": "#file1", + "type": "File", + "inputBinding": {"position": 1} + }, + { + "id": "#numbering", + "type": ["null", "boolean"], + "inputBinding": { + "position": 0, + "prefix": "-n" + } + }, + { + id: "#args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 + } + } + ], + "outputs": [], + "baseCommand": "python", + "arguments": ["cat"] +} diff --git a/draft-2/draft-2/cat2-tool.cwl b/draft-2/draft-2/cat2-tool.cwl deleted file mode 100755 index a7c3f44b3..000000000 --- a/draft-2/draft-2/cat2-tool.cwl +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env cwl-runner -{ - "class": "CommandLineTool", - "description": "Print the contents of a file to stdout using 'cat' running in a docker container.", - "hints": [ - { - "class": "DockerRequirement", - "dockerPull": "debian:wheezy" - } - ], - "inputs": [ - { - "id": "#file1", - "type": "File" - } - ], - "outputs": [], - "baseCommand": "cat", - "stdin": { - "engine": "cwl:JsonPointer", - "script": "job/file1/path" - } -} diff --git a/draft-2/draft-2/chr20.fa b/draft-2/draft-2/chr20.fa new file mode 100644 index 000000000..e69de29bb diff --git a/draft-2/draft-2/example_human_Illumina.pe_1.fastq b/draft-2/draft-2/example_human_Illumina.pe_1.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-2/draft-2/example_human_Illumina.pe_2.fastq b/draft-2/draft-2/example_human_Illumina.pe_2.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-2/draft-2/reads.fastq b/draft-2/draft-2/reads.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-2/draft-2/template-tool.cwl b/draft-2/draft-2/template-tool.cwl index 50dc7e563..2543600ae 100755 --- a/draft-2/draft-2/template-tool.cwl +++ b/draft-2/draft-2/template-tool.cwl @@ -17,9 +17,14 @@ requirements: - filename: foo.txt 
fileContent: engine: "#js" - script: 't("The file is <%= $job.file1.path %>\n")' + script: > + t("The file is <%= $job.file1.path.split('/').slice(-1)[0] %>\n") inputs: - id: "#file1" type: File -outputs: [] +outputs: + - id: "#foo" + type: File + outputBinding: + glob: foo.txt baseCommand: ["cat", "foo.txt"] diff --git a/draft-2/draft-2/tmap-tool.cwl b/draft-2/draft-2/tmap-tool.cwl index a9c8ac62e..3f9ee02c5 100755 --- a/draft-2/draft-2/tmap-tool.cwl +++ b/draft-2/draft-2/tmap-tool.cwl @@ -16,7 +16,18 @@ "type": "array", "items": "Stage" } + }, + { + id: "#args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 } + } ], "outputs": [ { @@ -24,7 +35,7 @@ "outputBinding": { "glob": "output.sam" }, - "type": "File" + "type": ["null", "File"] } ], "requirements": [ @@ -233,10 +244,6 @@ ] } ]}], - "baseCommand": ["tmap", "mapall"], - "stdin": { - "engine": "cwl:JsonPointer", - "script": "job/reads/path" - }, - "stdout": "output.sam" + "baseCommand": "python", + "arguments": ["tmap", "mapall"] } diff --git a/draft-3/conformance_test_draft-3.yaml b/draft-3/conformance_test_draft-3.yaml index 724853d6c..f187afc52 100644 --- a/draft-3/conformance_test_draft-3.yaml +++ b/draft-3/conformance_test_draft-3.yaml @@ -1,63 +1,51 @@ -- args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', draft-3/rabix/tests/test-files/chr20.fa, - draft-3/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, draft-3/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] - job: draft-3/bwa-mem-job.json - stdout: output.sam +- job: draft-3/bwa-mem-job.json tool: draft-3/bwa-mem-tool.cwl + output: + args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', + chr20.fa, + example_human_Illumina.pe_1.fastq, + example_human_Illumina.pe_2.fastq] doc: General test of command line generation -- args: [bwa, mem, draft-3/rabix/tests/test-files/chr20.fa, - "-XXX", - "-YYY", draft-3/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, - "-YYY", 
draft-3/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] +- output: + args: [bwa, mem, chr20.fa, + "-XXX", + "-YYY", example_human_Illumina.pe_1.fastq, + "-YYY", example_human_Illumina.pe_2.fastq] job: draft-3/bwa-mem-job.json tool: draft-3/binding-test.cwl doc: Test nested prefixes with arrays -- args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, +- output: + args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, '20', stage2, map1, --max-seq-length, '20', --min-seq-length, '10', --seed-length, '16', map2, --max-seed-hits, '-1', --max-seq-length, '20', --min-seq-length, '10'] job: draft-3/tmap-job.json - stdin: draft-3/reads.fastq - stdout: output.sam tool: draft-3/tmap-tool.cwl - doc: Test nested command line bindings and stdin/stdout redirection + doc: Test nested command line bindings -- args: [cat, draft-3/hello.txt] +- output: + args: [cat, hello.txt] job: draft-3/cat-job.json - tool: draft-3/cat1-tool.cwl + tool: draft-3/cat1-testcli.cwl doc: Test command line with optional input (missing) -- args: [cat, -n, draft-3/hello.txt] +- output: + args: [cat, -n, hello.txt] job: draft-3/cat-n-job.json - tool: draft-3/cat1-tool.cwl + tool: draft-3/cat1-testcli.cwl doc: Test command line with optional input (provided) -- args: [cat] - job: draft-3/cat-job.json - stdin: draft-3/hello.txt - tool: draft-3/cat2-tool.cwl - doc: Test command line with stdin redirection - -- args: [cat, draft-3/hello.txt] - job: draft-3/cat-job.json - stdout: output.txt - tool: draft-3/cat3-tool.cwl - doc: Test command line with stdout redirection - -- args: [cat] - job: draft-3/cat-job.json - stdin: draft-3/hello.txt - stdout: output.txt - tool: draft-3/cat4-tool.cwl - doc: Test command line with stdin and stdout redirection - -- args: [cat, foo.txt] - createfiles: {foo.txt: 'The file is draft-3/hello.txt - - '} +- output: + "foo": { + "checksum": "sha1$63da67422622fbf9251a046d7a34b7ea0fd4fead", + "class": "File", + 
"path": "foo.txt", + "size": 22 + } job: draft-3/cat-job.json tool: draft-3/template-tool.cwl - doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig features + doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig feature - job: draft-3/cat-job.json output: @@ -504,7 +492,7 @@ doc: | Test parameter evaluation, with support for JS expressions -- args: [cat, draft-3/hello.txt] +- output: {} job: draft-3/cat-job.json tool: draft-3/metadata.cwl doc: Test metadata diff --git a/draft-3/cwl-runner.cwl b/draft-3/cwl-runner.cwl new file mode 100644 index 000000000..7304f9633 --- /dev/null +++ b/draft-3/cwl-runner.cwl @@ -0,0 +1,78 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: "cwl:draft-3" + +description: | + Generic interface to run a Common Workflow Language tool or workflow from the + command line. To be implemented by each CWL compliant execution platform for + testing conformance to the standard and optionally for use by users. + +inputs: + - id: outdir + type: string + default: outdir + description: | + Output directory, defaults to the current directory + inputBinding: + prefix: "--outdir" + + - id: quiet + type: boolean + description: no diagnostic output + inputBinding: + prefix: "--quiet" + + - id: toolfile + type: [ "null", File ] + description: | + The tool or workflow description to run. Optional if the jobfile has a + `cwl:tool` field to indicate the tool or workflow description to run. 
+ inputBinding: + position: 1 + + - id: jobfile + type: File + inputBinding: + position: 2 + + - id: conformance-test + type: boolean + inputBinding: + prefix: "--conformance-test" + + - id: basedir + type: string + inputBinding: + prefix: "--basedir" + + - id: no-container + type: boolean + description: | + Do not execute jobs in a Docker container, even when listed as a Requirement + inputBinding: + prefix: "--no-container" + + - id: tmp-outdir-prefix + type: string + description: | + Path prefix for temporary directories. Useful for OS X so that boot2docker + writes to /Users + inputBinding: + prefix: "--tmp-outdir-prefix" + + - id: tmpdir-prefix + type: string + description: | + Path prefix for temporary directories + inputBinding: + prefix: "--tmpdir-prefix" + +baseCommand: cwl-runner + +stdout: output-object.json + +outputs: + - id: output-object + type: File + outputBinding: + glob: output-object.json diff --git a/draft-3/draft-3/args.py b/draft-3/draft-3/args.py new file mode 100755 index 000000000..1baf6a090 --- /dev/null +++ b/draft-3/draft-3/args.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +import sys +import json +import os +args = [os.path.basename(a) for a in sys.argv[1:]] +with open("cwl.output.json", "w") as f: + json.dump({"args": args}, f) diff --git a/draft-3/draft-3/binding-test.cwl b/draft-3/draft-3/binding-test.cwl index 2905c511c..50271f53f 100755 --- a/draft-3/draft-3/binding-test.cwl +++ b/draft-3/draft-3/binding-test.cwl @@ -15,6 +15,15 @@ inputs: inputBinding: { prefix: "-YYY" } inputBinding: { position: 3, prefix: "-XXX" } + - id: "#args.py" + type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + outputs: [] -baseCommand: [bwa, mem] +baseCommand: python +arguments: ["bwa", "mem"] diff --git a/draft-3/draft-3/bwa-mem-job.json b/draft-3/draft-3/bwa-mem-job.json index b6b290140..48d7d6cfe 100644 --- a/draft-3/draft-3/bwa-mem-job.json +++ b/draft-3/draft-3/bwa-mem-job.json @@ -1,18 +1,18 @@ { "reference": { 
"class": "File", - "path": "rabix/tests/test-files/chr20.fa", + "path": "chr20.fa", "size": 123, "checksum": "sha1$hash" }, "reads": [ { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_1.fastq" + "path": "example_human_Illumina.pe_1.fastq" }, { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_2.fastq" + "path": "example_human_Illumina.pe_2.fastq" } ], "min_std_max_min": [ diff --git a/draft-3/draft-3/bwa-mem-tool.cwl b/draft-3/draft-3/bwa-mem-tool.cwl index 7144a4bdd..985b15316 100755 --- a/draft-3/draft-3/bwa-mem-tool.cwl +++ b/draft-3/draft-3/bwa-mem-tool.cwl @@ -5,10 +5,6 @@ cwlVersion: cwl:draft-3 class: CommandLineTool hints: - - class: DockerRequirement - dockerPull: images.sbgenomics.com/rabix/bwa - dockerImageId: 9d3b9b0359cf - - class: ResourceRequirement coresMin: 4 @@ -34,14 +30,28 @@ inputs: prefix: -I itemSeparator: "," + - id: args.py + type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + outputs: - id: sam - type: File + type: ["null", File] outputBinding: { glob: output.sam } + - id: args + type: + type: array + items: string -baseCommand: [bwa, mem] +baseCommand: python arguments: + - bwa + - mem - valueFrom: $(runtime.cores) position: 1 prefix: -t diff --git a/draft-3/draft-3/cat1-testcli.cwl b/draft-3/draft-3/cat1-testcli.cwl new file mode 100755 index 000000000..31f3a94d6 --- /dev/null +++ b/draft-3/draft-3/cat1-testcli.cwl @@ -0,0 +1,35 @@ +#!/usr/bin/env cwl-runner +{ + "class": "CommandLineTool", + "cwlVersion": "cwl:draft-3", + "description": "Print the contents of a file to stdout using 'cat' running in a docker container.", + "inputs": [ + { + "id": "file1", + "type": "File", + "inputBinding": {"position": 1} + }, + { + "id": "numbering", + "type": ["null", "boolean"], + "inputBinding": { + "position": 0, + "prefix": "-n" + } + }, + { + id: "args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 + } + 
} + ], + "outputs": [], + "baseCommand": "python", + "arguments": ["cat"] +} diff --git a/draft-3/draft-3/chr20.fa b/draft-3/draft-3/chr20.fa new file mode 100644 index 000000000..e69de29bb diff --git a/draft-3/draft-3/example_human_Illumina.pe_1.fastq b/draft-3/draft-3/example_human_Illumina.pe_1.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-3/draft-3/example_human_Illumina.pe_2.fastq b/draft-3/draft-3/example_human_Illumina.pe_2.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-3/draft-3/reads.fastq b/draft-3/draft-3/reads.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-3/draft-3/template-tool.cwl b/draft-3/draft-3/template-tool.cwl index 744227868..9af5f0e01 100755 --- a/draft-3/draft-3/template-tool.cwl +++ b/draft-3/draft-3/template-tool.cwl @@ -11,9 +11,14 @@ requirements: - class: CreateFileRequirement fileDef: - filename: foo.txt - fileContent: $(t("The file is <%= inputs.file1.path %>\n")) + fileContent: > + $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) inputs: - id: file1 type: File -outputs: [] +outputs: + - id: foo + type: File + outputBinding: + glob: foo.txt baseCommand: [cat, foo.txt] diff --git a/draft-3/draft-3/tmap-tool.cwl b/draft-3/draft-3/tmap-tool.cwl index c25a887d5..514816a9f 100755 --- a/draft-3/draft-3/tmap-tool.cwl +++ b/draft-3/draft-3/tmap-tool.cwl @@ -18,7 +18,18 @@ "type": "array", "items": "#Stage" } + }, + { + id: "#args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 } + } ], "outputs": [ { @@ -26,7 +37,7 @@ "outputBinding": { "glob": "output.sam" }, - "type": "File" + "type": ["null", "File"] } ], "requirements": [ @@ -235,7 +246,6 @@ ] } ]}], - "baseCommand": ["tmap", "mapall"], - "stdin": "$(inputs.reads.path)", - "stdout": "output.sam" + "baseCommand": "python", + "arguments": ["tmap", "mapall"] } diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 
81329a840..01defa317 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -434,6 +434,18 @@ $graph: doc: | A path to a file whose contents must be piped into the command's standard input stream. + - name: stderr + type: ["null", string, "#Expression"] + doc: | + Capture the command's standard error stream to a file written to + the designated output directory. + + If `stderr` is a string, it specifies the file name to use. + + If `stderr` is an expression, the expression is evaluated and must + return a string with the file name to use to capture stderr. If the + return value is not a string, or the resulting path contains illegal + characters (such as the path separator `/`) it is an error. - name: stdout type: ["null", string, "#Expression"] doc: | diff --git a/draft-4/Process.yml b/draft-4/Process.yml index d50eaa289..d7ee85e6b 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -39,9 +39,11 @@ $graph: extends: "sld:PrimitiveType" symbols: - cwl:File + - cwl:Directory doc: - "Extends primitive types with the concept of a file as a first class type." - "File: A File object" + - "Directory: A Directory object" - name: File type: record @@ -111,12 +113,23 @@ $graph: runtime may perform exact file format matches. +- name: Dirent + type: record + fields: + - name: basename + type: string + jsonldPredicate: + "_id": cwl:name + - name: entry + type: [File, Directory] + + - name: Directory type: record docParent: "#CWLType" doc: | Represents a directory to present to a command line tool. This could be a virtual - directory, made of files assembled from a number of concrete directories. + directory, made of files assembled from multiple locations. fields: - name: class type: @@ -125,18 +138,22 @@ $graph: symbols: - cwl:Directory jsonldPredicate: - "_id": "@type" - "_type": "@vocab" + _id: "@type" + _type: "@vocab" doc: Must be `Directory` to indicate this object describes a Directory. - name: path - type: string + type: string? 
doc: The path to the directory. jsonldPredicate: - "_id": "cwl:path" - "_type": "@id" - # - name: size - # type: ["null", long] - # doc: Optional directory size. + _id: "cwl:path" + _type: "@id" + - name: listing + type: Dirent[]? + doc: List of files or subdirectories contained in this directory + jsonldPredicate: + _id: "cwl:listing" + mapSubject: basename + mapPredicate: entry - name: SchemaBase diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index e0d4043d2..cf5a294e8 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -1,63 +1,51 @@ -- args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', draft-4/rabix/tests/test-files/chr20.fa, - draft-4/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, draft-4/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] - job: draft-4/bwa-mem-job.json - stdout: output.sam +- job: draft-4/bwa-mem-job.json tool: draft-4/bwa-mem-tool.cwl + output: + args: [bwa, mem, -t, '4', -I, '1,2,3,4', -m, '3', + chr20.fa, + example_human_Illumina.pe_1.fastq, + example_human_Illumina.pe_2.fastq] doc: General test of command line generation -- args: [bwa, mem, draft-4/rabix/tests/test-files/chr20.fa, - "-XXX", - "-YYY", draft-4/rabix/tests/test-files/example_human_Illumina.pe_1.fastq, - "-YYY", draft-4/rabix/tests/test-files/example_human_Illumina.pe_2.fastq] +- output: + args: [bwa, mem, chr20.fa, + "-XXX", + "-YYY", example_human_Illumina.pe_1.fastq, + "-YYY", example_human_Illumina.pe_2.fastq] job: draft-4/bwa-mem-job.json tool: draft-4/binding-test.cwl doc: Test nested prefixes with arrays -- args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, +- output: + args: [tmap, mapall, stage1, map1, --min-seq-length, '20', map2, --min-seq-length, '20', stage2, map1, --max-seq-length, '20', --min-seq-length, '10', --seed-length, '16', map2, --max-seed-hits, '-1', --max-seq-length, '20', --min-seq-length, '10'] job: 
draft-4/tmap-job.json - stdin: draft-4/reads.fastq - stdout: output.sam tool: draft-4/tmap-tool.cwl - doc: Test nested command line bindings and stdin/stdout redirection + doc: Test nested command line bindings -- args: [cat, draft-4/hello.txt] +- output: + args: [cat, hello.txt] job: draft-4/cat-job.json - tool: draft-4/cat1-tool.cwl + tool: draft-4/cat1-testcli.cwl doc: Test command line with optional input (missing) -- args: [cat, -n, draft-4/hello.txt] +- output: + args: [cat, -n, hello.txt] job: draft-4/cat-n-job.json - tool: draft-4/cat1-tool.cwl + tool: draft-4/cat1-testcli.cwl doc: Test command line with optional input (provided) -- args: [cat] - job: draft-4/cat-job.json - stdin: draft-4/hello.txt - tool: draft-4/cat2-tool.cwl - doc: Test command line with stdin redirection - -- args: [cat, draft-4/hello.txt] - job: draft-4/cat-job.json - stdout: output.txt - tool: draft-4/cat3-tool.cwl - doc: Test command line with stdout redirection - -- args: [cat] - job: draft-4/cat-job.json - stdin: draft-4/hello.txt - stdout: output.txt - tool: draft-4/cat4-tool.cwl - doc: Test command line with stdin and stdout redirection - -- args: [cat, foo.txt] - createfiles: {foo.txt: 'The file is draft-4/hello.txt - - '} +- output: + "foo": { + "checksum": "sha1$63da67422622fbf9251a046d7a34b7ea0fd4fead", + "class": "File", + "path": "foo.txt", + "size": 22 + } job: draft-4/cat-job.json tool: draft-4/template-tool.cwl - doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig features + doc: Test CreateFileRequirement ExpressionEngineRequirement.engineConfig feature - job: draft-4/cat-job.json output: @@ -504,7 +492,7 @@ doc: | Test parameter evaluation, with support for JS expressions -- args: [cat, draft-4/hello.txt] +- output: {} job: draft-4/cat-job.json tool: draft-4/metadata.cwl doc: Test metadata diff --git a/draft-4/draft-4/args.py b/draft-4/draft-4/args.py new file mode 100755 index 000000000..1baf6a090 --- /dev/null +++ b/draft-4/draft-4/args.py @@ 
-0,0 +1,7 @@ +#!/usr/bin/env python +import sys +import json +import os +args = [os.path.basename(a) for a in sys.argv[1:]] +with open("cwl.output.json", "w") as f: + json.dump({"args": args}, f) diff --git a/draft-4/draft-4/binding-test.cwl b/draft-4/draft-4/binding-test.cwl index 085145df9..7e6c3c826 100755 --- a/draft-4/draft-4/binding-test.cwl +++ b/draft-4/draft-4/binding-test.cwl @@ -4,17 +4,26 @@ class: CommandLineTool cwlVersion: cwl:draft-4.dev2 inputs: - reference: + - id: reference type: File inputBinding: { position: 2 } - reads: + - id: reads type: type: array items: File inputBinding: { prefix: "-YYY" } inputBinding: { position: 3, prefix: "-XXX" } + - id: "#args.py" + type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + outputs: [] -baseCommand: [bwa, mem] +baseCommand: python +arguments: ["bwa", "mem"] diff --git a/draft-4/draft-4/bwa-mem-job.json b/draft-4/draft-4/bwa-mem-job.json index b6b290140..48d7d6cfe 100644 --- a/draft-4/draft-4/bwa-mem-job.json +++ b/draft-4/draft-4/bwa-mem-job.json @@ -1,18 +1,18 @@ { "reference": { "class": "File", - "path": "rabix/tests/test-files/chr20.fa", + "path": "chr20.fa", "size": 123, "checksum": "sha1$hash" }, "reads": [ { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_1.fastq" + "path": "example_human_Illumina.pe_1.fastq" }, { "class": "File", - "path": "rabix/tests/test-files/example_human_Illumina.pe_2.fastq" + "path": "example_human_Illumina.pe_2.fastq" } ], "min_std_max_min": [ diff --git a/draft-4/draft-4/bwa-mem-tool.cwl b/draft-4/draft-4/bwa-mem-tool.cwl index 830d071c8..b0722effa 100755 --- a/draft-4/draft-4/bwa-mem-tool.cwl +++ b/draft-4/draft-4/bwa-mem-tool.cwl @@ -1,47 +1,57 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: draft-4.dev2 class: CommandLineTool hints: - DockerRequirement: - dockerPull: images.sbgenomics.com/rabix/bwa - dockerImageId: 9d3b9b0359cf - - ResourceRequirement: + - class: ResourceRequirement 
coresMin: 4 inputs: - reference: + - id: reference type: File inputBinding: { position: 2 } - reads: + - id: reads type: type: array items: File inputBinding: { position: 3 } - minimum_seed_length: + - id: minimum_seed_length type: int inputBinding: { position: 1, prefix: -m } - min_std_max_min: + - id: min_std_max_min type: { type: array, items: int } inputBinding: position: 1 prefix: -I itemSeparator: "," -outputs: - sam: + - id: args.py type: File + default: + class: File + path: args.py + inputBinding: + position: -1 + +outputs: + - id: sam + type: ["null", File] outputBinding: { glob: output.sam } + - id: args + type: + type: array + items: string -baseCommand: [bwa, mem] +baseCommand: python arguments: + - bwa + - mem - valueFrom: $(runtime.cores) position: 1 prefix: -t diff --git a/draft-4/draft-4/cat1-testcli.cwl b/draft-4/draft-4/cat1-testcli.cwl new file mode 100755 index 000000000..3bad2b662 --- /dev/null +++ b/draft-4/draft-4/cat1-testcli.cwl @@ -0,0 +1,35 @@ +#!/usr/bin/env cwl-runner +{ + "class": "CommandLineTool", + "cwlVersion": "cwl:draft-4.dev2", + "description": "Print the contents of a file to stdout using 'cat' running in a docker container.", + "inputs": [ + { + "id": "file1", + "type": "File", + "inputBinding": {"position": 1} + }, + { + "id": "numbering", + "type": ["null", "boolean"], + "inputBinding": { + "position": 0, + "prefix": "-n" + } + }, + { + id: "args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 + } + } + ], + "outputs": [], + "baseCommand": "python", + "arguments": ["cat"] +} diff --git a/draft-4/draft-4/chr20.fa b/draft-4/draft-4/chr20.fa new file mode 100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/example_human_Illumina.pe_1.fastq b/draft-4/draft-4/example_human_Illumina.pe_1.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/example_human_Illumina.pe_2.fastq b/draft-4/draft-4/example_human_Illumina.pe_2.fastq new file mode 
100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/reads.fastq b/draft-4/draft-4/reads.fastq new file mode 100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/template-tool.cwl b/draft-4/draft-4/template-tool.cwl index ac4014e54..7e3521382 100755 --- a/draft-4/draft-4/template-tool.cwl +++ b/draft-4/draft-4/template-tool.cwl @@ -11,9 +11,14 @@ requirements: - class: CreateFileRequirement fileDef: - filename: foo.txt - fileContent: $(t("The file is <%= inputs.file1.path %>\n")) + fileContent: > + $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) inputs: - id: file1 type: File -outputs: [] +outputs: + - id: foo + type: File + outputBinding: + glob: foo.txt baseCommand: [cat, foo.txt] diff --git a/draft-4/draft-4/tmap-tool.cwl b/draft-4/draft-4/tmap-tool.cwl index f33b01f5c..8f8aca16a 100755 --- a/draft-4/draft-4/tmap-tool.cwl +++ b/draft-4/draft-4/tmap-tool.cwl @@ -18,7 +18,18 @@ "type": "array", "items": "#Stage" } + }, + { + id: "#args.py", + type: File, + default: { + class: File, + path: args.py + }, + inputBinding: { + position: -1 } + } ], "outputs": [ { @@ -26,7 +37,7 @@ "outputBinding": { "glob": "output.sam" }, - "type": "File" + "type": ["null", "File"] } ], "requirements": [ @@ -235,7 +246,6 @@ ] } ]}], - "baseCommand": ["tmap", "mapall"], - "stdin": "$(inputs.reads.path)", - "stdout": "output.sam" + "baseCommand": "python", + "arguments": ["tmap", "mapall"] } From b6fd4570148f4c4533c7a9b6dd5bc7b3ea98d36b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 14 Jun 2016 08:18:26 -0400 Subject: [PATCH 03/38] Input directory staging works. 
--- cwltool/builder.py | 16 +++++++++++ cwltool/draft2tool.py | 40 +++++++++++++++----------- cwltool/job.py | 4 +-- cwltool/main.py | 67 +++++++++++++++++++++++++++++++++++-------- cwltool/pathmapper.py | 37 +++++++++++++++++------- cwltool/process.py | 16 +++++++++-- 6 files changed, 138 insertions(+), 42 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index 909464a7d..c950e39e4 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -29,6 +29,18 @@ def adjustFileObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None for d in rec: adjustFileObjs(d, op) +def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None + """Apply an update function to each Directory object in the object `rec`.""" + + if isinstance(rec, dict): + if rec.get("class") == "Directory": + op(rec) + for d in rec: + adjustDirObjs(rec[d], op) + if isinstance(rec, list): + for d in rec: + adjustDirObjs(d, op) + class Builder(object): def __init__(self): # type: () -> None @@ -140,6 +152,10 @@ def _capture_files(f): adjustFileObjs(datum.get("secondaryFiles", []), _capture_files) + if schema["type"] == "Directory": + self.files.append(datum) + + # Position to front of the sort key if binding: for bi in bindings: diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 08e1f7748..76c25362d 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -1,30 +1,33 @@ -import avro.schema +import shutil +from functools import partial import json import copy -from .flatten import flatten -from functools import partial import os -from .pathmapper import PathMapper -from .job import CommandLineJob -import yaml import glob import logging import hashlib import random -from .process import Process, shortname, uniquename, adjustFileObjs -from .errors import WorkflowException -import schema_salad.validate as validate -from .utils import aslist -from . 
import expression import re import urlparse import tempfile -from .builder import CONTENT_LIMIT, substitute, Builder -import shellescape import errno + +import avro.schema +import yaml +import schema_salad.validate as validate +import shellescape from typing import Callable, Any, Union, Generator, cast -import hashlib -import shutil + +from .process import Process, shortname, uniquename +from .errors import WorkflowException +from .utils import aslist +from . import expression +from .builder import CONTENT_LIMIT, substitute, Builder, adjustFileObjs, adjustDirObjs +from .pathmapper import PathMapper +from .job import CommandLineJob + + +from .flatten import flatten _logger = logging.getLogger("cwltool") @@ -243,7 +246,10 @@ def rm_pending_output_callback(output_callback, jobcachepending, # walk over input as implicit reassignment doesn't reach everything in builder.bindings def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] if not f.get("containerfs"): - f["path"] = builder.pathmapper.mapper(f["path"])[1] + if f["class"] == "Directory": + f["path"] = builder.pathmapper.mapper(f["id"])[1] + else: + f["path"] = builder.pathmapper.mapper(f["path"])[1] f["containerfs"] = True return f @@ -251,6 +257,8 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] adjustFileObjs(builder.files, _check_adjust) adjustFileObjs(builder.bindings, _check_adjust) + adjustDirObjs(builder.files, _check_adjust) + adjustDirObjs(builder.bindings, _check_adjust) _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4)) diff --git a/cwltool/job.py b/cwltool/job.py index a80648231..7db5a6ec2 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -81,7 +81,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, for f in self.pathmapper.files(): p = self.pathmapper.mapper(f) - if not os.path.isfile(p[0]): + if not p[0].startswith("_dir:") and not os.path.isfile(p[0]): raise WorkflowException(u"Input file %s (at %s) not 
found or is not a regular file." % (f, self.pathmapper.mapper(f)[0])) img_id = None @@ -142,7 +142,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, p = self.pathmapper.mapper(f) if not os.path.exists(os.path.dirname(p[1])): os.makedirs(os.path.dirname(p[1]), 0755) - if not os.path.exists(p[1]): + if not p[0].startswith("_dir:") and not os.path.exists(p[1]): os.symlink(p[0], p[1]) diff --git a/cwltool/main.py b/cwltool/main.py index fc0e814d0..471f7403f 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -1,30 +1,35 @@ #!/usr/bin/env python -from . import draft2tool import argparse -from schema_salad.ref_resolver import Loader import string import json import os import sys import logging import copy +import tempfile +import urlparse +import hashlib +import pkg_resources # part of setuptools +import random + +import yaml +import rdflib +from typing import Union, Any, cast, Callable, Dict, Tuple, IO + +from schema_salad.ref_resolver import Loader +import schema_salad.validate as validate +import schema_salad.jsonld_context +import schema_salad.makedoc + from . import workflow from .errors import WorkflowException, UnsupportedRequirement from . import process from .cwlrdf import printrdf, printdot from .process import shortname, Process from .load_tool import fetch_document, validate_document, make_tool -import schema_salad.validate as validate -import tempfile -import schema_salad.jsonld_context -import schema_salad.makedoc -import yaml -import urlparse -import pkg_resources # part of setuptools -import rdflib -import hashlib -from typing import Union, Any, cast, Callable, Dict, Tuple, IO +from . 
import draft2tool +from .builder import adjustDirObjs _logger = logging.getLogger("cwltool") @@ -222,6 +227,19 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, {"class": "File", "path": values}) +class DirectoryAction(argparse.Action): + + def __init__(self, option_strings, dest, nargs=None, **kwargs): + # type: (List[str], str, Any, **Any) -> None + if nargs is not None: + raise ValueError("nargs not allowed") + super(DirectoryAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + # type: (argparse.ArgumentParser, argparse.Namespace, str, Any) -> None + setattr(namespace, self.dest, {"class": "Directory", "path": values}) + + class FileAppendAction(argparse.Action): def __init__(self, option_strings, dest, nargs=None, **kwargs): @@ -272,6 +290,8 @@ def generate_parser(toolparser, tool, namemap): if inptype == "File": action = cast(argparse.Action, FileAction) + elif inptype == "Directory": + action = cast(argparse.Action, DirectoryAction) elif isinstance(inptype, dict) and inptype["type"] == "array": if inptype["items"] == "File": action = cast(argparse.Action, FileAppendAction) @@ -385,6 +405,29 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, basedir=u"file://%s/" % input_basedir) return 0 + def getListing(rec): + if "listing" not in rec: + listing = [] + path = rec["path"][7:] if rec["path"].startswith("file://") else rec["path"] + for ld in os.listdir(path): + abspath = os.path.join(path, ld) + if os.path.isdir(abspath): + ent = {"class": "Directory", + "path": abspath, + "id": "_dir:%i" % random.randint(1, 1000000000)} + getListing(ent) + listing.append({"basename": os.path.basename(ld), + "entry": ent}) + else: + listing.append({"basename": os.path.basename(ld), + "entry": {"class": "File", "path": abspath}}) + rec["listing"] = listing + if "path" in rec: + del rec["path"] + rec["id"] = "_dir:%i" % 
random.randint(1, 1000000000) + + adjustDirObjs(job_order_object, getListing) + if "cwl:tool" in job_order_object: del job_order_object["cwl:tool"] if "id" in job_order_object: diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 700239124..b8cb8ffdf 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -47,21 +47,38 @@ def setup(self, referenced_files, basedir): for fob in referenced_files: stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) - def visit(path): - if path in self._pathmap: + if fob["class"] == "Directory": + def visit(obj, base): + self._pathmap[obj["id"]] = (obj["id"], base) + for ld in obj["listing"]: + tgt = os.path.join(base, ld["basename"]) + if ld["entry"]["class"] == "Directory": + visit(ld["entry"], tgt) + ab = ld["entry"]["id"] + self._pathmap[ab] = (ab, tgt) + else: + ab = ld["entry"]["path"] + self._pathmap[ab] = (ab, tgt) + + visit(fob, stagedir) + else: + def visit(path): + if path in self._pathmap: + return path + ab = abspath(path, basedir) + if self.scramble: + tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 1000000000)) + else: + tgt = os.path.join(stagedir, os.path.basename(path)) + self._pathmap[path] = (ab, tgt) return path - ab = abspath(path, basedir) - if self.scramble: - tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 1000000000)) - else: - tgt = os.path.join(stagedir, os.path.basename(path)) - self._pathmap[path] = (ab, tgt) - return path - adjustFiles(fob, visit) + adjustFiles(fob, visit) # Dereference symbolic links for path, (ab, tgt) in self._pathmap.items(): + if ab.startswith("_dir:"): + continue deref = ab st = os.lstat(deref) while stat.S_ISLNK(st.st_mode): diff --git a/cwltool/process.py b/cwltool/process.py index 73711123a..80b385911 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -76,6 +76,8 @@ SCHEMA_CACHE = {} # type: Dict[str, Tuple[Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode, Any], 
Loader]] SCHEMA_FILE = None # type: Dict[unicode, Any] +SCHEMA_DIR = None # type: Dict[unicode, Any] +SCHEMA_DIRENT = None # type: Dict[unicode, Any] SCHEMA_ANY = None # type: Dict[unicode, Any] def get_schema(version): @@ -108,9 +110,13 @@ def get_schema(version): SCHEMA_CACHE[version] = schema_salad.schema.load_schema( "https://w3id.org/cwl/CommonWorkflowLanguage.yml", cache=cache) - global SCHEMA_FILE, SCHEMA_ANY # pylint: disable=global-statement + global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY # pylint: disable=global-statement SCHEMA_FILE = cast(Dict[unicode, Any], SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#File"]) + SCHEMA_DIR = cast(Dict[unicode, Any], + SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Directory"]) + SCHEMA_DIRENT = cast(Dict[unicode, Any], + SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Dirent"]) SCHEMA_ANY = cast(Dict[unicode, Any], SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/salad#Any"]) @@ -219,7 +225,7 @@ def __init__(self, toolpath_object, **kwargs): self.metadata = kwargs.get("metadata", {}) # type: Dict[str,Any] self.names = None # type: avro.schema.Names names = schema_salad.schema.make_avro_schema( - [SCHEMA_FILE, SCHEMA_ANY], schema_salad.ref_resolver.Loader({}))[0] + [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY], schema_salad.ref_resolver.Loader({}))[0] if isinstance(names, avro.schema.SchemaParseException): raise names else: @@ -349,6 +355,12 @@ def _init_job(self, joborder, **kwargs): a["do_eval"] = a["valueFrom"] a["valueFrom"] = None builder.bindings.append(a) + elif ("$(" in a) or ("${" in a): + builder.bindings.append({ + "position": [0, i], + "do_eval": a, + "valueFrom": None + }) else: builder.bindings.append({ "position": [0, i], From aa0b3c82ab97f0e297693b98d8d108ec55308f29 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 14 Jun 2016 08:23:53 -0400 Subject: [PATCH 04/38] Fix staging for Docker as well --- cwltool/job.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index 7db5a6ec2..4964ab800 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -95,8 +95,9 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, if img_id: runtime = ["docker", "run", "-i"] for src in self.pathmapper.files(): - vol = self.pathmapper.mapper(src) - runtime.append(u"--volume=%s:%s:ro" % vol) + if not src.startswith("_dir:"): + vol = self.pathmapper.mapper(src) + runtime.append(u"--volume=%s:%s:ro" % vol) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl")) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp")) runtime.append(u"--workdir=%s" % ("/var/spool/cwl")) From 5110116ed50339cbd7660200a3f246119c54d07a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 14 Jun 2016 09:24:59 -0400 Subject: [PATCH 05/38] Move getListing to process module. Start looking at capturing output to directory objects. --- cwltool/draft2tool.py | 8 +++++- cwltool/main.py | 23 +---------------- cwltool/process.py | 56 +++++++++++++++++++++++++++++++----------- cwltool/stdfsaccess.py | 6 +++++ 4 files changed, 55 insertions(+), 38 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 76c25362d..2bb3cdb2b 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -358,11 +358,17 @@ def collect_output(self, schema, builder, outdir): raise WorkflowException("glob patterns must not start with '/'") try: r.extend([{"path": g, "class": "File", "hostfs": True} - for g in builder.fs_access.glob(os.path.join(outdir, gb))]) + for g in builder.fs_access.glob(os.path.join(outdir, gb)) + if builder.fs_access.isfile(g)]) + r.extend([{"path": g, "class": "Directory", "hostfs": True} + for g in builder.fs_access.glob(os.path.join(outdir, gb)) + if builder.fs_access.isdir(g)]) except (OSError, IOError) as e: _logger.warn(str(e)) for files in r: + if files["class"] == "Directory": + continue checksum = 
hashlib.sha1() with builder.fs_access.open(files["path"], "rb") as f: contents = f.read(CONTENT_LIMIT) diff --git a/cwltool/main.py b/cwltool/main.py index 471f7403f..9a188d50c 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -26,7 +26,7 @@ from .errors import WorkflowException, UnsupportedRequirement from . import process from .cwlrdf import printrdf, printdot -from .process import shortname, Process +from .process import shortname, Process, getListing from .load_tool import fetch_document, validate_document, make_tool from . import draft2tool from .builder import adjustDirObjs @@ -405,27 +405,6 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, basedir=u"file://%s/" % input_basedir) return 0 - def getListing(rec): - if "listing" not in rec: - listing = [] - path = rec["path"][7:] if rec["path"].startswith("file://") else rec["path"] - for ld in os.listdir(path): - abspath = os.path.join(path, ld) - if os.path.isdir(abspath): - ent = {"class": "Directory", - "path": abspath, - "id": "_dir:%i" % random.randint(1, 1000000000)} - getListing(ent) - listing.append({"basename": os.path.basename(ld), - "entry": ent}) - else: - listing.append({"basename": os.path.basename(ld), - "entry": {"class": "File", "path": abspath}}) - rec["listing"] = listing - if "path" in rec: - del rec["path"] - rec["id"] = "_dir:%i" % random.randint(1, 1000000000) - adjustDirObjs(job_order_object, getListing) if "cwl:tool" in job_order_object: diff --git a/cwltool/process.py b/cwltool/process.py index 80b385911..bcc2d2ecf 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -1,33 +1,37 @@ -import abc -import avro.schema + import os import json -import schema_salad.validate as validate import copy import yaml import copy import logging import pprint -from .utils import aslist, get_feature -import schema_salad.schema -from schema_salad.ref_resolver import Loader -import urlparse -import pprint -from pkg_resources import resource_stream import stat 
-from .builder import Builder, adjustFileObjs import tempfile import glob -from .errors import WorkflowException, UnsupportedRequirement -from .pathmapper import abspath, adjustFiles +import urlparse +import pprint +from collections import Iterable +import errno +import random + +import abc +import schema_salad.validate as validate +import schema_salad.schema +from schema_salad.ref_resolver import Loader +import avro.schema from typing import (Any, AnyStr, Callable, cast, Dict, List, Generator, IO, Tuple, Union) -from collections import Iterable from rdflib import URIRef from rdflib.namespace import RDFS, OWL -from .stdfsaccess import StdFsAccess -import errno from rdflib import Graph +from pkg_resources import resource_stream + +from .utils import aslist, get_feature +from .stdfsaccess import StdFsAccess +from .builder import Builder, adjustFileObjs +from .errors import WorkflowException, UnsupportedRequirement +from .pathmapper import abspath, adjustFiles _logger = logging.getLogger("cwltool") @@ -160,6 +164,28 @@ def adjustFilesWithSecondary(rec, op, primary=None): for d in rec: adjustFilesWithSecondary(d, op, primary) +def getListing(rec): + if "listing" not in rec: + listing = [] + path = rec["path"][7:] if rec["path"].startswith("file://") else rec["path"] + for ld in os.listdir(path): + abspath = os.path.join(path, ld) + if os.path.isdir(abspath): + ent = {"class": "Directory", + "path": abspath, + "id": "_dir:%i" % random.randint(1, 1000000000)} + getListing(ent) + listing.append({"basename": os.path.basename(ld), + "entry": ent}) + else: + listing.append({"basename": os.path.basename(ld), + "entry": {"class": "File", "path": abspath}}) + rec["listing"] = listing + if "path" in rec: + del rec["path"] + rec["id"] = "_dir:%i" % random.randint(1, 1000000000) + + def formatSubclassOf(fmt, cls, ontology, visited): # type: (str, str, Graph, Set[str]) -> bool """Determine if `fmt` is a subclass of `cls`.""" diff --git a/cwltool/stdfsaccess.py 
b/cwltool/stdfsaccess.py index 618d35ac0..1d7493888 100644 --- a/cwltool/stdfsaccess.py +++ b/cwltool/stdfsaccess.py @@ -20,3 +20,9 @@ def open(self, fn, mode): # type: (unicode, str) -> BinaryIO def exists(self, fn): # type: (unicode) -> bool return os.path.exists(self._abs(fn)) + + def isfile(self, fn): + return os.path.isfile(fn) + + def isdir(self, fn): + return os.path.isdir(fn) From 42d1d4384e35baaf48cb852efb1ae387c275944e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 15 Jun 2016 08:41:44 -0400 Subject: [PATCH 06/38] Refactor path mapping/file staging for both setting up inputs and moving outputs. --- cwltool/draft2tool.py | 75 +++++++++++++++++------------------------- cwltool/job.py | 10 ++---- cwltool/main.py | 73 +++++++++++++++++++--------------------- cwltool/pathmapper.py | 7 ++-- cwltool/process.py | 62 ++++++++++++++++++++++++++++++---- cwltool/stdfsaccess.py | 7 ++-- cwltool/workflow.py | 43 +----------------------- 7 files changed, 133 insertions(+), 144 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 2bb3cdb2b..bd0603491 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -18,7 +18,7 @@ import shellescape from typing import Callable, Any, Union, Generator, cast -from .process import Process, shortname, uniquename +from .process import Process, shortname, uniquename, getListing from .errors import WorkflowException from .utils import aslist from . 
import expression @@ -354,66 +354,51 @@ def collect_output(self, schema, builder, outdir): for gb in globpatterns: if gb.startswith(outdir): gb = gb[len(outdir)+1:] + elif gb == ".": + gb = outdir elif gb.startswith("/"): raise WorkflowException("glob patterns must not start with '/'") try: - r.extend([{"path": g, "class": "File", "hostfs": True} - for g in builder.fs_access.glob(os.path.join(outdir, gb)) - if builder.fs_access.isfile(g)]) - r.extend([{"path": g, "class": "Directory", "hostfs": True} - for g in builder.fs_access.glob(os.path.join(outdir, gb)) - if builder.fs_access.isdir(g)]) + r.extend([{"path": g, + "class": "File" if builder.fs_access.isfile(g) else "Directory", + "hostfs": True} + for g in builder.fs_access.glob(os.path.join(outdir, gb))]) except (OSError, IOError) as e: _logger.warn(str(e)) for files in r: if files["class"] == "Directory": - continue - checksum = hashlib.sha1() - with builder.fs_access.open(files["path"], "rb") as f: - contents = f.read(CONTENT_LIMIT) - if binding.get("loadContents"): - files["contents"] = contents - filesize = 0 - while contents != "": - checksum.update(contents) - filesize += len(contents) - contents = f.read(1024*1024) - files["checksum"] = "sha1$%s" % checksum.hexdigest() - files["size"] = filesize - if "format" in schema: - files["format"] = builder.do_eval(schema["format"], context=files) + getListing(builder.fs_access, files) + else: + checksum = hashlib.sha1() + with builder.fs_access.open(files["path"], "rb") as f: + contents = f.read(CONTENT_LIMIT) + if binding.get("loadContents"): + files["contents"] = contents + filesize = 0 + while contents != "": + checksum.update(contents) + filesize += len(contents) + contents = f.read(1024*1024) + files["checksum"] = "sha1$%s" % checksum.hexdigest() + files["size"] = filesize + if "format" in schema: + files["format"] = builder.do_eval(schema["format"], context=files) optional = False - singlefile = False + single = False if isinstance(schema["type"], list): 
if "null" in schema["type"]: optional = True - if "File" in schema["type"]: - singlefile = True - elif schema["type"] == "File": - singlefile = True + if "File" in schema["type"] or "Directory" in schema["type"]: + single = True + elif schema["type"] == "File" or schema["type"] == "Directory": + single = True if "outputEval" in binding: - eout = builder.do_eval(binding["outputEval"], context=r) - if singlefile: - # Handle single file outputs not wrapped in a list - if eout is not None and not isinstance(eout, (list, tuple)): - r = [eout] - elif optional and eout is None: - pass - elif (eout is None or len(eout) != 1 or - not isinstance(eout[0], dict) - or "path" not in eout[0]): - raise WorkflowException( - u"Expression must return a file object for %s." - % schema["id"]) - else: - r = [eout] - else: - r = eout + r = builder.do_eval(binding["outputEval"], context=r) - if singlefile: + if single: if not r and not optional: raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns)) elif not r and optional: diff --git a/cwltool/job.py b/cwltool/job.py index 4964ab800..a5db4fc30 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -9,7 +9,7 @@ import sys import requests from . 
import docker -from .process import get_feature, empty_subtree +from .process import get_feature, empty_subtree, stageFiles from .errors import WorkflowException import shutil import stat @@ -139,13 +139,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, if key in vars_to_preserve and key not in env: env[key] = value - for f in self.pathmapper.files(): - p = self.pathmapper.mapper(f) - if not os.path.exists(os.path.dirname(p[1])): - os.makedirs(os.path.dirname(p[1]), 0755) - if not p[0].startswith("_dir:") and not os.path.exists(p[1]): - os.symlink(p[0], p[1]) - + stageFiles(self.pathmapper, os.symlink) stdin = None # type: Union[IO[Any],int] stderr = None # type: IO[Any] diff --git a/cwltool/main.py b/cwltool/main.py index 9a188d50c..90b9a78dd 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -12,6 +12,7 @@ import hashlib import pkg_resources # part of setuptools import random +import functools import yaml import rdflib @@ -24,12 +25,12 @@ from . import workflow from .errors import WorkflowException, UnsupportedRequirement -from . import process from .cwlrdf import printrdf, printdot -from .process import shortname, Process, getListing +from .process import shortname, Process, getListing, moveOutputs, cleanIntermediate from .load_tool import fetch_document, validate_document, make_tool from . 
import draft2tool from .builder import adjustDirObjs +from .stdfsaccess import StdFsAccess _logger = logging.getLogger("cwltool") @@ -172,46 +173,40 @@ def output_callback(out, processStatus): if "basedir" not in kwargs: raise WorkflowException("Must provide 'basedir' in kwargs") - if kwargs.get("outdir"): - pass - elif kwargs.get("dry_run"): - kwargs["outdir"] = "/tmp" - else: - kwargs["outdir"] = tempfile.mkdtemp() + output_dirs = set() + finaloutdir = kwargs.get("outdir") + kwargs["outdir"] = tempfile.mkdtemp() + output_dirs.add(kwargs["outdir"]) jobiter = t.job(job_order_object, output_callback, **kwargs) - if kwargs.get("conformance_test"): - job = jobiter.next() - a = {"args": job.command_line} - if job.stdin: - a["stdin"] = job.pathmapper.mapper(job.stdin)[1] - if job.stderr: - a["stderr"] = job.stderr - if job.stdout: - a["stdout"] = job.stdout - if job.generatefiles: - a["createfiles"] = job.generatefiles - return a - else: - try: - for r in jobiter: - if r: - r.run(**kwargs) - else: - raise WorkflowException("Workflow cannot make any more progress.") - except WorkflowException: - raise - except Exception as e: - _logger.exception("Got workflow error") - raise WorkflowException(unicode(e)) + try: + for r in jobiter: + if r.outdir: + output_dirs.add(r.outdir) + + if r: + r.run(**kwargs) + else: + raise WorkflowException("Workflow cannot make any more progress.") + except WorkflowException: + raise + except Exception as e: + _logger.exception("Got workflow error") + raise WorkflowException(unicode(e)) + + if final_status[0] != "success": + raise WorkflowException(u"Process status is %s" % (final_status)) + + if kwargs.get("move_outputs") and final_output[0] and finaloutdir: + final_output[0] = moveOutputs(final_output[0], finaloutdir, output_dirs) - if final_status[0] != "success": - raise WorkflowException(u"Process status is %s" % (final_status)) + if kwargs.get("rm_tmpdir"): + cleanIntermediate(output_dirs) - return final_output[0] + return 
final_output[0] class FileAction(argparse.Action): @@ -405,7 +400,7 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, basedir=u"file://%s/" % input_basedir) return 0 - adjustDirObjs(job_order_object, getListing) + adjustDirObjs(job_order_object, functools.partial(getListing, StdFsAccess(input_basedir))) if "cwl:tool" in job_order_object: del job_order_object["cwl:tool"] @@ -423,7 +418,7 @@ def printdeps(obj, document_loader, stdout, relative_deps, basedir=None): def loadref(b, u): return document_loader.resolve_ref(u, base_url=b)[0] - sf = process.scandeps(basedir if basedir else obj["id"], obj, + sf = scandeps(basedir if basedir else obj["id"], obj, set(("$import", "run")), set(("$include", "$schemas", "path")), loadref) if sf: @@ -440,7 +435,7 @@ def makeRelative(u): if ":" in u.split("/")[0] and not u.startswith("file://"): return u return os.path.relpath(u, base) - process.adjustFiles(deps, makeRelative) + adjustFiles(deps, makeRelative) stdout.write(json.dumps(deps, indent=4)) @@ -484,7 +479,7 @@ def print_pack(document_loader, processobj, uri, metadata): def loadref(b, u): # type: (unicode, unicode) -> Union[Dict, List, unicode] return document_loader.resolve_ref(u, base_url=b)[0] - deps = process.scandeps(uri, processobj, + deps = scandeps(uri, processobj, set(("run",)), set(), loadref) fdeps = set((uri,)) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index b8cb8ffdf..efd72bf44 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -32,11 +32,12 @@ class PathMapper(object): """Mapping of files from relative path provided in the file to a tuple of (absolute local path, absolute container path)""" - def __init__(self, referenced_files, basedir, stagedir, scramble=False): + def __init__(self, referenced_files, basedir, stagedir, scramble=False, separateDirs=True): # type: (Set[Any], unicode, unicode) -> None self._pathmap = {} # type: Dict[unicode, Tuple[unicode, unicode]] self.stagedir = stagedir 
self.scramble = scramble + self.separateDirs = separateDirs self.setup(referenced_files, basedir) def setup(self, referenced_files, basedir): @@ -44,8 +45,10 @@ def setup(self, referenced_files, basedir): # Go through each file and set the target to its own directory along # with any secondary files. + stagedir = self.stagedir for fob in referenced_files: - stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) + if self.separateDirs: + stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) if fob["class"] == "Directory": def visit(obj, base): diff --git a/cwltool/process.py b/cwltool/process.py index bcc2d2ecf..2b94d2c00 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -14,6 +14,7 @@ from collections import Iterable import errno import random +import shutil import abc import schema_salad.validate as validate @@ -29,9 +30,9 @@ from .utils import aslist, get_feature from .stdfsaccess import StdFsAccess -from .builder import Builder, adjustFileObjs +from .builder import Builder, adjustFileObjs, adjustDirObjs from .errors import WorkflowException, UnsupportedRequirement -from .pathmapper import abspath, adjustFiles +from .pathmapper import PathMapper, abspath, adjustFiles _logger = logging.getLogger("cwltool") @@ -164,17 +165,17 @@ def adjustFilesWithSecondary(rec, op, primary=None): for d in rec: adjustFilesWithSecondary(d, op, primary) -def getListing(rec): +def getListing(fs_access, rec): if "listing" not in rec: listing = [] path = rec["path"][7:] if rec["path"].startswith("file://") else rec["path"] - for ld in os.listdir(path): + for ld in fs_access.listdir(path): abspath = os.path.join(path, ld) - if os.path.isdir(abspath): + if fs_access.isdir(abspath): ent = {"class": "Directory", "path": abspath, "id": "_dir:%i" % random.randint(1, 1000000000)} - getListing(ent) + getListing(fs_access, ent) listing.append({"basename": os.path.basename(ld), "entry": ent}) else: @@ -185,6 +186,55 @@ def 
getListing(rec): del rec["path"] rec["id"] = "_dir:%i" % random.randint(1, 1000000000) +def stageFiles(pm, stageFunc): + for f in pm.files(): + p = pm.mapper(f) + if not os.path.exists(os.path.dirname(p[1])): + os.makedirs(os.path.dirname(p[1]), 0755) + if not p[0].startswith("_dir:"): + stageFunc(p[0], p[1]) + +def collectFilesAndDirs(obj, out): + if isinstance(obj, dict): + if obj.get("class") in ("File", "Directory"): + out.append(obj) + else: + for v in obj.values(): + collectFilesAndDirs(v, out) + if isinstance(obj, list): + for l in obj: + collectFilesAndDirs(l, out) + +def moveOutputs(outputObj, outdir, output_dirs): + def moveIt(src, dst): + for a in output_dirs: + if src.startswith(a): + _logger.debug("Moving %s to %s", src, dst) + shutil.move(src, dst) + + outfiles = [] + collectFilesAndDirs(outputObj, outfiles) + pm = PathMapper(outfiles, "", outdir, separateDirs=False) + stageFiles(pm, moveIt) + + def _check_adjust(f): + if f["class"] == "Directory": + f["path"] = pm.mapper(f["id"])[1] + else: + f["path"] = pm.mapper(f["path"])[1] + return f + + adjustFileObjs(outputObj, _check_adjust) + adjustDirObjs(outputObj, _check_adjust) + + return outputObj + +def cleanIntermediate(output_dirs): + for a in output_dirs: + if os.path.exists(a) and empty_subtree(a): + _logger.debug(u"Removing intermediate output directory %s", a) + shutil.rmtree(a, True) + def formatSubclassOf(fmt, cls, ontology, visited): # type: (str, str, Graph, Set[str]) -> bool diff --git a/cwltool/stdfsaccess.py b/cwltool/stdfsaccess.py index 1d7493888..cd2d1e04b 100644 --- a/cwltool/stdfsaccess.py +++ b/cwltool/stdfsaccess.py @@ -22,7 +22,10 @@ def exists(self, fn): # type: (unicode) -> bool return os.path.exists(self._abs(fn)) def isfile(self, fn): - return os.path.isfile(fn) + return os.path.isfile(self._abs(fn)) def isdir(self, fn): - return os.path.isdir(fn) + return os.path.isdir(self._abs(fn)) + + def listdir(self, fn): + return os.listdir(self._abs(fn)) diff --git a/cwltool/workflow.py 
b/cwltool/workflow.py index 77d58fbbd..e5c1e66ac 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -296,7 +296,7 @@ def valueFromFunc(k, v): # type: (Any, Any) -> Any def run(self, **kwargs): _logger.debug(u"[%s] workflow starting", self.name) - def job(self, joborder, output_callback, move_outputs=True, **kwargs): + def job(self, joborder, output_callback, **kwargs): # type: (Dict[unicode, Any], Callable[[Any, Any], Any], bool, **Any) -> Generator[WorkflowJob, None, None] self.state = {} self.processStatus = "success" @@ -317,8 +317,6 @@ def job(self, joborder, output_callback, move_outputs=True, **kwargs): for out in s.tool["outputs"]: self.state[out["id"]] = None - output_dirs = set() - completed = 0 while completed < len(self.steps) and self.processStatus == "success": made_progress = False @@ -331,8 +329,6 @@ def job(self, joborder, output_callback, move_outputs=True, **kwargs): for newjob in step.iterable: if newjob: made_progress = True - if newjob.outdir: - output_dirs.add(newjob.outdir) yield newjob else: break @@ -349,43 +345,6 @@ def job(self, joborder, output_callback, move_outputs=True, **kwargs): if wo is None: raise WorkflowException("Output for workflow not available") - if move_outputs: - targets = set() # type: Set[str] - conflicts = set() - - outfiles = findfiles(wo) - - for f in outfiles: - for a in output_dirs: - if f["path"].startswith(a): - src = f["path"] - dst = os.path.join(self.outdir, src[len(a)+1:]) - if dst in targets: - conflicts.add(dst) - else: - targets.add(dst) - - for f in outfiles: - for a in output_dirs: - if f["path"].startswith(a): - src = f["path"] - dst = os.path.join(self.outdir, src[len(a)+1:]) - if dst in conflicts: - sp = os.path.splitext(dst) - dst = u"%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1]) - dirname = os.path.dirname(dst) - if not os.path.exists(dirname): - os.makedirs(dirname) - _logger.debug(u"[%s] Moving '%s' to '%s'", self.name, src, dst) - shutil.move(src, dst) - f["path"] = dst 
- - for a in output_dirs: - if os.path.exists(a) and empty_subtree(a): - if kwargs.get("rm_tmpdir", True): - _logger.debug(u"[%s] Removing intermediate output directory %s", self.name, a) - shutil.rmtree(a, True) - _logger.info(u"[%s] outdir is %s", self.name, self.outdir) output_callback(wo, self.processStatus) From fa70a557de603a256200fae25f7f9bc23e40dd72 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 20 Jun 2016 08:46:38 -0400 Subject: [PATCH 07/38] Fix --cachedir to work with changes to file staging. --- cwltool/draft2tool.py | 5 +++-- cwltool/job.py | 4 ++-- cwltool/main.py | 18 ++++++++++++------ cwltool/pathmapper.py | 2 +- cwltool/process.py | 28 ++++++++++++++++++---------- 5 files changed, 36 insertions(+), 21 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index bd0603491..cd39c1e97 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -142,8 +142,9 @@ def job(self, joborder, output_callback, **kwargs): cacheargs["tmpdir"] = "/tmp" cacheargs["stagedir"] = "/stage" cachebuilder = self._init_job(joborder, **cacheargs) - cachebuilder.pathmapper = PathMapper(set((f["path"] for f in cachebuilder.files)), - kwargs["basedir"]) + cachebuilder.pathmapper = PathMapper(cachebuilder.files, + kwargs["basedir"], + cachebuilder.stagedir) cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings)) (docker_req, docker_is_req) = self.get_requirement("DockerRequirement") diff --git a/cwltool/job.py b/cwltool/job.py index a5db4fc30..18df0e024 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -62,7 +62,7 @@ def __init__(self): # type: () -> None self.generatefiles = None # type: Dict[str,Union[Dict[str,str],str]] def run(self, dry_run=False, pull_image=True, rm_container=True, - rm_tmpdir=True, move_outputs=True, **kwargs): + rm_tmpdir=True, move_outputs="move", **kwargs): # type: (bool, bool, bool, bool, bool, **Any) -> Union[Tuple[str,Dict[None,None]],None] if not os.path.exists(self.outdir): 
os.makedirs(self.outdir) @@ -279,6 +279,6 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, _logger.debug(u"[job %s] Removing temporary directory %s", self.name, self.tmpdir) shutil.rmtree(self.tmpdir, True) - if move_outputs and empty_subtree(self.outdir): + if move_outputs == "move" and empty_subtree(self.outdir): _logger.debug(u"[job %s] Removing empty output directory %s", self.name, self.outdir) shutil.rmtree(self.outdir, True) diff --git a/cwltool/main.py b/cwltool/main.py index 90b9a78dd..f315d943b 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -26,7 +26,7 @@ from . import workflow from .errors import WorkflowException, UnsupportedRequirement from .cwlrdf import printrdf, printdot -from .process import shortname, Process, getListing, moveOutputs, cleanIntermediate +from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate from .load_tool import fetch_document, validate_document, make_tool from . import draft2tool from .builder import adjustDirObjs @@ -87,14 +87,18 @@ def arg_parser(): # type: () -> argparse.ArgumentParser dest="rm_tmpdir") exgroup = parser.add_mutually_exclusive_group() - exgroup.add_argument("--move-outputs", action="store_true", default=True, + exgroup.add_argument("--move-outputs", action="store_const", const="move", default="move", help="Move output files to the workflow output directory and delete intermediate output directories (default).", dest="move_outputs") - exgroup.add_argument("--leave-outputs", action="store_false", default=True, + exgroup.add_argument("--leave-outputs", action="store_const", const="leave", default="move", help="Leave output files in intermediate output directories.", dest="move_outputs") + exgroup.add_argument("--copy-outputs", action="store_const", const="copy", default="move", + help="Copy output files to the workflow output directory, don't delete intermediate output directories.", + dest="move_outputs") + exgroup = parser.add_mutually_exclusive_group() 
exgroup.add_argument("--enable-pull", default=True, action="store_true", help="Try to pull Docker images", dest="enable_pull") @@ -200,8 +204,9 @@ def output_callback(out, processStatus): if final_status[0] != "success": raise WorkflowException(u"Process status is %s" % (final_status)) - if kwargs.get("move_outputs") and final_output[0] and finaloutdir: - final_output[0] = moveOutputs(final_output[0], finaloutdir, output_dirs) + if final_output[0] and finaloutdir: + final_output[0] = relocateOutputs(final_output[0], finaloutdir, + output_dirs, kwargs.get("move_outputs")) if kwargs.get("rm_tmpdir"): cleanIntermediate(output_dirs) @@ -660,7 +665,8 @@ def main(argsl=None, if args.cachedir: setattr(args, 'cachedir', os.path.abspath(args.cachedir)) - setattr(args, 'move_outputs', False) + if args.move_outputs == "move": + setattr(args, 'move_outputs', "copy") try: setattr(args, 'tmp_outdir_prefix', diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index efd72bf44..0696fda8a 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -80,7 +80,7 @@ def visit(path): # Dereference symbolic links for path, (ab, tgt) in self._pathmap.items(): - if ab.startswith("_dir:"): + if ab.startswith("_dir:"):# or not os.path.exists(ab): continue deref = ab st = os.lstat(deref) diff --git a/cwltool/process.py b/cwltool/process.py index 2b94d2c00..6ecf722a6 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -116,14 +116,15 @@ def get_schema(version): "https://w3id.org/cwl/CommonWorkflowLanguage.yml", cache=cache) global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY # pylint: disable=global-statement - SCHEMA_FILE = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#File"]) - SCHEMA_DIR = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Directory"]) - SCHEMA_DIRENT = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Dirent"]) SCHEMA_ANY = cast(Dict[unicode, Any], 
SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/salad#Any"]) + SCHEMA_FILE = cast(Dict[unicode, Any], + SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#File"]) + if version in ("draft-4"): + SCHEMA_DIR = cast(Dict[unicode, Any], + SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Directory"]) + SCHEMA_DIRENT = cast(Dict[unicode, Any], + SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Dirent"]) return SCHEMA_CACHE[version] @@ -205,12 +206,19 @@ def collectFilesAndDirs(obj, out): for l in obj: collectFilesAndDirs(l, out) -def moveOutputs(outputObj, outdir, output_dirs): +def relocateOutputs(outputObj, outdir, output_dirs, action): + if action not in ("move", "copy"): + return outputObj + def moveIt(src, dst): for a in output_dirs: if src.startswith(a): - _logger.debug("Moving %s to %s", src, dst) - shutil.move(src, dst) + if action == "move": + _logger.debug("Moving %s to %s", src, dst) + shutil.move(src, dst) + elif action == "copy": + _logger.debug("Copying %s to %s", src, dst) + shutil.copy(src, dst) outfiles = [] collectFilesAndDirs(outputObj, outfiles) @@ -301,7 +309,7 @@ def __init__(self, toolpath_object, **kwargs): self.metadata = kwargs.get("metadata", {}) # type: Dict[str,Any] self.names = None # type: avro.schema.Names names = schema_salad.schema.make_avro_schema( - [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY], schema_salad.ref_resolver.Loader({}))[0] + filter(lambda x: x is not None, [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY]), schema_salad.ref_resolver.Loader({}))[0] if isinstance(names, avro.schema.SchemaParseException): raise names else: From 7241deb009517ed8dfb8d751908df167a14c8e79 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 21 Jun 2016 08:24:41 -0400 Subject: [PATCH 08/38] Tweak cwltest to accomodate Directory objects. 
--- cwltool/cwltest.py | 5 +++++ cwltool/draft2tool.py | 1 + 2 files changed, 6 insertions(+) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index bb40262f9..bef447fe9 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -33,6 +33,11 @@ def compare(a, b): # type: (Any, Any) -> bool # ignore empty collections b = {k: v for k, v in b.iteritems() if not isinstance(v, (list, dict)) or len(v) > 0} + if a.get("class") == "Directory": + for d in ("id", "path"): + if d in b: + del b[d] + pass if len(a) != len(b): raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) for c in a: diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index cd39c1e97..f41f2f24a 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -332,6 +332,7 @@ def collect_output_ports(self, ports, builder, outdir): raise WorkflowException(u"Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e)) if ret: adjustFileObjs(ret, remove_hostfs) + adjustDirObjs(ret, remove_hostfs) validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret) return ret if ret is not None else {} except validate.ValidationException as e: From 6d89d5fd463a25f20bb9226cfc4a006428e37bc1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 22 Jun 2016 12:17:03 -0400 Subject: [PATCH 09/38] Adding convenience fields to file objects --- cwltool/draft2tool.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index f41f2f24a..899d076e7 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -251,6 +251,9 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] f["path"] = builder.pathmapper.mapper(f["id"])[1] else: f["path"] = builder.pathmapper.mapper(f["path"])[1] + # XXX should only add if >= draft-4 + f["basename"] = os.path.basename(f["path"]) + f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) f["containerfs"] = 
True return f @@ -363,6 +366,9 @@ def collect_output(self, schema, builder, outdir): try: r.extend([{"path": g, "class": "File" if builder.fs_access.isfile(g) else "Directory", + "basename": os.path.basename(g), + "nameroot": os.path.splitext(os.path.basename(g))[0], + "nameext": os.path.splitext(os.path.basename(g))[1], "hostfs": True} for g in builder.fs_access.glob(os.path.join(outdir, gb))]) except (OSError, IOError) as e: @@ -430,6 +436,8 @@ def collect_output(self, schema, builder, outdir): for sfitem in aslist(sfpath): if builder.fs_access.exists(sfitem["path"]): + sfitem["basename"] = os.path.basename(sfitem["path"]) + sfitem["nameroot"], sfitem["nameext"] = os.path.splitext(sfitem["basename"]) primary["secondaryFiles"].append(sfitem) if not r and optional: From 51ad4bca0db3052d3bb333401f19d1cbf65eff46 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 09:35:38 -0400 Subject: [PATCH 10/38] Squashed 'cwltool/schemas/' changes from a2263a9..8798dc8 8798dc8 Merge branch 'master' into directory 2d7ca73 Adjust Directory to also use a `location` field to identify it. 45af7a3 Add location, update text for dirname, basename, nameroot, nameext. 227a981 Add basename, nameroot, nameext to conformance tests. 832b52b Add initial conformance tests for input and output of directory objects. 272aefa Merge pull request #231 from common-workflow-language/restore-workflow-outparam-type 3940210 re-add types to ExpressionTool, InputParameter 0c275b4 clarify that outputBinding must not be set c30b79f finish restoring allowable types for parameters 06c9ba7 Merge pull request #230 from common-workflow-language/stderr-out-types cbd7bc2 only allow use of stderr/stdout shortcuts as types for the Outputs. Better place the docs 0ba703d jsonldPredicate did it! a5a7516 nope, use an enum instead 24a611d use long ids? 5ad6a16 Allow expressions in `arguments` 52f6411 extend doc to stderr psuedotype 9d0fbe0 initial stdout & stderr shortcut support 392be2f fixing site build? 
git-subtree-dir: cwltool/schemas git-subtree-split: 8798dc8435f5f5e768ce047681f72435444dde95 --- draft-3/examples/arguments.cwl | 1 + draft-4/CommandLineTool.yml | 92 ++++++++++-- draft-4/Process.yml | 138 +++++++++++++----- draft-4/Workflow.yml | 51 +++++++ draft-4/conformance_test_draft-4.yaml | 155 ++++++++++++++++++++- draft-4/draft-4/cat3-tool-mediumcut.cwl | 18 +++ draft-4/draft-4/cat3-tool-shortcut.cwl | 17 +++ draft-4/draft-4/dir-job.yml | 3 + draft-4/draft-4/dir.cwl | 18 +++ draft-4/draft-4/dir2.cwl | 20 +++ draft-4/draft-4/dir3-job.yml | 3 + draft-4/draft-4/dir3.cwl | 13 ++ draft-4/draft-4/egrep-stderr-mediumcut.cwl | 18 +++ draft-4/draft-4/egrep-stderr-shortcut.cwl | 16 +++ draft-4/draft-4/egrep-stderr.cwl | 15 ++ site/draft4-deps.json | 4 + 16 files changed, 532 insertions(+), 50 deletions(-) create mode 100755 draft-4/draft-4/cat3-tool-mediumcut.cwl create mode 100755 draft-4/draft-4/cat3-tool-shortcut.cwl create mode 100644 draft-4/draft-4/dir-job.yml create mode 100644 draft-4/draft-4/dir.cwl create mode 100644 draft-4/draft-4/dir2.cwl create mode 100644 draft-4/draft-4/dir3-job.yml create mode 100644 draft-4/draft-4/dir3.cwl create mode 100644 draft-4/draft-4/egrep-stderr-mediumcut.cwl create mode 100644 draft-4/draft-4/egrep-stderr-shortcut.cwl create mode 100644 draft-4/draft-4/egrep-stderr.cwl diff --git a/draft-3/examples/arguments.cwl b/draft-3/examples/arguments.cwl index 862540be2..23e91ebdd 100644 --- a/draft-3/examples/arguments.cwl +++ b/draft-3/examples/arguments.cwl @@ -1,5 +1,6 @@ cwlVersion: cwl:draft-3 class: CommandLineTool +label: Example trivial wrapper for Java 7 compiler baseCommand: javac hints: - class: DockerRequirement diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 01defa317..bedb5fc28 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -368,21 +368,95 @@ $graph: - specializeFrom: "#InputBinding" specializeTo: "#CommandLineBinding" - - type: record name: 
CommandOutputParameter extends: "#OutputParameter" doc: An output parameter for a CommandLineTool. specialize: - - specializeFrom: "#OutputRecordSchema" - specializeTo: "#CommandOutputRecordSchema" - - specializeFrom: "#OutputEnumSchema" - specializeTo: "#CommandOutputEnumSchema" - - specializeFrom: "#OutputArraySchema" - specializeTo: "#CommandOutputArraySchema" - specializeFrom: "#OutputBinding" specializeTo: "#CommandOutputBinding" + fields: + - name: type + type: + - "null" + - "#CWLType" + - "#stdout" + - "#stderr" + - "#CommandOutputRecordSchema" + - "#CommandOutputEnumSchema" + - "#CommandOutputArraySchema" + - string + - type: array + items: + - "#CWLType" + - "#CommandOutputRecordSchema" + - "#CommandOutputEnumSchema" + - "#CommandOutputArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. +- name: stdout + type: enum + symbols: [ "cwl:stdout" ] + docParent: "#CommandOutputParameter" + doc: | + Only valid as a `type` for a `CommandLineTool` output with no + `outputBinding` set. + + The following + ``` + outputs: + an_output_name: + type: stdout + + stdout: a_stdout_file + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + outputBinding: + glob: a_stdout_file + + stdout: a_stdout_file + ``` + + If there is no `stdout` name provided, a random filename will be created. + + +- name: stderr + type: enum + symbols: [ "cwl:stderr" ] + docParent: "#CommandOutputParameter" + doc: | + Only valid as a `type` for a `CommandLineTool` output with no + `outputBinding` set. + + The following + ``` + outputs: + an_output_name: + type: stderr + + stderr: a_stderr_file + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + outputBinding: + glob: a_stderr_file + ``` + + If there is no `stderr` name provided, a random filename will be created. 
- type: record name: CommandLineTool @@ -425,7 +499,7 @@ $graph: type: - "null" - type: array - items: [string, "#CommandLineBinding"] + items: [string, "#Expression", "#CommandLineBinding"] jsonldPredicate: "_id": "cwl:arguments" "_container": "@list" @@ -436,6 +510,7 @@ $graph: standard input stream. - name: stderr type: ["null", string, "#Expression"] + jsonldPredicate: "https://w3id.org/cwl/cwl#stderr" doc: | Capture the command's standard error stream to a file written to the designated output directory. @@ -448,6 +523,7 @@ $graph: characters (such as the path separator `/`) it is an error. - name: stdout type: ["null", string, "#Expression"] + jsonldPredicate: "https://w3id.org/cwl/cwl#stdout" doc: | Capture the command's standard output stream to a file written to the designated output directory. diff --git a/draft-4/Process.yml b/draft-4/Process.yml index d7ee85e6b..a7e25b7d5 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -60,20 +60,82 @@ $graph: symbols: - cwl:File jsonldPredicate: - "_id": "@type" - "_type": "@vocab" + _id: "@type" + _type: "@vocab" doc: Must be `File` to indicate this object describes a file. + - name: location + type: string? + doc: | + A URI that identifies the file resource. This may be a relative + reference, in which case it must be resolved using the base URI of the + document. The location may refer to a local or remote resource; the + implementation must use the URI to retrieve file content. If an + implementation is unable to retrieve the file content stored at a + remote resource (due to unsupported protocol, access denied, or other + issue) it must signal an error. + jsonldPredicate: + _id: "@id" + _type: "@id" - name: path - type: string - doc: The path to the file. + type: string? + doc: | + The local path where the File is made available prior to executing a + CommandLineTool. This field must not be used in any other context. 
The + command line tool being executed must be able to access the file at + `path` using the POSIX `open(2)` syscall. jsonldPredicate: "_id": "cwl:path" "_type": "@id" + - name: basename + type: string? + doc: | + The base name of the file, that is, the path component following the + final slash in the path. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: dirname + type: string? + doc: | + The name of the directory containing the file, that is, the path leading up + to the final slash in the path such that `dirname + '/' + basename = + path`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameroot + type: string? + doc: | + The basename root such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have a nameroot of `.cshrc`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameext + type: string? + doc: | + The basename extension such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have an empty `nameext`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context.
- name: checksum type: ["null", string] doc: | Optional hash code for validating file integrity. Currently must be in the form - "sha1$ + hexidecimal string" using the SHA-1 algorithm. + "sha1$ + hexadecimal string" using the SHA-1 algorithm. - name: size type: ["null", long] doc: Optional file size. @@ -141,6 +203,19 @@ $graph: _id: "@type" _type: "@vocab" doc: Must be `Directory` to indicate this object describes a Directory. + - name: location + type: string + doc: | + A URI that identifies the directory resource. This may be a relative + reference, in which case it must be resolved using the base URI of the + document. The location may refer to a local or remote resource. If + the `listing` field is not set, the implementation must use the URI to + retrieve directory listing. If an implementation is unable to retrieve + the file content stored at a remote resource (due to unsupported + protocol, access denied, or other issue) it must signal an error. + jsonldPredicate: + _id: "@id" + _type: "@id" - name: path type: string? doc: The path to the directory. @@ -155,7 +230,6 @@ $graph: mapSubject: basename mapPredicate: entry - - name: SchemaBase type: record abstract: true @@ -226,29 +300,6 @@ $graph: Define an input or output parameter to a process. fields: - - name: type - type: - - "null" - - "#CWLType" - - "sld:RecordSchema" - - "sld:EnumSchema" - - "sld:ArraySchema" - - string - - type: array - items: - - "#CWLType" - - "sld:RecordSchema" - - "sld:EnumSchema" - - "sld:ArraySchema" - - string - jsonldPredicate: - "_id": "sld:type" - "_type": "@vocab" - refScope: 2 - typeDSL: True - doc: | - Specify valid types of data that may be assigned to this parameter. 
- - name: label type: - "null" @@ -411,13 +462,6 @@ $graph: - name: InputParameter type: record extends: "#Parameter" - specialize: - - specializeFrom: "sld:RecordSchema" - specializeTo: "#InputRecordSchema" - - specializeFrom: "sld:EnumSchema" - specializeTo: "#InputEnumSchema" - - specializeFrom: "sld:ArraySchema" - specializeTo: "#InputArraySchema" fields: - name: id type: string @@ -438,6 +482,28 @@ $graph: The default value for this parameter if not provided in the input object. + - name: type + type: + - "null" + - "#CWLType" + - "#InputRecordSchema" + - "#InputEnumSchema" + - "#InputArraySchema" + - string + - type: array + items: + - "#CWLType" + - "#InputRecordSchema" + - "#InputEnumSchema" + - "#InputArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. - name: OutputParameter type: record diff --git a/draft-4/Workflow.yml b/draft-4/Workflow.yml index 9103eb0dd..6e8510cff 100644 --- a/draft-4/Workflow.yml +++ b/draft-4/Workflow.yml @@ -50,10 +50,39 @@ $graph: - {$include: concepts.md} +- name: ExpressionToolOutputParameter + type: record + extends: Parameter + fields: + - name: type + type: + - "null" + - "#CWLType" + - "sld:RecordSchema" + - "sld:EnumSchema" + - "sld:ArraySchema" + - string + - type: array + items: + - "#CWLType" + - "sld:RecordSchema" + - "sld:EnumSchema" + - "sld:ArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. - type: record name: ExpressionTool extends: Process + specialize: + - specializeFrom: "#OutputParameter" + specializeTo: "#ExpressionToolOutputParameter" documentRoot: true doc: | Execute an expression as a process step. @@ -104,6 +133,28 @@ $graph: doc: | The method to use to merge multiple sources into a single array. 
If not specified, the default method is "merge_nested". + - name: type + type: + - "null" + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + - type: array + items: + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. - name: Sink diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index cf5a294e8..ea5b78c95 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -41,7 +41,10 @@ "checksum": "sha1$63da67422622fbf9251a046d7a34b7ea0fd4fead", "class": "File", "path": "foo.txt", - "size": 22 + "size": 22, + "basename": "foo.txt", + "nameext": ".txt", + "nameroot": "foo" } job: draft-4/cat-job.json tool: draft-4/template-tool.cwl @@ -54,9 +57,49 @@ checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b path: output.txt size: 13 + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/cat3-tool.cwl doc: Test command execution in Docker with stdout redirection +- job: draft-4/cat-job.json + tool: draft-4/cat3-tool-shortcut.cwl + doc: Test command execution in Docker with stdout redirection + +- job: draft-4/cat-job.json + output: + output_file: + class: File + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b + path: cat-out + size: 13 + tool: draft-4/cat3-tool-mediumcut.cwl + doc: Test command execution in Docker with stdout redirection + +- args: [egrep] + stderr: error.txt + job: + tool: draft-4/egrep-stderr.cwl + doc: Test command line with stderr redirection + +- args: [egrep] + job: + tool: draft-4/egrep-stderr-shortcut.cwl + doc: Test command line with stderr redirection, brief syntax + +- args: [egrep] + stderr: std.err + output: + output_file: + class: File + size: 84 + checksum: 
sha1$cec7b8746a78c42060c96505887449bca0142976 + path: std.err + job: + tool: draft-4/egrep-stderr-mediumcut.cwl + doc: Test command line with stderr redirection, named brief syntax + - job: draft-4/cat-job.json output: output_txt: @@ -64,6 +107,9 @@ checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b path: output.txt size: 13 + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/cat4-tool.cwl doc: Test command execution in Docker with stdin and stdout redirection @@ -109,6 +155,9 @@ checksum: sha1$631bfbac524e2d04cdcc5ec33ade827fc10b06ae path: output size: 15 + basename: output + nameext: "" + nameroot: output tool: draft-4/wc-tool.cwl doc: Test command execution in with stdin and stdout redirection @@ -179,6 +228,9 @@ checksum: sha1$b3ec4ed1749c207e52b3a6d08c59f31d83bff519 path: out size: 15 + basename: out + nameext: "" + nameroot: out tool: draft-4/env-tool1.cwl doc: Test EnvVarRequirement @@ -224,6 +276,9 @@ checksum: sha1$b3ec4ed1749c207e52b3a6d08c59f31d83bff519 path: out size: 15 + basename: out + nameext: "" + nameroot: out tool: draft-4/env-wf1.cwl doc: Test requirement priority @@ -234,6 +289,9 @@ checksum: sha1$cdc1e84968261d6a7575b5305945471f8be199b6 path: out size: 9 + basename: out + nameext: "" + nameroot: out tool: draft-4/env-wf2.cwl doc: Test requirements override hints @@ -249,6 +307,9 @@ checksum: sha1$b9214658cc453331b62c2282b772a5c063dbd284 path: output.txt size: 1111 + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/revsort.cwl doc: Test sample workflows from the specification @@ -259,6 +320,9 @@ checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b path: output.txt size: 13 + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/cat5-tool.cwl doc: Test unknown hints are ignored. 
@@ -269,30 +333,51 @@ checksum: sha1$e2dc9daaef945ac15f01c238ed2f1660f60909a0 path: result.txt size: 142 + basename: result.txt + nameext: .txt + nameroot: result indexedfile: { "path": "input.txt", + "basename": "input.txt", + "nameext": ".txt", + "nameroot": "input", "class": "File", "checksum": "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376", "secondaryFiles": [ { "path": "input.txt.idx1", - "class": "File" + "class": "File", + "basename": "input.txt.idx1", + "nameext": ".idx1", + "nameroot": "input.txt", }, { "path": "input.idx2", - "class": "File" + "class": "File", + "basename": "input.idx2", + "nameext": ".idx2", + "nameroot": "input", }, { "path": "input.txt.idx3", - "class": "File" + "class": "File", + "basename": "input.txt.idx3", + "nameext": ".idx3", + "nameroot": "input.txt", }, { "path": "input.txt.idx4", - "class": "File" + "class": "File", + "basename": "input.txt.idx4", + "nameext": ".idx4", + "nameroot": "input.txt", }, { "path": "input.txt.idx5", - "class": "File" + "class": "File", + "basename": "input.txt.idx5", + "nameext": ".idx5", + "nameroot": "input.txt", } ], "size": 1111 @@ -309,6 +394,9 @@ checksum: sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376 path: fish.txt size: 1111 + basename: fish.txt + nameext: .txt + nameroot: fish tool: draft-4/rename.cwl doc: | Test CreateFileRequirement with expression in filename. 
@@ -327,6 +415,9 @@ size: 12 class: File checksum: "sha1$f12e6cfe70f3253f70b0dbde17c692e7fb0f1e5e" + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/schemadef-tool.cwl doc: | Test SchemaDefRequirement definition used in tool parameter @@ -338,6 +429,9 @@ size: 12 class: File checksum: "sha1$f12e6cfe70f3253f70b0dbde17c692e7fb0f1e5e" + basename: output.txt + nameext: .txt + nameroot: output tool: draft-4/schemadef-wf.cwl doc: | Test SchemaDefRequirement definition used in workflow parameter @@ -542,6 +636,9 @@ size: 13 class: "File" checksum: "sha1$47a013e660d408619d894b20806b1d5086aab03b" + basename: output.txt + nameext: .txt + nameroot: output doc: | Test optional output file and optional secondaryFile on output. @@ -647,3 +744,49 @@ } tool: "draft-4/conflict-wf.cwl#collision" doc: Test workflow two input files with same name. + +- job: draft-4/dir-job.yml + output: + "outlist": { + "size": 20, + "path": "output.txt", + "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", + "class": "File" + } + tool: draft-4/dir.cwl + doc: Test directory input + +- job: draft-4/dir-job.yml + output: + "outlist": { + "size": 20, + "path": "output.txt", + "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", + "class": "File" + } + tool: draft-4/dir2.cwl + doc: Test directory input in Docker + +- job: draft-4/dir3-job.yml + output: + "outdir": { + "class": "Directory", + "listing": [ + { + "basename": "hello.txt", + "entry": { + "class": "File", + "path": "hello.txt" + } + }, + { + "basename": "goodbye.txt", + "entry": { + "class": "File", + "path": "goodbye.txt" + } + } + ], + } + tool: draft-4/dir3.cwl + doc: Test directory input in Docker \ No newline at end of file diff --git a/draft-4/draft-4/cat3-tool-mediumcut.cwl b/draft-4/draft-4/cat3-tool-mediumcut.cwl new file mode 100755 index 000000000..1c29102eb --- /dev/null +++ b/draft-4/draft-4/cat3-tool-mediumcut.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool 
+cwlVersion: cwl:draft-4.dev2 +description: "Print the contents of a file to stdout using 'cat' running in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: + file1: + type: File + label: Input File + description: "The file that will be copied using 'cat'" + inputBinding: {position: 1} +outputs: + output_file: + type: stdout +baseCommand: cat +stdout: cat-out diff --git a/draft-4/draft-4/cat3-tool-shortcut.cwl b/draft-4/draft-4/cat3-tool-shortcut.cwl new file mode 100755 index 000000000..4d12a4df4 --- /dev/null +++ b/draft-4/draft-4/cat3-tool-shortcut.cwl @@ -0,0 +1,17 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: cwl:draft-4.dev2 +description: "Print the contents of a file to stdout using 'cat' running in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: + file1: + type: File + label: Input File + description: "The file that will be copied using 'cat'" + inputBinding: {position: 1} +outputs: + output_file: + type: stdout +baseCommand: cat diff --git a/draft-4/draft-4/dir-job.yml b/draft-4/draft-4/dir-job.yml new file mode 100644 index 000000000..cd569e492 --- /dev/null +++ b/draft-4/draft-4/dir-job.yml @@ -0,0 +1,3 @@ +indir: + class: Directory + path: testdir \ No newline at end of file diff --git a/draft-4/draft-4/dir.cwl b/draft-4/draft-4/dir.cwl new file mode 100644 index 000000000..662f06b3c --- /dev/null +++ b/draft-4/draft-4/dir.cwl @@ -0,0 +1,18 @@ +class: CommandLineTool +cwlVersion: draft-4.dev2 +requirements: + - class: ShellCommandRequirement +inputs: + indir: Directory +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["cd", "$(inputs.indir.path)", + {shellQuote: false, valueFrom: "&&"}, + "find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/dir2.cwl b/draft-4/draft-4/dir2.cwl new file mode 100644 index 000000000..3a0336691 
--- /dev/null +++ b/draft-4/draft-4/dir2.cwl @@ -0,0 +1,20 @@ +class: CommandLineTool +cwlVersion: draft-4.dev2 +hints: + DockerRequirement: + dockerPull: debian:8 + ShellCommandRequirement: {} +inputs: + indir: Directory +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["cd", "$(inputs.indir.path)", + {shellQuote: false, valueFrom: "&&"}, + "find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/dir3-job.yml b/draft-4/draft-4/dir3-job.yml new file mode 100644 index 000000000..041012e9f --- /dev/null +++ b/draft-4/draft-4/dir3-job.yml @@ -0,0 +1,3 @@ +inf: + class: File + path: hello.tar \ No newline at end of file diff --git a/draft-4/draft-4/dir3.cwl b/draft-4/draft-4/dir3.cwl new file mode 100644 index 000000000..de0c5dcdd --- /dev/null +++ b/draft-4/draft-4/dir3.cwl @@ -0,0 +1,13 @@ +class: CommandLineTool +cwlVersion: draft-4.dev2 +baseCommand: [tar, xvf] +inputs: + inf: + type: File + inputBinding: + position: 1 +outputs: + outdir: + type: Directory + outputBinding: + glob: . diff --git a/draft-4/draft-4/egrep-stderr-mediumcut.cwl b/draft-4/draft-4/egrep-stderr-mediumcut.cwl new file mode 100644 index 000000000..f780c7319 --- /dev/null +++ b/draft-4/draft-4/egrep-stderr-mediumcut.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: cwl:draft-4.dev2 +description: "Test of capturing stderr output in a docker container." 
+hints: + DockerRequirement: + dockerPull: debian:wheezy + +inputs: [] + +outputs: + output_file: + type: stderr + +baseCommand: egrep +successCodes: [2] + +stderr: std.err diff --git a/draft-4/draft-4/egrep-stderr-shortcut.cwl b/draft-4/draft-4/egrep-stderr-shortcut.cwl new file mode 100644 index 000000000..9a70aa1d5 --- /dev/null +++ b/draft-4/draft-4/egrep-stderr-shortcut.cwl @@ -0,0 +1,16 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: cwl:draft-4.dev2 +description: "Test of capturing stderr output in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy + +inputs: [] + +outputs: + output_file: + type: stderr + +baseCommand: egrep +successCodes: [2] diff --git a/draft-4/draft-4/egrep-stderr.cwl b/draft-4/draft-4/egrep-stderr.cwl new file mode 100644 index 000000000..4bba74962 --- /dev/null +++ b/draft-4/draft-4/egrep-stderr.cwl @@ -0,0 +1,15 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: cwl:draft-4.dev2 +description: "Test of capturing stderr output in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: [] +outputs: + output_file: + type: File + outputBinding: {glob: error.txt} +baseCommand: egrep +successCodes: [2] +stderr: error.txt diff --git a/site/draft4-deps.json b/site/draft4-deps.json index 72477ac88..157ccb1ec 100644 --- a/site/draft4-deps.json +++ b/site/draft4-deps.json @@ -11,6 +11,10 @@ "path": "../draft-4/salad/schema_salad/metaschema/metaschema.yml", "class": "File" }, + { + "path": "../draft-4/salad/schema_salad/metaschema/metaschema_base.yml", + "class": "File" + }, { "path": "../draft-4/salad/schema_salad/metaschema/salad.md", "class": "File" From 98304026fc50717652591e2feb99dc75ac71b95a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 10:21:33 -0400 Subject: [PATCH 11/38] Update to use 'location' URI consistently for identifying files and directories. 
--- cwltool/draft2tool.py | 53 ++++++++++++++++-------------------------- cwltool/main.py | 6 ++--- cwltool/process.py | 21 +++++++---------- cwltool/stdfsaccess.py | 4 ++-- tests/echo.cwl | 2 +- 5 files changed, 34 insertions(+), 52 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 899d076e7..900d87945 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -71,10 +71,9 @@ def job(self, joborder, output_callback, **kwargs): yield j -def remove_hostfs(f): # type: (Dict[str, Any]) -> None - if "hostfs" in f: - del f["hostfs"] - +def remove_path(f): # type: (Dict[str, Any]) -> None + if "path" in f: + del f["path"] def revmap_file(builder, outdir, f): # type: (Builder,str,Dict[str,Any]) -> Union[Dict[str,Any],None] @@ -84,17 +83,15 @@ def revmap_file(builder, outdir, f): to the external directory. """ - if f.get("hostfs"): - return None + if "location" in f: + return f revmap_f = builder.pathmapper.reversemap(f["path"]) if revmap_f: - f["path"] = revmap_f[1] - f["hostfs"] = True + f["location"] = revmap_f[1] return f elif f["path"].startswith(builder.outdir): - f["path"] = os.path.join(outdir, f["path"][len(builder.outdir)+1:]) - f["hostfs"] = True + f["location"] = os.path.join(outdir, f["path"][len(builder.outdir)+1:]) return f else: raise WorkflowException(u"Output file path %s must be within designated output directory (%s) or an input file pass through." 
% (f["path"], builder.outdir)) @@ -247,14 +244,11 @@ def rm_pending_output_callback(output_callback, jobcachepending, # walk over input as implicit reassignment doesn't reach everything in builder.bindings def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] if not f.get("containerfs"): - if f["class"] == "Directory": - f["path"] = builder.pathmapper.mapper(f["id"])[1] - else: - f["path"] = builder.pathmapper.mapper(f["path"])[1] + f["path"] = builder.pathmapper.mapper(f["location"])[1] + if f["class"] == "File": # XXX should only add if >= draft-4 - f["basename"] = os.path.basename(f["path"]) + f["dirname"], f["basename"] = os.path.split(f["path"]) f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) - f["containerfs"] = True return f _logger.debug(u"[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4)) @@ -318,12 +312,11 @@ def collect_output_ports(self, ports, builder, outdir): with builder.fs_access.open(custom_output, "r") as f: ret = json.load(f) _logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4)) - adjustFileObjs(ret, remove_hostfs) adjustFileObjs(ret, cast(Callable[[Any], Any], # known bug in mypy # https://github.com/python/mypy/issues/797 partial(revmap_file, builder, outdir))) - adjustFileObjs(ret, remove_hostfs) + adjustFileObjs(ret, remove_path) validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret) return ret @@ -334,8 +327,8 @@ def collect_output_ports(self, ports, builder, outdir): except Exception as e: raise WorkflowException(u"Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e)) if ret: - adjustFileObjs(ret, remove_hostfs) - adjustDirObjs(ret, remove_hostfs) + adjustFileObjs(ret, remove_path) + adjustDirObjs(ret, remove_path) validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret) return ret if ret is not None else {} except validate.ValidationException as 
e: @@ -364,22 +357,18 @@ def collect_output(self, schema, builder, outdir): elif gb.startswith("/"): raise WorkflowException("glob patterns must not start with '/'") try: - r.extend([{"path": g, - "class": "File" if builder.fs_access.isfile(g) else "Directory", - "basename": os.path.basename(g), - "nameroot": os.path.splitext(os.path.basename(g))[0], - "nameext": os.path.splitext(os.path.basename(g))[1], - "hostfs": True} + r.extend([{"location": g, + "class": "File" if builder.fs_access.isfile(g) else "Directory"} for g in builder.fs_access.glob(os.path.join(outdir, gb))]) except (OSError, IOError) as e: _logger.warn(str(e)) for files in r: - if files["class"] == "Directory": + if files["class"] == "Directory" and "listing" not in files: getListing(builder.fs_access, files) else: checksum = hashlib.sha1() - with builder.fs_access.open(files["path"], "rb") as f: + with builder.fs_access.open(files["location"], "rb") as f: contents = f.read(CONTENT_LIMIT) if binding.get("loadContents"): files["contents"] = contents @@ -430,14 +419,12 @@ def collect_output(self, schema, builder, outdir): if isinstance(sf, dict) or "$(" in sf or "${" in sf: sfpath = builder.do_eval(sf, context=r) if isinstance(sfpath, basestring): - sfpath = revmap({"path": sfpath, "class": "File"}) + sfpath = revmap({"location": sfpath, "class": "File"}) else: - sfpath = {"path": substitute(primary["path"], sf), "class": "File", "hostfs": True} + sfpath = {"location": substitute(primary["location"], sf), "class": "File", "hostfs": True} for sfitem in aslist(sfpath): - if builder.fs_access.exists(sfitem["path"]): - sfitem["basename"] = os.path.basename(sfitem["path"]) - sfitem["nameroot"], sfitem["nameext"] = os.path.splitext(sfitem["basename"]) + if builder.fs_access.exists(sfitem["location"]): primary["secondaryFiles"].append(sfitem) if not r and optional: diff --git a/cwltool/main.py b/cwltool/main.py index f315d943b..6bcc50a2d 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -224,7 +224,7 @@ 
def __init__(self, option_strings, dest, nargs=None, **kwargs): def __call__(self, parser, namespace, values, option_string=None): # type: (argparse.ArgumentParser, argparse.Namespace, str, Any) -> None - setattr(namespace, self.dest, {"class": "File", "path": values}) + setattr(namespace, self.dest, {"class": "File", "location": "file://%s" % os.path.abspath(values)}) class DirectoryAction(argparse.Action): @@ -237,7 +237,7 @@ def __init__(self, option_strings, dest, nargs=None, **kwargs): def __call__(self, parser, namespace, values, option_string=None): # type: (argparse.ArgumentParser, argparse.Namespace, str, Any) -> None - setattr(namespace, self.dest, {"class": "Directory", "path": values}) + setattr(namespace, self.dest, {"class": "Directory", "location": "file://%s" % os.path.abspath(values)}) class FileAppendAction(argparse.Action): @@ -254,7 +254,7 @@ def __call__(self, parser, namespace, values, option_string=None): if not g: g = [] setattr(namespace, self.dest, g) - g.append({"class": "File", "path": values}) + g.append({"class": "File", "location": "file://%s" % os.path.abspath(values)}) def generate_parser(toolparser, tool, namemap): diff --git a/cwltool/process.py b/cwltool/process.py index 6ecf722a6..3a8ad3f5f 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -169,23 +169,18 @@ def adjustFilesWithSecondary(rec, op, primary=None): def getListing(fs_access, rec): if "listing" not in rec: listing = [] - path = rec["path"][7:] if rec["path"].startswith("file://") else rec["path"] - for ld in fs_access.listdir(path): - abspath = os.path.join(path, ld) - if fs_access.isdir(abspath): + loc = rec["location"] + for ld in fs_access.listdir(): + if fs_access.isdir(ld): ent = {"class": "Directory", - "path": abspath, - "id": "_dir:%i" % random.randint(1, 1000000000)} + "location": loc + "/" + os.path.basename(ld)} getListing(fs_access, ent) listing.append({"basename": os.path.basename(ld), "entry": ent}) else: listing.append({"basename": 
os.path.basename(ld), - "entry": {"class": "File", "path": abspath}}) + "entry": {"class": "File", "location": ld}}) rec["listing"] = listing - if "path" in rec: - del rec["path"] - rec["id"] = "_dir:%i" % random.randint(1, 1000000000) def stageFiles(pm, stageFunc): for f in pm.files(): @@ -561,7 +556,7 @@ def scandeps(base, doc, reffields, urlfields, loadref): if base != df: r.append({ "class": "File", - "path": df + "location": df }) base = df @@ -575,7 +570,7 @@ def scandeps(base, doc, reffields, urlfields, loadref): subid = urlparse.urljoin(base, u) deps = { "class": "File", - "path": subid + "location": subid } # type: Dict[str, Any] sf = scandeps(subid, sub, reffields, urlfields, loadref) if sf: @@ -585,7 +580,7 @@ def scandeps(base, doc, reffields, urlfields, loadref): for u in aslist(v): r.append({ "class": "File", - "path": urlparse.urljoin(base, u) + "location": urlparse.urljoin(base, u) }) else: r.extend(scandeps(base, v, reffields, urlfields, loadref)) diff --git a/cwltool/stdfsaccess.py b/cwltool/stdfsaccess.py index cd2d1e04b..fcc1c2c1f 100644 --- a/cwltool/stdfsaccess.py +++ b/cwltool/stdfsaccess.py @@ -13,7 +13,7 @@ def _abs(self, p): # type: (unicode) -> unicode return abspath(p, self.basedir) def glob(self, pattern): # type: (unicode) -> List[unicode] - return glob.glob(self._abs(pattern)) + return ["file://%s" % self._abs(l) for l in glob.glob(self._abs(pattern))] def open(self, fn, mode): # type: (unicode, str) -> BinaryIO return open(self._abs(fn), mode) @@ -28,4 +28,4 @@ def isdir(self, fn): return os.path.isdir(self._abs(fn)) def listdir(self, fn): - return os.listdir(self._abs(fn)) + return ["file://%s" % self._abs(l) for l in os.listdir(self._abs(fn))] diff --git a/tests/echo.cwl b/tests/echo.cwl index 6eb6a23a6..da6943328 100644 --- a/tests/echo.cwl +++ b/tests/echo.cwl @@ -1,4 +1,4 @@ -cwlVersion: cwl:draft-3 +cwlVersion: cwl:draft-4.dev2 class: CommandLineTool inputs: - id: inp From 049e6e624c2e986acb85c95b63414aea2e87733f Mon Sep 17 
00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 10:21:36 -0400 Subject: [PATCH 12/38] Squashed 'cwltool/schemas/' changes from 8798dc8..b449b3d b449b3d location field is required on File and Directory. Rename conflicting basename to entryname on Dirent. git-subtree-dir: cwltool/schemas git-subtree-split: b449b3d97044d7d153c980a6965912d01990cdda --- draft-4/Process.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/draft-4/Process.yml b/draft-4/Process.yml index a7e25b7d5..37b7f74f4 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -64,7 +64,7 @@ $graph: _type: "@vocab" doc: Must be `File` to indicate this object describes a file. - name: location - type: string? + type: string doc: | A URI that identifies the file resource. This may be a relative reference, in which case it must be resolved using the base URI of the @@ -178,10 +178,10 @@ $graph: - name: Dirent type: record fields: - - name: basename + - name: entryname type: string jsonldPredicate: - "_id": cwl:name + "_id": cwl:entryname - name: entry type: [File, Directory] From 07d4505b5c3b7adc2081bada50efb758e3d51388 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 10:33:25 -0400 Subject: [PATCH 13/38] Unit tests pass --- cwltool/builder.py | 26 +------------------------- cwltool/main.py | 12 +++++++----- cwltool/pathmapper.py | 32 ++++++++++++++++++++++++++++---- cwltool/process.py | 7 ++----- tests/test_toolargparse.py | 4 ++-- 5 files changed, 40 insertions(+), 41 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index c950e39e4..6c68f648e 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -6,7 +6,7 @@ from typing import Any, Union, AnyStr, Callable from .errors import WorkflowException from .stdfsaccess import StdFsAccess -from .pathmapper import PathMapper +from .pathmapper import PathMapper, adjustFileObjs, adjustDirObjs CONTENT_LIMIT = 64 * 1024 @@ -17,30 +17,6 @@ def substitute(value, replace): # type: (str, str) 
-> str else: return value + replace -def adjustFileObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None - """Apply an update function to each File object in the object `rec`.""" - - if isinstance(rec, dict): - if rec.get("class") == "File": - op(rec) - for d in rec: - adjustFileObjs(rec[d], op) - if isinstance(rec, list): - for d in rec: - adjustFileObjs(d, op) - -def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None - """Apply an update function to each Directory object in the object `rec`.""" - - if isinstance(rec, dict): - if rec.get("class") == "Directory": - op(rec) - for d in rec: - adjustDirObjs(rec[d], op) - if isinstance(rec, list): - for d in rec: - adjustDirObjs(d, op) - class Builder(object): def __init__(self): # type: () -> None diff --git a/cwltool/main.py b/cwltool/main.py index 6bcc50a2d..2458df284 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -29,7 +29,7 @@ from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate from .load_tool import fetch_document, validate_document, make_tool from . 
import draft2tool -from .builder import adjustDirObjs +from .builder import adjustFileObjs, adjustDirObjs from .stdfsaccess import StdFsAccess _logger = logging.getLogger("cwltool") @@ -436,11 +436,13 @@ def loadref(b, u): base = "file://" + os.getcwd() else: raise Exception(u"Unknown relative_deps %s" % relative_deps) - def makeRelative(u): + def makeRelative(ob): + u = ob["location"] if ":" in u.split("/")[0] and not u.startswith("file://"): - return u - return os.path.relpath(u, base) - adjustFiles(deps, makeRelative) + pass + else: + ob["location"] = os.path.relpath(u, base) + adjustFileObjs(deps, makeRelative) stdout.write(json.dumps(deps, indent=4)) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 0696fda8a..d084c7157 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -18,6 +18,30 @@ def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None for d in rec: adjustFiles(d, op) +def adjustFileObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None + """Apply an update function to each File object in the object `rec`.""" + + if isinstance(rec, dict): + if rec.get("class") == "File": + op(rec) + for d in rec: + adjustFileObjs(rec[d], op) + if isinstance(rec, list): + for d in rec: + adjustFileObjs(d, op) + +def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None + """Apply an update function to each Directory object in the object `rec`.""" + + if isinstance(rec, dict): + if rec.get("class") == "Directory": + op(rec) + for d in rec: + adjustDirObjs(rec[d], op) + if isinstance(rec, list): + for d in rec: + adjustDirObjs(d, op) + def abspath(src, basedir): # type: (unicode, unicode) -> unicode if src.startswith(u"file://"): @@ -65,18 +89,18 @@ def visit(obj, base): visit(fob, stagedir) else: - def visit(path): + def visit(ob): + path = ob["location"] if path in self._pathmap: - return path + return ab = abspath(path, basedir) if self.scramble: tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 
1000000000)) else: tgt = os.path.join(stagedir, os.path.basename(path)) self._pathmap[path] = (ab, tgt) - return path - adjustFiles(fob, visit) + adjustFileObjs(fob, visit) # Dereference symbolic links for path, (ab, tgt) in self._pathmap.items(): diff --git a/cwltool/process.py b/cwltool/process.py index 3a8ad3f5f..6ec764810 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -32,7 +32,7 @@ from .stdfsaccess import StdFsAccess from .builder import Builder, adjustFileObjs, adjustDirObjs from .errors import WorkflowException, UnsupportedRequirement -from .pathmapper import PathMapper, abspath, adjustFiles +from .pathmapper import PathMapper, abspath _logger = logging.getLogger("cwltool") @@ -221,10 +221,7 @@ def moveIt(src, dst): stageFiles(pm, moveIt) def _check_adjust(f): - if f["class"] == "Directory": - f["path"] = pm.mapper(f["id"])[1] - else: - f["path"] = pm.mapper(f["path"])[1] + f["location"] = "file://" + pm.mapper(f["location"])[1] return f adjustFileObjs(outputObj, _check_adjust) diff --git a/tests/test_toolargparse.py b/tests/test_toolargparse.py index 6c3e05680..dfa108318 100644 --- a/tests/test_toolargparse.py +++ b/tests/test_toolargparse.py @@ -8,7 +8,7 @@ class ToolArgparse(unittest.TestCase): script=''' #!/usr/bin/env cwl-runner -cwlVersion: "draft-3" +cwlVersion: "draft-4.dev2" class: CommandLineTool description: "This tool is developed for SMC-RNA Challenge for detecting gene fusions (STAR fusion)" inputs: @@ -28,7 +28,7 @@ class ToolArgparse(unittest.TestCase): script2=''' #!/usr/bin/env cwl-runner -cwlVersion: 'cwl:draft-3' +cwlVersion: 'cwl:draft-4.dev2' class: CommandLineTool inputs: - id: bdg From 680ad87705b761e449900c75563b79ad745ab228 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 11:53:01 -0400 Subject: [PATCH 14/38] Add updater to draft-4.dev3 --- cwltool/update.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/cwltool/update.py b/cwltool/update.py index 
19a70a56b..c11c02d87 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -360,6 +360,25 @@ def draft4Dev1toDev2(doc, loader, baseuri): """Public updater for draft-4.dev1 to draft-4.dev2.""" return (_draft4Dev1toDev2(doc, loader, baseuri), "draft-4.dev2") + +def _draft4Dev2toDev3(doc, loader, baseuri): + # type: (Any, Loader, str) -> Any + if isinstance(doc, dict): + if "class" in doc and doc["class"] == "File": + doc["location"] = doc["path"] + del doc["path"] + for key, value in doc.items(): + doc[key] = _draft4Dev2toDev3(value, loader, baseuri) + elif isinstance(doc, list): + doc = [_draft4Dev2toDev3(item, loader, baseuri) for item in doc] + + return doc + +def draft4Dev2toDev3(doc, loader, baseuri): + # type: (Any, Loader, str) -> Tuple[Any, str] + """Public updater for draft-4.dev2 to draft-4.dev3.""" + return (_draft4Dev2toDev3(doc, loader, baseuri), "draft-4.dev3") + UPDATES = { "draft-2": draft2toDraft3dev1, "draft-3": draft3toDraft4dev1 @@ -372,13 +391,14 @@ def draft4Dev1toDev2(doc, loader, baseuri): "draft-3.dev4": draftDraft3dev4toDev5, "draft-3.dev5": draftDraft3dev5toFinal, "draft-4.dev1": draft4Dev1toDev2, - "draft-4.dev2": None + "draft-4.dev2": draft4Dev2toDev3, + "draft-4.dev3": None } # type: Dict[unicode, Callable[[Any, Loader, str], Tuple[Any, str]]] ALLUPDATES = UPDATES.copy() ALLUPDATES.update(DEVUPDATES) -LATEST = "draft-4.dev2" +LATEST = "draft-4.dev3" def identity(doc, loader, baseuri): # pylint: disable=unused-argument # type: (Any, Loader, str) -> Tuple[Any, Union[str, unicode]] From 7b42788f4f1a46381854c0797794398d72aba98d Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 11:53:08 -0400 Subject: [PATCH 15/38] Squashed 'cwltool/schemas/' changes from b449b3d..c55f733 c55f733 Add draft-4.dev3 git-subtree-dir: cwltool/schemas git-subtree-split: c55f733d182c1660d435fe91f61ce7f739ddc035 --- draft-4/Process.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 
37b7f74f4..a01c24b6b 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -33,6 +33,7 @@ $graph: - cwl:draft-3 - cwl:draft-4.dev1 - cwl:draft-4.dev2 + - cwl:draft-4.dev3 - name: CWLType type: enum From 319ad2671a47d5479cc901229c3b788bb083fe05 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 14:09:46 -0400 Subject: [PATCH 16/38] cwltest support for "location" --- cwltool/cwltest.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index bef447fe9..ac6fba35b 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -28,20 +28,19 @@ def compare(a, b): # type: (Any, Any) -> bool try: if isinstance(a, dict): if a.get("class") == "File": - if not (b["path"].endswith("/" + a["path"]) or ("/" not in b["path"] and a["path"] == b["path"])): - raise CompareFail(u"%s does not end with %s" %(b["path"], a["path"])) + if "path" in b: + comp = "path" + else: + comp = "location" + if not (b[comp].endswith("/" + a[comp]) or ("/" not in b[comp] and a[comp] == b[comp])): + raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) # ignore empty collections b = {k: v for k, v in b.iteritems() if not isinstance(v, (list, dict)) or len(v) > 0} - if a.get("class") == "Directory": - for d in ("id", "path"): - if d in b: - del b[d] - pass if len(a) != len(b): raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) for c in a: - if a.get("class") != "File" or c != "path": + if a.get("class") != "File" or c not in ("path", "location"): if c not in b: raise CompareFail(u"%s not in %s" % (c, b)) if not compare(a[c], b[c]): From 71119ba070d5e1e41075fc5c87966f7bc5ab27ee Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 23 Jun 2016 22:17:51 -0400 Subject: [PATCH 17/38] path -> location fixes for File and Directory. Working on passing conformance tests. 
--- cwltool/builder.py | 6 +++--- cwltool/cwltest.py | 2 +- cwltool/draft2tool.py | 8 ++++---- cwltool/job.py | 4 ++-- cwltool/pathmapper.py | 8 ++++---- cwltool/process.py | 6 +++--- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index a04eb6227..a417eeedb 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -102,7 +102,7 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): if schema["type"] == "File": self.files.append(datum) if binding and binding.get("loadContents"): - with self.fs_access.open(datum["path"], "rb") as f: + with self.fs_access.open(datum["location"], "rb") as f: datum["contents"] = f.read(CONTENT_LIMIT) if "secondaryFiles" in schema: @@ -112,11 +112,11 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): if isinstance(sf, dict) or "$(" in sf or "${" in sf: secondary_eval = self.do_eval(sf, context=datum) if isinstance(secondary_eval, basestring): - sfpath = {"path": secondary_eval, "class": "File"} + sfpath = {"location": secondary_eval, "class": "File"} else: sfpath = secondary_eval else: - sfpath = {"path": substitute(datum["path"], sf), "class": "File"} + sfpath = {"location": substitute(datum["location"], sf), "class": "File"} if isinstance(sfpath, list): datum["secondaryFiles"].extend(sfpath) else: diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index 010989a86..cd7a1f4d6 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -32,7 +32,7 @@ def compare(a, b): # type: (Any, Any) -> bool comp = "path" else: comp = "location" - if not (b[comp].endswith("/" + a[comp]) or ("/" not in b[comp] and a[comp] == b[comp])): + if a[comp] and (not (b[comp].endswith("/" + a[comp]) or ("/" not in b[comp] and a[comp] == b[comp]))): raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) # ignore empty collections b = {k: v for k, v in b.iteritems() diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 900d87945..3e6b5d239 100644 --- 
a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -223,10 +223,6 @@ def rm_pending_output_callback(output_callback, jobcachepending, builder.pathmapper = None - if self.tool.get("stdin"): - j.stdin = builder.do_eval(self.tool["stdin"]) - reffiles.append({"class": "File", "path": j.stdin}) - if self.tool.get("stderr"): j.stderr = builder.do_eval(self.tool["stderr"]) if os.path.isabs(j.stderr) or ".." in j.stderr: @@ -258,6 +254,10 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] adjustDirObjs(builder.files, _check_adjust) adjustDirObjs(builder.bindings, _check_adjust) + if self.tool.get("stdin"): + j.stdin = builder.do_eval(self.tool["stdin"]) + reffiles.append({"class": "File", "path": j.stdin}) + _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4)) dockerReq, _ = self.get_requirement("DockerRequirement") diff --git a/cwltool/job.py b/cwltool/job.py index 7ce12de74..59bc62a65 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -156,7 +156,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, self.name, self.outdir, " \\\n ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]), - u' < %s' % self.pathmapper.mapper(self.stdin)[1] if self.stdin else '', + u' < %s' % self.stdin if self.stdin else '', u' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '', u' 2> %s' % os.path.join(self.outdir, self.stderr) if self.stderr else '') @@ -181,7 +181,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, raise Exception("Unhandled type %s", type(entry)) if self.stdin: - stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb") + stdin = open(self.pathmapper.reversemap(self.stdin)[1], "rb") else: stdin = subprocess.PIPE diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index d084c7157..a71f3a496 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -76,15 +76,15 @@ def 
setup(self, referenced_files, basedir): if fob["class"] == "Directory": def visit(obj, base): - self._pathmap[obj["id"]] = (obj["id"], base) + self._pathmap[obj["location"]] = (obj["location"], base) for ld in obj["listing"]: - tgt = os.path.join(base, ld["basename"]) + tgt = os.path.join(base, ld["entryname"]) if ld["entry"]["class"] == "Directory": visit(ld["entry"], tgt) - ab = ld["entry"]["id"] + ab = ld["entry"]["location"] self._pathmap[ab] = (ab, tgt) else: - ab = ld["entry"]["path"] + ab = ld["entry"]["location"] self._pathmap[ab] = (ab, tgt) visit(fob, stagedir) diff --git a/cwltool/process.py b/cwltool/process.py index 6ec764810..28552dd7c 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -170,15 +170,15 @@ def getListing(fs_access, rec): if "listing" not in rec: listing = [] loc = rec["location"] - for ld in fs_access.listdir(): + for ld in fs_access.listdir(loc): if fs_access.isdir(ld): ent = {"class": "Directory", "location": loc + "/" + os.path.basename(ld)} getListing(fs_access, ent) - listing.append({"basename": os.path.basename(ld), + listing.append({"entryname": os.path.basename(ld), "entry": ent}) else: - listing.append({"basename": os.path.basename(ld), + listing.append({"entryname": os.path.basename(ld), "entry": {"class": "File", "location": ld}}) rec["listing"] = listing From 25f2df23b3ffe23587aac9fee15c4eecfbd76bfe Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 24 Jun 2016 09:17:00 -0400 Subject: [PATCH 18/38] Fixing more path/location bugs and accomodations across versions. 
--- cwltool/cwltest.py | 6 +++++- cwltool/job.py | 4 ++-- cwltool/main.py | 16 ++++++++++++++++ cwltool/pathmapper.py | 24 +++++++++++++----------- cwltool/process.py | 41 ++++++++++++++++++++++------------------- cwltool/stdfsaccess.py | 2 +- 6 files changed, 59 insertions(+), 34 deletions(-) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index cd7a1f4d6..2e1c503a7 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -28,10 +28,14 @@ def compare(a, b): # type: (Any, Any) -> bool try: if isinstance(a, dict): if a.get("class") == "File": - if "path" in b: + if "path" in a: comp = "path" + if "location" in b: + del b["location"] else: comp = "location" + if "path" in b: + del b["path"] if a[comp] and (not (b[comp].endswith("/" + a[comp]) or ("/" not in b[comp] and a[comp] == b[comp]))): raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) # ignore empty collections diff --git a/cwltool/job.py b/cwltool/job.py index 59bc62a65..d3fe43085 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -81,7 +81,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, for f in self.pathmapper.files(): p = self.pathmapper.mapper(f) - if not p[0].startswith("_dir:") and not os.path.isfile(p[0]): + if p.type == "File" and not os.path.isfile(p[0]): raise WorkflowException(u"Input file %s (at %s) not found or is not a regular file." 
% (f, self.pathmapper.mapper(f)[0])) img_id = None @@ -97,7 +97,7 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, for src in self.pathmapper.files(): if not src.startswith("_dir:"): vol = self.pathmapper.mapper(src) - runtime.append(u"--volume=%s:%s:ro" % vol) + runtime.append(u"--volume=%s:%s:ro" % (vol.resolved, vol.target)) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl")) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp")) runtime.append(u"--workdir=%s" % ("/var/spool/cwl")) diff --git a/cwltool/main.py b/cwltool/main.py index 2458df284..c240ca658 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -337,6 +337,7 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, else: jobloaderctx = { u"path": {u"@type": u"@id"}, + u"location": {u"@type": u"@id"}, u"format": {u"@type": u"@id"}, u"id": u"@id"} jobloaderctx.update(t.metadata.get("$namespaces", {})) @@ -405,6 +406,13 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, basedir=u"file://%s/" % input_basedir) return 0 + def pathToLoc(p): + if "location" not in p: + p["location"] = p["path"] + del p["path"] + + adjustDirObjs(job_order_object, pathToLoc) + adjustFileObjs(job_order_object, pathToLoc) adjustDirObjs(job_order_object, functools.partial(getListing, StdFsAccess(input_basedir))) if "cwl:tool" in job_order_object: @@ -680,8 +688,16 @@ def main(argsl=None, makeTool=makeTool, select_resources=selectResources, **vars(args)) + # This is the workflow output, it needs to be written if out is not None: + def locToPath(p): + if p["location"].startswith("file://"): + p["path"] = p["location"][7:] + + adjustDirObjs(out, locToPath) + adjustFileObjs(out, locToPath) + if isinstance(out, basestring): stdout.write(out) else: diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index a71f3a496..3230e08e1 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ 
-2,10 +2,13 @@ import random import logging import stat +import collections from typing import Tuple, Set, Union, Any _logger = logging.getLogger("cwltool") +MapperEnt = collections.namedtuple("MapperEnt", ("resolved", "target", "type")) + def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None """Apply a mapping function to each File path in the object `rec`.""" @@ -76,16 +79,15 @@ def setup(self, referenced_files, basedir): if fob["class"] == "Directory": def visit(obj, base): - self._pathmap[obj["location"]] = (obj["location"], base) + self._pathmap[obj["location"]] = MapperEnt(obj["location"], base, "Directory") for ld in obj["listing"]: tgt = os.path.join(base, ld["entryname"]) if ld["entry"]["class"] == "Directory": visit(ld["entry"], tgt) - ab = ld["entry"]["location"] - self._pathmap[ab] = (ab, tgt) - else: - ab = ld["entry"]["location"] - self._pathmap[ab] = (ab, tgt) + ab = ld["entry"]["location"] + if ab.startswith("file://"): + ab = ab[7:] + self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) visit(fob, stagedir) else: @@ -98,13 +100,13 @@ def visit(ob): tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 1000000000)) else: tgt = os.path.join(stagedir, os.path.basename(path)) - self._pathmap[path] = (ab, tgt) + self._pathmap[path] = MapperEnt(ab, tgt, "File") adjustFileObjs(fob, visit) # Dereference symbolic links - for path, (ab, tgt) in self._pathmap.items(): - if ab.startswith("_dir:"):# or not os.path.exists(ab): + for path, (ab, tgt, type) in self._pathmap.items(): + if type == "Directory": # or not os.path.exists(ab): continue deref = ab st = os.lstat(deref) @@ -114,13 +116,13 @@ def visit(ob): os.path.dirname(deref), rl) st = os.lstat(deref) - self._pathmap[path] = (deref, tgt) + self._pathmap[path] = MapperEnt(deref, tgt, "File") def mapper(self, src): # type: (unicode) -> Tuple[unicode, unicode] if u"#" in src: i = src.index(u"#") p = self._pathmap[src[:i]] - return (p[0], p[1] + src[i:]) + 
return (p.resolved, p.target + src[i:]) else: return self._pathmap[src] diff --git a/cwltool/process.py b/cwltool/process.py index 28552dd7c..95b5a3ecf 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -115,24 +115,13 @@ def get_schema(version): SCHEMA_CACHE[version] = schema_salad.schema.load_schema( "https://w3id.org/cwl/CommonWorkflowLanguage.yml", cache=cache) - global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY # pylint: disable=global-statement - SCHEMA_ANY = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/salad#Any"]) - SCHEMA_FILE = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#File"]) - if version in ("draft-4"): - SCHEMA_DIR = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Directory"]) - SCHEMA_DIRENT = cast(Dict[unicode, Any], - SCHEMA_CACHE[version][3].idx["https://w3id.org/cwl/cwl#Dirent"]) - return SCHEMA_CACHE[version] def shortname(inputid): # type: (unicode) -> unicode d = urlparse.urlparse(inputid) if d.fragment: - return d.fragment.split(u"/")[-1].split(u".")[-1] + return d.fragment.split(u"/")[-1] else: return d.path.split(u"/")[-1] @@ -171,9 +160,10 @@ def getListing(fs_access, rec): listing = [] loc = rec["location"] for ld in fs_access.listdir(loc): + print loc, ld if fs_access.isdir(ld): ent = {"class": "Directory", - "location": loc + "/" + os.path.basename(ld)} + "location": ld} getListing(fs_access, ent) listing.append({"entryname": os.path.basename(ld), "entry": ent}) @@ -185,10 +175,10 @@ def getListing(fs_access, rec): def stageFiles(pm, stageFunc): for f in pm.files(): p = pm.mapper(f) - if not os.path.exists(os.path.dirname(p[1])): - os.makedirs(os.path.dirname(p[1]), 0755) - if not p[0].startswith("_dir:"): - stageFunc(p[0], p[1]) + if not os.path.exists(os.path.dirname(p.target)): + os.makedirs(os.path.dirname(p.target), 0755) + if p.type == "File": + stageFunc(p.resolved, p.target) def collectFilesAndDirs(obj, 
out): if isinstance(obj, dict): @@ -300,8 +290,21 @@ def __init__(self, toolpath_object, **kwargs): # type: (Dict[unicode, Any], **Any) -> None self.metadata = kwargs.get("metadata", {}) # type: Dict[str,Any] self.names = None # type: avro.schema.Names - names = schema_salad.schema.make_avro_schema( - filter(lambda x: x is not None, [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY]), schema_salad.ref_resolver.Loader({}))[0] + + if SCHEMA_FILE is None: + global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY # pylint: disable=global-statement + get_schema("draft-4") + SCHEMA_ANY = cast(Dict[unicode, Any], + SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/salad#Any"]) + SCHEMA_FILE = cast(Dict[unicode, Any], + SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#File"]) + SCHEMA_DIR = cast(Dict[unicode, Any], + SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Directory"]) + SCHEMA_DIRENT = cast(Dict[unicode, Any], + SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Dirent"]) + + names = schema_salad.schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY], + schema_salad.ref_resolver.Loader({}))[0] if isinstance(names, avro.schema.SchemaParseException): raise names else: diff --git a/cwltool/stdfsaccess.py b/cwltool/stdfsaccess.py index fcc1c2c1f..cfdcb657b 100644 --- a/cwltool/stdfsaccess.py +++ b/cwltool/stdfsaccess.py @@ -28,4 +28,4 @@ def isdir(self, fn): return os.path.isdir(self._abs(fn)) def listdir(self, fn): - return ["file://%s" % self._abs(l) for l in os.listdir(self._abs(fn))] + return [abspath(l, fn) for l in os.listdir(self._abs(fn))] From 8ccd28c65a6827b10f77fc0c95c926766982304b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 24 Jun 2016 17:36:07 -0400 Subject: [PATCH 19/38] Add fixup to updater for expressions in secondaryFiles. 
--- cwltool/draft2tool.py | 5 +++-- cwltool/update.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 3e6b5d239..ced437773 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -325,6 +325,7 @@ def collect_output_ports(self, ports, builder, outdir): try: ret[fragment] = self.collect_output(port, builder, outdir) except Exception as e: + _logger.debug(u"Error collecting output for parameter '%s'" % shortname(port["id"]), exc_info=e) raise WorkflowException(u"Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e)) if ret: adjustFileObjs(ret, remove_path) @@ -417,11 +418,11 @@ def collect_output(self, schema, builder, outdir): primary["secondaryFiles"] = [] for sf in aslist(schema["secondaryFiles"]): if isinstance(sf, dict) or "$(" in sf or "${" in sf: - sfpath = builder.do_eval(sf, context=r) + sfpath = builder.do_eval(sf, context=primary) if isinstance(sfpath, basestring): sfpath = revmap({"location": sfpath, "class": "File"}) else: - sfpath = {"location": substitute(primary["location"], sf), "class": "File", "hostfs": True} + sfpath = {"location": substitute(primary["location"], sf), "class": "File"} for sfitem in aslist(sfpath): if builder.fs_access.exists(sfitem["location"]): diff --git a/cwltool/update.py b/cwltool/update.py index c11c02d87..bfed3c7f6 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -367,6 +367,10 @@ def _draft4Dev2toDev3(doc, loader, baseuri): if "class" in doc and doc["class"] == "File": doc["location"] = doc["path"] del doc["path"] + if "secondaryFiles" in doc: + for i, sf in enumerate(doc["secondaryFiles"]): + if "$(" in sf or "${" in sf: + doc["secondaryFiles"][i] = sf.replace('"path"', '"location"').replace(".path", ".location") for key, value in doc.items(): doc[key] = _draft4Dev2toDev3(value, loader, baseuri) elif isinstance(doc, list): From 21081cca069965de151ceedb3fa1add2eb5fb220 Mon Sep 17 00:00:00 2001 From: Peter 
Amstutz Date: Fri, 24 Jun 2016 17:51:24 -0400 Subject: [PATCH 20/38] Fix directory handling for location and MapperEnt changes. Fix cwltest to filter path/location after checking. --- cwltool/cwltest.py | 10 ++++++---- cwltool/job.py | 4 ++-- cwltool/process.py | 1 - 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index 2e1c503a7..d4760d1ef 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -30,17 +30,19 @@ def compare(a, b): # type: (Any, Any) -> bool if a.get("class") == "File": if "path" in a: comp = "path" - if "location" in b: - del b["location"] else: comp = "location" - if "path" in b: - del b["path"] if a[comp] and (not (b[comp].endswith("/" + a[comp]) or ("/" not in b[comp] and a[comp] == b[comp]))): raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) # ignore empty collections b = {k: v for k, v in b.iteritems() if not isinstance(v, (list, dict)) or len(v) > 0} + + a = {k: v for k, v in a.iteritems() + if k not in ("path", "location")} + b = {k: v for k, v in b.iteritems() + if k not in ("path", "location")} + if len(a) != len(b): raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) for c in a: diff --git a/cwltool/job.py b/cwltool/job.py index d3fe43085..63a6fb773 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -95,8 +95,8 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, if img_id: runtime = ["docker", "run", "-i"] for src in self.pathmapper.files(): - if not src.startswith("_dir:"): - vol = self.pathmapper.mapper(src) + vol = self.pathmapper.mapper(src) + if vol.type == "File": runtime.append(u"--volume=%s:%s:ro" % (vol.resolved, vol.target)) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl")) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp")) diff --git a/cwltool/process.py b/cwltool/process.py index 
95b5a3ecf..42ab22048 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -160,7 +160,6 @@ def getListing(fs_access, rec): listing = [] loc = rec["location"] for ld in fs_access.listdir(loc): - print loc, ld if fs_access.isdir(ld): ent = {"class": "Directory", "location": ld} From f8a4fbe8aed11873a87f865f0e7815e407cd8663 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Sun, 26 Jun 2016 21:32:47 -0400 Subject: [PATCH 21/38] Update updater so draft-2 still works. --- cwltool/update.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cwltool/update.py b/cwltool/update.py index bfed3c7f6..9fa4cfd6a 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -324,11 +324,27 @@ def _draft3toDraft4dev1(doc, loader, baseuri): # type: (Any, Loader, str) -> Any if isinstance(doc, dict): if "class" in doc and doc["class"] == "Workflow": + def fixup(f): + doc, frg = urlparse.urldefrag(f) + frg = '/'.join(frg.rsplit('.', 1)) + return doc + "#" + frg + for step in doc["steps"]: step["in"] = step["inputs"] step["out"] = step["outputs"] del step["inputs"] del step["outputs"] + for io in ("in", "out"): + for i in step[io]: + i["id"] = fixup(i["id"]) + if "source" in i: + i["source"] = [fixup(s) for s in aslist(i["source"])] + if len(i["source"]) == 1: + i["source"] = i["source"][0] + if "scatter" in step: + step["scatter"] = [fixup(s) for s in aslist(step["scatter"])] + for out in doc["outputs"]: + out["source"] = fixup(out["source"]) for key, value in doc.items(): doc[key] = _draft3toDraft4dev1(value, loader, baseuri) elif isinstance(doc, list): From a14867cd12203bb090d12e8f1dff0f4150d05a81 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 27 Jun 2016 08:49:06 -0400 Subject: [PATCH 22/38] Check for illegal characters in input file name. 
--- cwltool/draft2tool.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index ced437773..f362ce85e 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -26,6 +26,7 @@ from .pathmapper import PathMapper from .job import CommandLineJob +WHITELIST_RE = re.compile(r"^[a-zA-Z0-9._-]+$") from .flatten import flatten @@ -239,12 +240,12 @@ def rm_pending_output_callback(output_callback, jobcachepending, # map files to assigned path inside a container. We need to also explicitly # walk over input as implicit reassignment doesn't reach everything in builder.bindings def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] - if not f.get("containerfs"): - f["path"] = builder.pathmapper.mapper(f["location"])[1] - if f["class"] == "File": - # XXX should only add if >= draft-4 - f["dirname"], f["basename"] = os.path.split(f["path"]) - f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) + f["path"] = builder.pathmapper.mapper(f["location"])[1] + f["dirname"], f["basename"] = os.path.split(f["path"]) + if f["class"] == "File": + f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) + if not WHITELIST_RE.match(f["basename"]): + raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"])) return f _logger.debug(u"[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4)) From 6c174f0e1dfd18c18e58b9020f2048352ae2eaae Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 27 Jun 2016 17:50:43 -0400 Subject: [PATCH 23/38] Squashed 'cwltool/schemas/' changes from 41c119b..4efa80b 4efa80b Allow "Directory" in secondaryFiles to describe structured dependencies. 524d2c0 Merge branch 'master' into directory ff5ddf3 Bring up to date with draft-4 changes. 
32c4a37 Updating conformance tests to draft-4.dev3, path -> location c55f733 Add draft-4.dev3 b449b3d location field is required on File and Directory. Rename conflicting basename to entryname on Dirent. 8798dc8 Merge branch 'master' into directory 2d7ca73 Adjust Directory to also use a `location` field to identify it. 45af7a3 Add location, update text for dirname, basename, nameroot, nameext. 227a981 Add basename, nameroot, nameext to conformance tests. 832b52b Add initial conformance tests for input and output of directory objects. 5ad6a16 Allow expressions in `arguments` a2263a9 Make path/listing optional (sort of). de7cb76 Add listing to directory type git-subtree-dir: cwltool/schemas git-subtree-split: 4efa80be542b42c1b4b0c7d824c4e4a2df25e579 --- draft-3/examples/arguments.cwl | 1 + draft-4/CommandLineTool.yml | 2 +- draft-4/Process.yml | 122 +++++++++++++-- draft-4/conformance_test_draft-4.yaml | 170 ++++++++++++++------- draft-4/draft-4/binding-test.cwl | 4 +- draft-4/draft-4/bwa-mem-job.json | 6 +- draft-4/draft-4/bwa-mem-tool.cwl | 4 +- draft-4/draft-4/cat-job.json | 2 +- draft-4/draft-4/cat-n-job.json | 2 +- draft-4/draft-4/cat1-testcli.cwl | 4 +- draft-4/draft-4/cat1-tool.cwl | 2 +- draft-4/draft-4/cat2-tool.cwl | 2 +- draft-4/draft-4/cat3-tool-mediumcut.cwl | 2 +- draft-4/draft-4/cat3-tool-shortcut.cwl | 2 +- draft-4/draft-4/cat3-tool.cwl | 2 +- draft-4/draft-4/cat4-tool.cwl | 2 +- draft-4/draft-4/cat5-tool.cwl | 2 +- draft-4/draft-4/conflict-wf.cwl | 2 +- draft-4/draft-4/count-lines1-wf.cwl | 2 +- draft-4/draft-4/count-lines2-wf.cwl | 2 +- draft-4/draft-4/count-lines3-job.json | 4 +- draft-4/draft-4/count-lines3-wf.cwl | 2 +- draft-4/draft-4/count-lines4-job.json | 4 +- draft-4/draft-4/count-lines4-wf.cwl | 2 +- draft-4/draft-4/count-lines5-wf.cwl | 4 +- draft-4/draft-4/count-lines6-job.json | 8 +- draft-4/draft-4/count-lines6-wf.cwl | 2 +- draft-4/draft-4/count-lines7-wf.cwl | 2 +- draft-4/draft-4/count-lines8-wf.cwl | 2 +- 
draft-4/draft-4/count-lines9-wf.cwl | 4 +- draft-4/draft-4/dir-job.yml | 3 + draft-4/draft-4/dir.cwl | 18 +++ draft-4/draft-4/dir2.cwl | 20 +++ draft-4/draft-4/dir3-job.yml | 3 + draft-4/draft-4/dir3.cwl | 13 ++ draft-4/draft-4/echo-tool.cwl | 2 +- draft-4/draft-4/egrep-stderr-mediumcut.cwl | 2 +- draft-4/draft-4/egrep-stderr-shortcut.cwl | 2 +- draft-4/draft-4/egrep-stderr.cwl | 2 +- draft-4/draft-4/env-tool1.cwl | 2 +- draft-4/draft-4/env-tool2.cwl | 2 +- draft-4/draft-4/env-wf1.cwl | 2 +- draft-4/draft-4/env-wf2.cwl | 2 +- draft-4/draft-4/formattest-job.json | 2 +- draft-4/draft-4/formattest.cwl | 2 +- draft-4/draft-4/formattest2-job.json | 2 +- draft-4/draft-4/formattest2.cwl | 2 +- draft-4/draft-4/formattest3.cwl | 2 +- draft-4/draft-4/glob-expr-list.cwl | 2 +- draft-4/draft-4/metadata.cwl | 2 +- draft-4/draft-4/null-expression1-tool.cwl | 2 +- draft-4/draft-4/null-expression2-tool.cwl | 2 +- draft-4/draft-4/optional-output.cwl | 2 +- draft-4/draft-4/params.cwl | 2 +- draft-4/draft-4/params2.cwl | 2 +- draft-4/draft-4/parseInt-job.json | 2 +- draft-4/draft-4/parseInt-tool.cwl | 2 +- draft-4/draft-4/record-output-job.json | 4 +- draft-4/draft-4/record-output.cwl | 2 +- draft-4/draft-4/rename-job.json | 2 +- draft-4/draft-4/rename.cwl | 2 +- draft-4/draft-4/revsort-job.json | 2 +- draft-4/draft-4/revsort.cwl | 2 +- draft-4/draft-4/revtool.cwl | 2 +- draft-4/draft-4/scatter-valuefrom-wf1.cwl | 2 +- draft-4/draft-4/scatter-valuefrom-wf2.cwl | 2 +- draft-4/draft-4/scatter-valuefrom-wf3.cwl | 2 +- draft-4/draft-4/scatter-valuefrom-wf4.cwl | 2 +- draft-4/draft-4/scatter-wf1.cwl | 2 +- draft-4/draft-4/scatter-wf2.cwl | 2 +- draft-4/draft-4/scatter-wf3.cwl | 2 +- draft-4/draft-4/scatter-wf4.cwl | 2 +- draft-4/draft-4/schemadef-tool.cwl | 2 +- draft-4/draft-4/schemadef-wf.cwl | 2 +- draft-4/draft-4/search-job.json | 2 +- draft-4/draft-4/search.cwl | 18 +-- draft-4/draft-4/shelltest.cwl | 2 +- draft-4/draft-4/sorttool.cwl | 2 +- draft-4/draft-4/step-valuefrom-wf.cwl | 2 
+- draft-4/draft-4/step-valuefrom-wf.json | 2 +- draft-4/draft-4/step-valuefrom2-wf.cwl | 2 +- draft-4/draft-4/step-valuefrom3-wf.cwl | 2 +- draft-4/draft-4/template-tool.cwl | 2 +- draft-4/draft-4/test-cwl-out.cwl | 2 +- draft-4/draft-4/tmap-job.json | 2 +- draft-4/draft-4/tmap-tool.cwl | 4 +- draft-4/draft-4/wc-job.json | 2 +- draft-4/draft-4/wc-tool.cwl | 2 +- draft-4/draft-4/wc2-tool.cwl | 2 +- draft-4/draft-4/wc3-tool.cwl | 2 +- draft-4/draft-4/wc4-tool.cwl | 2 +- 91 files changed, 382 insertions(+), 178 deletions(-) create mode 100644 draft-4/draft-4/dir-job.yml create mode 100644 draft-4/draft-4/dir.cwl create mode 100644 draft-4/draft-4/dir2.cwl create mode 100644 draft-4/draft-4/dir3-job.yml create mode 100644 draft-4/draft-4/dir3.cwl diff --git a/draft-3/examples/arguments.cwl b/draft-3/examples/arguments.cwl index 862540be2..23e91ebdd 100644 --- a/draft-3/examples/arguments.cwl +++ b/draft-3/examples/arguments.cwl @@ -1,5 +1,6 @@ cwlVersion: cwl:draft-3 class: CommandLineTool +label: Example trivial wrapper for Java 7 compiler baseCommand: javac hints: - class: DockerRequirement diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index cec579407..bedb5fc28 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -499,7 +499,7 @@ $graph: type: - "null" - type: array - items: [string, "#CommandLineBinding"] + items: [string, "#Expression", "#CommandLineBinding"] jsonldPredicate: "_id": "cwl:arguments" "_container": "@list" diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 319ff27f0..908c65a35 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -33,15 +33,18 @@ $graph: - cwl:draft-3 - cwl:draft-4.dev1 - cwl:draft-4.dev2 + - cwl:draft-4.dev3 - name: CWLType type: enum extends: "sld:PrimitiveType" symbols: - cwl:File + - cwl:Directory doc: - "Extends primitive types with the concept of a file as a first class type." 
- "File: A File object" + - "Directory: A Directory object" - name: File type: record @@ -58,20 +61,82 @@ $graph: symbols: - cwl:File jsonldPredicate: - "_id": "@type" - "_type": "@vocab" + _id: "@type" + _type: "@vocab" doc: Must be `File` to indicate this object describes a file. - - name: path + - name: location type: string - doc: The path to the file. + doc: | + A URI that identifies the file resource. This may be a relative + reference, in which case it must be resolved using the base URI of the + document. The location may refer to a local or remote resource; the + implementation must use the URI to retrieve file content. If an + implementation is unable to retrieve the file content stored at a + remote resource (due to unsupported protocol, access denied, or other + issue) it must signal an error. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local path where the File is made available prior to executing a + CommandLineTool. This field must not be used in any other context. The + command line tool being executed must be able to access the file at + `path` using the POSIX `open(2)` syscall. jsonldPredicate: "_id": "cwl:path" "_type": "@id" + - name: basename + type: string? + doc: | + The base name of the file, that is, the path component following the + final slash in the path. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: dirname + type: string? + doc: | + The name of the directory containing the file, that is, the path leading up + to the final slash in the path such that `dirname + '/' + basename == + path`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context.
+ - name: nameroot + type: string? + doc: | + The basename root such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have a nameroot of `.cshrc`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameext + type: string? + doc: | + The basename extension such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have an empty `nameext`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. - name: checksum type: ["null", string] doc: | Optional hash code for validating file integrity. Currently must be in the form - "sha1$ + hexidecimal string" using the SHA-1 algorithm. + "sha1$ + hexadecimal string" using the SHA-1 algorithm. - name: size type: ["null", long] doc: Optional file size. @@ -80,6 +145,7 @@ $graph: - "null" - type: array items: "#File" + - Directory jsonldPredicate: "cwl:secondaryFiles" doc: | A list of additional files that are associated with the primary file @@ -111,12 +177,23 @@ $graph: runtime may perform exact file format matches. +- name: Dirent + type: record + fields: + - name: entryname + type: string + jsonldPredicate: + "_id": cwl:entryname + - name: entry + type: [File, Directory] + + - name: Directory type: record docParent: "#CWLType" doc: | Represents a directory to present to a command line tool. This could be a virtual - directory, made of files assembled from a number of concrete directories. 
+ directory, made of files assembled from multiple locations. fields: - name: class type: @@ -125,18 +202,35 @@ $graph: symbols: - cwl:Directory jsonldPredicate: - "_id": "@type" - "_type": "@vocab" + _id: "@type" + _type: "@vocab" doc: Must be `Directory` to indicate this object describes a Directory. - - name: path + - name: location type: string + doc: | + A URI that identifies the directory resource. This may be a relative + reference, in which case it must be resolved using the base URI of the + document. The location may refer to a local or remote resource. If + the `listing` field is not set, the implementation must use the URI to + retrieve the directory listing. If an implementation is unable to retrieve + the directory listing stored at a remote resource (due to unsupported + protocol, access denied, or other issue) it must signal an error. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? doc: The path to the directory. jsonldPredicate: - "_id": "cwl:path" - "_type": "@id" - # - name: size - # type: ["null", long] - # doc: Optional directory size. + _id: "cwl:path" + _type: "@id" + - name: listing + type: Dirent[]?
+ doc: List of files or subdirectories contained in this directory + jsonldPredicate: + _id: "cwl:listing" + mapSubject: basename + mapPredicate: entry - name: SchemaBase type: record diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index ae839ba0a..78f05e1f6 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -40,7 +40,7 @@ "foo": { "checksum": "sha1$63da67422622fbf9251a046d7a34b7ea0fd4fead", "class": "File", - "path": "foo.txt", + "location": "foo.txt", "size": 22 } job: draft-4/cat-job.json @@ -52,54 +52,60 @@ output_file: class: File checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b - path: output.txt + location: output.txt size: 13 tool: draft-4/cat3-tool.cwl doc: Test command execution in Docker with stdout redirection - job: draft-4/cat-job.json tool: draft-4/cat3-tool-shortcut.cwl - doc: Test command execution in Docker with stdout redirection + output: + output_file: + class: File + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b + location: + size: 13 + doc: Test command execution in Docker with simplified syntax stdout redirection - job: draft-4/cat-job.json output: output_file: class: File checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b - path: cat-out + location: cat-out size: 13 tool: draft-4/cat3-tool-mediumcut.cwl doc: Test command execution in Docker with stdout redirection -- args: [egrep] - stderr: error.txt - job: - tool: draft-4/egrep-stderr.cwl - doc: Test command line with stderr redirection - -- args: [egrep] - job: - tool: draft-4/egrep-stderr-shortcut.cwl - doc: Test command line with stderr redirection, brief syntax - -- args: [egrep] - stderr: std.err - output: - output_file: - class: File - size: 84 - checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 - path: std.err - job: - tool: draft-4/egrep-stderr-mediumcut.cwl - doc: Test command line with stderr redirection, named brief syntax +# - args: [egrep] +# stderr: error.txt 
+# job: +# tool: draft-4/egrep-stderr.cwl +# doc: Test command line with stderr redirection + +# - args: [egrep] +# job: +# tool: draft-4/egrep-stderr-shortcut.cwl +# doc: Test command line with stderr redirection, brief syntax + +# - args: [egrep] +# stderr: std.err +# output: +# output_file: +# class: File +# size: 84 +# checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 +# location: std.err +# job: +# tool: draft-4/egrep-stderr-mediumcut.cwl +# doc: Test command line with stderr redirection, named brief syntax - job: draft-4/cat-job.json output: output_txt: class: File checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b - path: output.txt + location: output.txt size: 13 tool: draft-4/cat4-tool.cwl doc: Test command execution in Docker with stdin and stdout redirection @@ -144,7 +150,7 @@ output: class: File checksum: sha1$631bfbac524e2d04cdcc5ec33ade827fc10b06ae - path: output + location: output size: 15 tool: draft-4/wc-tool.cwl doc: Test command execution in with stdin and stdout redirection @@ -214,7 +220,7 @@ out: class: File checksum: sha1$b3ec4ed1749c207e52b3a6d08c59f31d83bff519 - path: out + location: out size: 15 tool: draft-4/env-tool1.cwl doc: Test EnvVarRequirement @@ -259,7 +265,7 @@ out: class: File checksum: sha1$b3ec4ed1749c207e52b3a6d08c59f31d83bff519 - path: out + location: out size: 15 tool: draft-4/env-wf1.cwl doc: Test requirement priority @@ -269,7 +275,7 @@ out: class: File checksum: sha1$cdc1e84968261d6a7575b5305945471f8be199b6 - path: out + location: out size: 9 tool: draft-4/env-wf2.cwl doc: Test requirements override hints @@ -284,7 +290,7 @@ output: class: File checksum: sha1$b9214658cc453331b62c2282b772a5c063dbd284 - path: output.txt + location: output.txt size: 1111 tool: draft-4/revsort.cwl doc: Test sample workflows from the specification @@ -294,7 +300,7 @@ output_file: class: File checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b - path: output.txt + location: output.txt size: 13 tool: draft-4/cat5-tool.cwl doc: 
Test unknown hints are ignored. @@ -304,32 +310,32 @@ outfile: class: File checksum: sha1$e2dc9daaef945ac15f01c238ed2f1660f60909a0 - path: result.txt + location: result.txt size: 142 indexedfile: { - "path": "input.txt", + "location": "input.txt", "class": "File", "checksum": "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376", "secondaryFiles": [ { - "path": "input.txt.idx1", - "class": "File" + "location": "input.txt.idx1", + "class": "File", }, { - "path": "input.idx2", - "class": "File" + "location": "input.idx2", + "class": "File", }, { - "path": "input.txt.idx3", - "class": "File" + "location": "input.txt.idx3", + "class": "File", }, { - "path": "input.txt.idx4", - "class": "File" + "location": "input.txt.idx4", + "class": "File", }, { - "path": "input.txt.idx5", - "class": "File" + "location": "input.txt.idx5", + "class": "File", } ], "size": 1111 @@ -344,7 +350,7 @@ outfile: class: File checksum: sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376 - path: fish.txt + location: fish.txt size: 1111 tool: draft-4/rename.cwl doc: | @@ -360,7 +366,7 @@ - job: draft-4/schemadef-job.json output: output: - path: output.txt + location: output.txt size: 12 class: File checksum: "sha1$f12e6cfe70f3253f70b0dbde17c692e7fb0f1e5e" @@ -371,7 +377,7 @@ - job: draft-4/schemadef-job.json output: output: - path: output.txt + location: output.txt size: 12 class: File checksum: "sha1$f12e6cfe70f3253f70b0dbde17c692e7fb0f1e5e" @@ -537,7 +543,7 @@ - job: draft-4/formattest-job.json output: output: - "path": "output.txt" + "location": "output.txt" "format": "http://edamontology.org/format_2330" "size": 1111 "class": "File" @@ -549,7 +555,7 @@ - job: draft-4/formattest2-job.json output: output: - "path": "output.txt" + "location": "output.txt" "format": "http://edamontology.org/format_1929" "size": 12010 "class": "File" @@ -561,7 +567,7 @@ - job: draft-4/formattest2-job.json output: output: - "path": "output.txt" + "location": "output.txt" "format": "http://edamontology.org/format_1929" "size": 
12010 "class": "File" @@ -575,7 +581,7 @@ output: optional_file: null output_file: - path: output.txt + location: output.txt size: 13 class: "File" checksum: "sha1$47a013e660d408619d894b20806b1d5086aab03b" @@ -601,13 +607,13 @@ output: "orec": { "ofoo": { - "path": "foo", + "location": "foo", "size": 1111, "class": "File", "checksum": "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376" }, "obar": { - "path": "bar", + "location": "bar", "size": 12010, "class": "File", "checksum": "sha1$aeb3d11bdf536511649129f4077d5cda6a324118" @@ -619,7 +625,7 @@ - job: draft-4/empty.json output: { "foo": { - "path": "foo", + "location": "foo", "class": "File" } } @@ -629,19 +635,19 @@ - job: draft-4/abc.json output: files: [{ - "path": "a", + "location": "a", "size": 0, "class": "File", "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709" }, { - "path": "b", + "location": "b", "size": 0, "class": "File", "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709" }, { - "path": "c", + "location": "c", "size": 0, "class": "File", "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709" @@ -676,7 +682,7 @@ - job: draft-4/conflict-job.json output: { "fileout": { - "path": "out.txt", + "location": "out.txt", "checksum": "sha1$a2d8d6e7b28295dc9977dc3bdb652ddd480995f0", "class": "File", "size": 25 @@ -684,3 +690,49 @@ } tool: "draft-4/conflict-wf.cwl#collision" doc: Test workflow two input files with same name. 
+ +- job: draft-4/dir-job.yml + output: + "outlist": { + "size": 20, + "location": "output.txt", + "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", + "class": "File" + } + tool: draft-4/dir.cwl + doc: Test directory input + +- job: draft-4/dir-job.yml + output: + "outlist": { + "size": 20, + "location": "output.txt", + "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", + "class": "File" + } + tool: draft-4/dir2.cwl + doc: Test directory input in Docker + +- job: draft-4/dir3-job.yml + output: + "outdir": { + "class": "Directory", + "listing": [ + { + "entryname": "hello.txt", + "entry": { + "class": "File", + "location": "hello.txt" + } + }, + { + "entryname": "goodbye.txt", + "entry": { + "class": "File", + "location": "goodbye.txt" + } + } + ], + } + tool: draft-4/dir3.cwl + doc: Test directory output \ No newline at end of file diff --git a/draft-4/draft-4/binding-test.cwl b/draft-4/draft-4/binding-test.cwl index 7e6c3c826..395c50a92 100755 --- a/draft-4/draft-4/binding-test.cwl +++ b/draft-4/draft-4/binding-test.cwl @@ -1,7 +1,7 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: - id: reference @@ -19,7 +19,7 @@ inputs: type: File default: class: File - path: args.py + location: args.py inputBinding: position: -1 diff --git a/draft-4/draft-4/bwa-mem-job.json b/draft-4/draft-4/bwa-mem-job.json index 48d7d6cfe..f3e900dea 100644 --- a/draft-4/draft-4/bwa-mem-job.json +++ b/draft-4/draft-4/bwa-mem-job.json @@ -1,18 +1,18 @@ { "reference": { "class": "File", - "path": "chr20.fa", + "location": "chr20.fa", "size": 123, "checksum": "sha1$hash" }, "reads": [ { "class": "File", - "path": "example_human_Illumina.pe_1.fastq" + "location": "example_human_Illumina.pe_1.fastq" }, { "class": "File", - "path": "example_human_Illumina.pe_2.fastq" + "location": "example_human_Illumina.pe_2.fastq" } ], "min_std_max_min": [ diff --git a/draft-4/draft-4/bwa-mem-tool.cwl
b/draft-4/draft-4/bwa-mem-tool.cwl index b0722effa..63ecb885e 100755 --- a/draft-4/draft-4/bwa-mem-tool.cwl +++ b/draft-4/draft-4/bwa-mem-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner -cwlVersion: draft-4.dev2 +cwlVersion: draft-4.dev3 class: CommandLineTool @@ -34,7 +34,7 @@ inputs: type: File default: class: File - path: args.py + location: args.py inputBinding: position: -1 diff --git a/draft-4/draft-4/cat-job.json b/draft-4/draft-4/cat-job.json index 09f16ba7b..837875d92 100644 --- a/draft-4/draft-4/cat-job.json +++ b/draft-4/draft-4/cat-job.json @@ -1,6 +1,6 @@ { "file1": { "class": "File", - "path": "hello.txt" + "location": "hello.txt" } } diff --git a/draft-4/draft-4/cat-n-job.json b/draft-4/draft-4/cat-n-job.json index ee6416857..1b93b815a 100644 --- a/draft-4/draft-4/cat-n-job.json +++ b/draft-4/draft-4/cat-n-job.json @@ -1,7 +1,7 @@ { "file1": { "class": "File", - "path": "hello.txt" + "location": "hello.txt" }, "numbering": true } diff --git a/draft-4/draft-4/cat1-testcli.cwl b/draft-4/draft-4/cat1-testcli.cwl index 3bad2b662..04c3857f6 100755 --- a/draft-4/draft-4/cat1-testcli.cwl +++ b/draft-4/draft-4/cat1-testcli.cwl @@ -1,7 +1,7 @@ #!/usr/bin/env cwl-runner { "class": "CommandLineTool", - "cwlVersion": "cwl:draft-4.dev2", + "cwlVersion": "cwl:draft-4.dev3", "description": "Print the contents of a file to stdout using 'cat' running in a docker container.", "inputs": [ { @@ -22,7 +22,7 @@ type: File, default: { class: File, - path: args.py + location: args.py }, inputBinding: { position: -1 diff --git a/draft-4/draft-4/cat1-tool.cwl b/draft-4/draft-4/cat1-tool.cwl index 8c7402f33..2ccde1553 100755 --- a/draft-4/draft-4/cat1-tool.cwl +++ b/draft-4/draft-4/cat1-tool.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool description: "Print the contents of a file to stdout using 'cat' running in a docker container." 
hints: diff --git a/draft-4/draft-4/cat2-tool.cwl b/draft-4/draft-4/cat2-tool.cwl index 8038f5163..8348a82f5 100755 --- a/draft-4/draft-4/cat2-tool.cwl +++ b/draft-4/draft-4/cat2-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Print the contents of a file to stdout using 'cat' running in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/cat3-tool-mediumcut.cwl b/draft-4/draft-4/cat3-tool-mediumcut.cwl index 1c29102eb..2e3f7282a 100755 --- a/draft-4/draft-4/cat3-tool-mediumcut.cwl +++ b/draft-4/draft-4/cat3-tool-mediumcut.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Print the contents of a file to stdout using 'cat' running in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/cat3-tool-shortcut.cwl b/draft-4/draft-4/cat3-tool-shortcut.cwl index 4d12a4df4..04eb14d5e 100755 --- a/draft-4/draft-4/cat3-tool-shortcut.cwl +++ b/draft-4/draft-4/cat3-tool-shortcut.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Print the contents of a file to stdout using 'cat' running in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/cat3-tool.cwl b/draft-4/draft-4/cat3-tool.cwl index 56df0f6f8..f67588a9b 100755 --- a/draft-4/draft-4/cat3-tool.cwl +++ b/draft-4/draft-4/cat3-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Print the contents of a file to stdout using 'cat' running in a docker container." 
hints: DockerRequirement: diff --git a/draft-4/draft-4/cat4-tool.cwl b/draft-4/draft-4/cat4-tool.cwl index 1fefd2cf9..733bc9167 100755 --- a/draft-4/draft-4/cat4-tool.cwl +++ b/draft-4/draft-4/cat4-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Print the contents of a file to stdout using 'cat' running in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/cat5-tool.cwl b/draft-4/draft-4/cat5-tool.cwl index f7010e4a8..24e89d91d 100755 --- a/draft-4/draft-4/cat5-tool.cwl +++ b/draft-4/draft-4/cat5-tool.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool description: "Print the contents of a file to stdout using 'cat' running in a docker container." hints: diff --git a/draft-4/draft-4/conflict-wf.cwl b/draft-4/draft-4/conflict-wf.cwl index 2e197004d..c72b27bf2 100644 --- a/draft-4/draft-4/conflict-wf.cwl +++ b/draft-4/draft-4/conflict-wf.cwl @@ -1,4 +1,4 @@ -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: echo class: CommandLineTool diff --git a/draft-4/draft-4/count-lines1-wf.cwl b/draft-4/draft-4/count-lines1-wf.cwl index 96244ec2b..7c58ec84f 100755 --- a/draft-4/draft-4/count-lines1-wf.cwl +++ b/draft-4/draft-4/count-lines1-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: diff --git a/draft-4/draft-4/count-lines2-wf.cwl b/draft-4/draft-4/count-lines2-wf.cwl index 65c1fc1d3..267f949ba 100755 --- a/draft-4/draft-4/count-lines2-wf.cwl +++ b/draft-4/draft-4/count-lines2-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: InlineJavascriptRequirement: {} diff --git a/draft-4/draft-4/count-lines3-job.json b/draft-4/draft-4/count-lines3-job.json index 3bdd5a322..3a93e32d4 
100644 --- a/draft-4/draft-4/count-lines3-job.json +++ b/draft-4/draft-4/count-lines3-job.json @@ -2,11 +2,11 @@ "file1": [ { "class": "File", - "path": "whale.txt" + "location": "whale.txt" }, { "class": "File", - "path": "hello.txt" + "location": "hello.txt" } ] } diff --git a/draft-4/draft-4/count-lines3-wf.cwl b/draft-4/draft-4/count-lines3-wf.cwl index f7cde5a31..d48cd9258 100755 --- a/draft-4/draft-4/count-lines3-wf.cwl +++ b/draft-4/draft-4/count-lines3-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: diff --git a/draft-4/draft-4/count-lines4-job.json b/draft-4/draft-4/count-lines4-job.json index dcd309a6e..1c85bea01 100644 --- a/draft-4/draft-4/count-lines4-job.json +++ b/draft-4/draft-4/count-lines4-job.json @@ -1,10 +1,10 @@ { "file1": { "class": "File", - "path": "whale.txt" + "location": "whale.txt" }, "file2": { "class": "File", - "path": "hello.txt" + "location": "hello.txt" } } diff --git a/draft-4/draft-4/count-lines4-wf.cwl b/draft-4/draft-4/count-lines4-wf.cwl index 4c73d480c..8aada50e3 100755 --- a/draft-4/draft-4/count-lines4-wf.cwl +++ b/draft-4/draft-4/count-lines4-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: diff --git a/draft-4/draft-4/count-lines5-wf.cwl b/draft-4/draft-4/count-lines5-wf.cwl index 2af0f8597..eb38e2231 100755 --- a/draft-4/draft-4/count-lines5-wf.cwl +++ b/draft-4/draft-4/count-lines5-wf.cwl @@ -1,11 +1,11 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: type: File - default: {class: File, path: hello.txt} + default: {class: File, location: hello.txt} outputs: count_output: type: int diff --git a/draft-4/draft-4/count-lines6-job.json b/draft-4/draft-4/count-lines6-job.json index 4db2d1d70..3652ded9d 100644 --- a/draft-4/draft-4/count-lines6-job.json +++ 
b/draft-4/draft-4/count-lines6-job.json @@ -2,21 +2,21 @@ "file1": [ { "class": "File", - "path": "whale.txt" + "location": "whale.txt" }, { "class": "File", - "path": "whale.txt" + "location": "whale.txt" } ], "file2": [ { "class": "File", - "path": "hello.txt" + "location": "hello.txt" }, { "class": "File", - "path": "hello.txt" + "location": "hello.txt" } ] } diff --git a/draft-4/draft-4/count-lines6-wf.cwl b/draft-4/draft-4/count-lines6-wf.cwl index 69accb422..36296bffa 100755 --- a/draft-4/draft-4/count-lines6-wf.cwl +++ b/draft-4/draft-4/count-lines6-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: File[] diff --git a/draft-4/draft-4/count-lines7-wf.cwl b/draft-4/draft-4/count-lines7-wf.cwl index 1394fcf1e..cf0e3d8dd 100755 --- a/draft-4/draft-4/count-lines7-wf.cwl +++ b/draft-4/draft-4/count-lines7-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: MultipleInputFeatureRequirement diff --git a/draft-4/draft-4/count-lines8-wf.cwl b/draft-4/draft-4/count-lines8-wf.cwl index f743718fb..02895b549 100755 --- a/draft-4/draft-4/count-lines8-wf.cwl +++ b/draft-4/draft-4/count-lines8-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: File diff --git a/draft-4/draft-4/count-lines9-wf.cwl b/draft-4/draft-4/count-lines9-wf.cwl index aff39fcb8..9a340adc6 100755 --- a/draft-4/draft-4/count-lines9-wf.cwl +++ b/draft-4/draft-4/count-lines9-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: [] @@ -16,7 +16,7 @@ steps: file1: default: class: File - path: whale.txt + location: whale.txt out: [output] step2: diff --git a/draft-4/draft-4/dir-job.yml b/draft-4/draft-4/dir-job.yml new file mode 100644 index 000000000..30392cfc0 
--- /dev/null +++ b/draft-4/draft-4/dir-job.yml @@ -0,0 +1,3 @@ +indir: + class: Directory + location: testdir \ No newline at end of file diff --git a/draft-4/draft-4/dir.cwl b/draft-4/draft-4/dir.cwl new file mode 100644 index 000000000..0cfe978e8 --- /dev/null +++ b/draft-4/draft-4/dir.cwl @@ -0,0 +1,18 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +requirements: + - class: ShellCommandRequirement +inputs: + indir: Directory +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["cd", "$(inputs.indir.path)", + {shellQuote: false, valueFrom: "&&"}, + "find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/dir2.cwl b/draft-4/draft-4/dir2.cwl new file mode 100644 index 000000000..e64af2c7b --- /dev/null +++ b/draft-4/draft-4/dir2.cwl @@ -0,0 +1,20 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +hints: + DockerRequirement: + dockerPull: debian:8 + ShellCommandRequirement: {} +inputs: + indir: Directory +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["cd", "$(inputs.indir.path)", + {shellQuote: false, valueFrom: "&&"}, + "find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/dir3-job.yml b/draft-4/draft-4/dir3-job.yml new file mode 100644 index 000000000..aff0e8036 --- /dev/null +++ b/draft-4/draft-4/dir3-job.yml @@ -0,0 +1,3 @@ +inf: + class: File + location: hello.tar \ No newline at end of file diff --git a/draft-4/draft-4/dir3.cwl b/draft-4/draft-4/dir3.cwl new file mode 100644 index 000000000..61fb96eba --- /dev/null +++ b/draft-4/draft-4/dir3.cwl @@ -0,0 +1,13 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +baseCommand: [tar, xvf] +inputs: + inf: + type: File + inputBinding: + position: 1 +outputs: + outdir: + type: Directory + outputBinding: + glob: . 
diff --git a/draft-4/draft-4/echo-tool.cwl b/draft-4/draft-4/echo-tool.cwl index 85c113939..f207942ea 100644 --- a/draft-4/draft-4/echo-tool.cwl +++ b/draft-4/draft-4/echo-tool.cwl @@ -1,7 +1,7 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: in: type: Any diff --git a/draft-4/draft-4/egrep-stderr-mediumcut.cwl b/draft-4/draft-4/egrep-stderr-mediumcut.cwl index f780c7319..a224a5c50 100644 --- a/draft-4/draft-4/egrep-stderr-mediumcut.cwl +++ b/draft-4/draft-4/egrep-stderr-mediumcut.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Test of capturing stderr output in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/egrep-stderr-shortcut.cwl b/draft-4/draft-4/egrep-stderr-shortcut.cwl index 9a70aa1d5..9a8c26aa1 100644 --- a/draft-4/draft-4/egrep-stderr-shortcut.cwl +++ b/draft-4/draft-4/egrep-stderr-shortcut.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Test of capturing stderr output in a docker container." hints: DockerRequirement: diff --git a/draft-4/draft-4/egrep-stderr.cwl b/draft-4/draft-4/egrep-stderr.cwl index 4bba74962..86ec03d05 100644 --- a/draft-4/draft-4/egrep-stderr.cwl +++ b/draft-4/draft-4/egrep-stderr.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Test of capturing stderr output in a docker container." 
hints: DockerRequirement: diff --git a/draft-4/draft-4/env-tool1.cwl b/draft-4/draft-4/env-tool1.cwl index 64f406392..af33b3b54 100644 --- a/draft-4/draft-4/env-tool1.cwl +++ b/draft-4/draft-4/env-tool1.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: in: string outputs: diff --git a/draft-4/draft-4/env-tool2.cwl b/draft-4/draft-4/env-tool2.cwl index db5977a8e..8df0b64da 100644 --- a/draft-4/draft-4/env-tool2.cwl +++ b/draft-4/draft-4/env-tool2.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: in: string outputs: diff --git a/draft-4/draft-4/env-wf1.cwl b/draft-4/draft-4/env-wf1.cwl index 11583218f..09268c5a1 100644 --- a/draft-4/draft-4/env-wf1.cwl +++ b/draft-4/draft-4/env-wf1.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: in: string diff --git a/draft-4/draft-4/env-wf2.cwl b/draft-4/draft-4/env-wf2.cwl index fd5b321e7..9de645356 100644 --- a/draft-4/draft-4/env-wf2.cwl +++ b/draft-4/draft-4/env-wf2.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: in: string diff --git a/draft-4/draft-4/formattest-job.json b/draft-4/draft-4/formattest-job.json index da49d0103..0ff024096 100644 --- a/draft-4/draft-4/formattest-job.json +++ b/draft-4/draft-4/formattest-job.json @@ -1,7 +1,7 @@ { "input": { "class": "File", - "path": "whale.txt", + "location": "whale.txt", "format": "edam:format_2330" } } diff --git a/draft-4/draft-4/formattest.cwl b/draft-4/draft-4/formattest.cwl index 80dff90e4..d97e9be4d 100644 --- a/draft-4/draft-4/formattest.cwl +++ b/draft-4/draft-4/formattest.cwl @@ -1,6 +1,6 @@ $namespaces: edam: "http://edamontology.org/" -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool description: "Reverse each line using the `rev` command" inputs: diff 
--git a/draft-4/draft-4/formattest2-job.json b/draft-4/draft-4/formattest2-job.json index c70f7fb49..f706f6ed1 100644 --- a/draft-4/draft-4/formattest2-job.json +++ b/draft-4/draft-4/formattest2-job.json @@ -1,7 +1,7 @@ { "input": { "class": "File", - "path": "ref.fasta", + "location": "ref.fasta", "format": "edam:format_1929" } } diff --git a/draft-4/draft-4/formattest2.cwl b/draft-4/draft-4/formattest2.cwl index 377198de8..7ed3f9daa 100644 --- a/draft-4/draft-4/formattest2.cwl +++ b/draft-4/draft-4/formattest2.cwl @@ -3,7 +3,7 @@ $namespaces: $schemas: - EDAM.owl class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Reverse each line using the `rev` command" inputs: diff --git a/draft-4/draft-4/formattest3.cwl b/draft-4/draft-4/formattest3.cwl index fa4237699..e6342dfb4 100644 --- a/draft-4/draft-4/formattest3.cwl +++ b/draft-4/draft-4/formattest3.cwl @@ -5,7 +5,7 @@ $schemas: - EDAM.owl - gx_edam.ttl class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Reverse each line using the `rev` command" inputs: diff --git a/draft-4/draft-4/glob-expr-list.cwl b/draft-4/draft-4/glob-expr-list.cwl index 984142779..6144a785c 100644 --- a/draft-4/draft-4/glob-expr-list.cwl +++ b/draft-4/draft-4/glob-expr-list.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: ids: diff --git a/draft-4/draft-4/metadata.cwl b/draft-4/draft-4/metadata.cwl index b49e3f447..71c55e61a 100644 --- a/draft-4/draft-4/metadata.cwl +++ b/draft-4/draft-4/metadata.cwl @@ -6,7 +6,7 @@ $schemas: - foaf.rdf - dcterms.rdf -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool description: "Print the contents of a file to stdout using 'cat' running in a docker container." 
diff --git a/draft-4/draft-4/null-expression1-tool.cwl b/draft-4/draft-4/null-expression1-tool.cwl index 3460452f4..d953be546 100644 --- a/draft-4/draft-4/null-expression1-tool.cwl +++ b/draft-4/draft-4/null-expression1-tool.cwl @@ -3,7 +3,7 @@ class: ExpressionTool requirements: - class: InlineJavascriptRequirement -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: i1: diff --git a/draft-4/draft-4/null-expression2-tool.cwl b/draft-4/draft-4/null-expression2-tool.cwl index 129e26968..8cfb02dc8 100644 --- a/draft-4/draft-4/null-expression2-tool.cwl +++ b/draft-4/draft-4/null-expression2-tool.cwl @@ -3,7 +3,7 @@ class: ExpressionTool requirements: - class: InlineJavascriptRequirement -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: i1: Any diff --git a/draft-4/draft-4/optional-output.cwl b/draft-4/draft-4/optional-output.cwl index 46be1cdfe..ea058d3e9 100644 --- a/draft-4/draft-4/optional-output.cwl +++ b/draft-4/draft-4/optional-output.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: "cwl:draft-4.dev2" +cwlVersion: "cwl:draft-4.dev3" description: "Print the contents of a file to stdout using 'cat' running in a docker container." 
hints: DockerRequirement: diff --git a/draft-4/draft-4/params.cwl b/draft-4/draft-4/params.cwl index 774023f48..6fd43a50f 100644 --- a/draft-4/draft-4/params.cwl +++ b/draft-4/draft-4/params.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: bar: type: Any diff --git a/draft-4/draft-4/params2.cwl b/draft-4/draft-4/params2.cwl index 9993cef36..01967fcb3 100644 --- a/draft-4/draft-4/params2.cwl +++ b/draft-4/draft-4/params2.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/parseInt-job.json b/draft-4/draft-4/parseInt-job.json index db0c8f496..b584ea23b 100644 --- a/draft-4/draft-4/parseInt-job.json +++ b/draft-4/draft-4/parseInt-job.json @@ -1,6 +1,6 @@ { "file1": { "class": "File", - "path": "number.txt" + "location": "number.txt" } } diff --git a/draft-4/draft-4/parseInt-tool.cwl b/draft-4/draft-4/parseInt-tool.cwl index bf1cc5a2d..5c3a77557 100755 --- a/draft-4/draft-4/parseInt-tool.cwl +++ b/draft-4/draft-4/parseInt-tool.cwl @@ -3,7 +3,7 @@ class: ExpressionTool requirements: - class: InlineJavascriptRequirement -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: diff --git a/draft-4/draft-4/record-output-job.json b/draft-4/draft-4/record-output-job.json index 44d008189..df29550c2 100644 --- a/draft-4/draft-4/record-output-job.json +++ b/draft-4/draft-4/record-output-job.json @@ -1,6 +1,6 @@ { "irec": { - "ifoo": {"path": "whale.txt", "class": "File"}, - "ibar": {"path": "ref.fasta", "class": "File"} + "ifoo": {"location": "whale.txt", "class": "File"}, + "ibar": {"location": "ref.fasta", "class": "File"} } } \ No newline at end of file diff --git a/draft-4/draft-4/record-output.cwl b/draft-4/draft-4/record-output.cwl index b85251464..2e718da98 100644 --- a/draft-4/draft-4/record-output.cwl +++ b/draft-4/draft-4/record-output.cwl @@ -1,5 +1,5 @@ 
class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: ShellCommandRequirement inputs: diff --git a/draft-4/draft-4/rename-job.json b/draft-4/draft-4/rename-job.json index 917bc989d..c8ff96069 100644 --- a/draft-4/draft-4/rename-job.json +++ b/draft-4/draft-4/rename-job.json @@ -1,6 +1,6 @@ { "srcfile": { - "path": "whale.txt", + "location": "whale.txt", "class": "File" }, "newname": "fish.txt" diff --git a/draft-4/draft-4/rename.cwl b/draft-4/draft-4/rename.cwl index 7b9de2931..663fc82d0 100644 --- a/draft-4/draft-4/rename.cwl +++ b/draft-4/draft-4/rename.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 baseCommand: "true" requirements: CreateFileRequirement: diff --git a/draft-4/draft-4/revsort-job.json b/draft-4/draft-4/revsort-job.json index 3c6d0f889..f5671aab2 100644 --- a/draft-4/draft-4/revsort-job.json +++ b/draft-4/draft-4/revsort-job.json @@ -1,6 +1,6 @@ { "input": { "class": "File", - "path": "whale.txt" + "location": "whale.txt" } } diff --git a/draft-4/draft-4/revsort.cwl b/draft-4/draft-4/revsort.cwl index 8ca76ee08..e94b30ada 100644 --- a/draft-4/draft-4/revsort.cwl +++ b/draft-4/draft-4/revsort.cwl @@ -3,7 +3,7 @@ # class: Workflow description: "Reverse the lines in a document, then sort those lines." -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 # Requirements & hints specify prerequisites and extensions to the workflow. # In this example, DockerRequirement specifies a default Docker container diff --git a/draft-4/draft-4/revtool.cwl b/draft-4/draft-4/revtool.cwl index 2a276f57d..ba3033c09 100644 --- a/draft-4/draft-4/revtool.cwl +++ b/draft-4/draft-4/revtool.cwl @@ -2,7 +2,7 @@ # Simplest example command line program wrapper for the Unix tool "rev". 
# class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Reverse each line using the `rev` command" # The "inputs" array defines the structure of the input object that describes diff --git a/draft-4/draft-4/scatter-valuefrom-wf1.cwl b/draft-4/draft-4/scatter-valuefrom-wf1.cwl index ab36fb4fa..0256066b3 100644 --- a/draft-4/draft-4/scatter-valuefrom-wf1.cwl +++ b/draft-4/draft-4/scatter-valuefrom-wf1.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: Workflow inputs: inp: diff --git a/draft-4/draft-4/scatter-valuefrom-wf2.cwl b/draft-4/draft-4/scatter-valuefrom-wf2.cwl index f59ed9618..1894abee6 100644 --- a/draft-4/draft-4/scatter-valuefrom-wf2.cwl +++ b/draft-4/draft-4/scatter-valuefrom-wf2.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: Workflow inputs: diff --git a/draft-4/draft-4/scatter-valuefrom-wf3.cwl b/draft-4/draft-4/scatter-valuefrom-wf3.cwl index 0e9192d2f..a691acf5d 100644 --- a/draft-4/draft-4/scatter-valuefrom-wf3.cwl +++ b/draft-4/draft-4/scatter-valuefrom-wf3.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: echo diff --git a/draft-4/draft-4/scatter-valuefrom-wf4.cwl b/draft-4/draft-4/scatter-valuefrom-wf4.cwl index aaf763da4..9cae1a2b2 100644 --- a/draft-4/draft-4/scatter-valuefrom-wf4.cwl +++ b/draft-4/draft-4/scatter-valuefrom-wf4.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: echo class: CommandLineTool diff --git a/draft-4/draft-4/scatter-wf1.cwl b/draft-4/draft-4/scatter-wf1.cwl index eec267915..a8ffc6531 100644 --- a/draft-4/draft-4/scatter-wf1.cwl +++ b/draft-4/draft-4/scatter-wf1.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: Workflow inputs: inp: string[] diff --git 
a/draft-4/draft-4/scatter-wf2.cwl b/draft-4/draft-4/scatter-wf2.cwl index adb798a5c..abc20be26 100644 --- a/draft-4/draft-4/scatter-wf2.cwl +++ b/draft-4/draft-4/scatter-wf2.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: Workflow inputs: diff --git a/draft-4/draft-4/scatter-wf3.cwl b/draft-4/draft-4/scatter-wf3.cwl index 7cfe1250c..ce32fbd69 100644 --- a/draft-4/draft-4/scatter-wf3.cwl +++ b/draft-4/draft-4/scatter-wf3.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: echo diff --git a/draft-4/draft-4/scatter-wf4.cwl b/draft-4/draft-4/scatter-wf4.cwl index 8ff37bc53..c01d5650d 100644 --- a/draft-4/draft-4/scatter-wf4.cwl +++ b/draft-4/draft-4/scatter-wf4.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: echo class: CommandLineTool diff --git a/draft-4/draft-4/schemadef-tool.cwl b/draft-4/draft-4/schemadef-tool.cwl index 33e6f20f3..0d8447442 100644 --- a/draft-4/draft-4/schemadef-tool.cwl +++ b/draft-4/draft-4/schemadef-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - $import: schemadef-type.yml - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/schemadef-wf.cwl b/draft-4/draft-4/schemadef-wf.cwl index 15b873e80..4667f7894 100644 --- a/draft-4/draft-4/schemadef-wf.cwl +++ b/draft-4/draft-4/schemadef-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: Workflow requirements: diff --git a/draft-4/draft-4/search-job.json b/draft-4/draft-4/search-job.json index 0a6735da8..b5a1b61e5 100644 --- a/draft-4/draft-4/search-job.json +++ b/draft-4/draft-4/search-job.json @@ -1,7 +1,7 @@ { "infile": { "class": "File", - "path": "whale.txt" + "location": "whale.txt" }, "term": "find" } diff --git 
a/draft-4/draft-4/search.cwl b/draft-4/draft-4/search.cwl index 104a26437..0db526a4c 100644 --- a/draft-4/draft-4/search.cwl +++ b/draft-4/draft-4/search.cwl @@ -1,4 +1,4 @@ -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 $graph: - id: index class: CommandLineTool @@ -19,7 +19,7 @@ $graph: type: File default: class: File - path: index.py + location: index.py inputBinding: position: 0 outputs: @@ -30,9 +30,9 @@ $graph: secondaryFiles: - ".idx1" - "^.idx2" - - '$(self.path+".idx3")' - - '$({"path": self.path+".idx4", "class": "File"})' - - '${ return self.path+".idx5"; }' + - '$(self.location+".idx3")' + - '$({"location": self.location+".idx4", "class": "File"})' + - '${ return self.location+".idx5"; }' - id: search class: CommandLineTool @@ -47,14 +47,14 @@ $graph: secondaryFiles: - ".idx1" - "^.idx2" - - '$(self.path+".idx3")' - - '$({"path": self.path+".idx4", "class": "File"})' - - '${ return self.path+".idx5"; }' + - '$(self.location+".idx3")' + - '$({"location": self.location+".idx4", "class": "File"})' + - '${ return self.location+".idx5"; }' search.py: type: File default: class: File - path: search.py + location: search.py inputBinding: position: 0 term: diff --git a/draft-4/draft-4/shelltest.cwl b/draft-4/draft-4/shelltest.cwl index 0ae27a3cb..9ccb34d52 100644 --- a/draft-4/draft-4/shelltest.cwl +++ b/draft-4/draft-4/shelltest.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 description: "Reverse each line using the `rev` command then sort." requirements: - class: ShellCommandRequirement diff --git a/draft-4/draft-4/sorttool.cwl b/draft-4/draft-4/sorttool.cwl index 447a502d2..7aeeadc03 100644 --- a/draft-4/draft-4/sorttool.cwl +++ b/draft-4/draft-4/sorttool.cwl @@ -2,7 +2,7 @@ # demonstrating command line flags. 
class: CommandLineTool description: "Sort lines using the `sort` command" -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 # This example is similar to the previous one, with an additional input # parameter called "reverse". It is a boolean parameter, which is diff --git a/draft-4/draft-4/step-valuefrom-wf.cwl b/draft-4/draft-4/step-valuefrom-wf.cwl index ba8418131..75b251d02 100644 --- a/draft-4/draft-4/step-valuefrom-wf.cwl +++ b/draft-4/draft-4/step-valuefrom-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: StepInputExpressionRequirement diff --git a/draft-4/draft-4/step-valuefrom-wf.json b/draft-4/draft-4/step-valuefrom-wf.json index 8d6a9a6f1..b13324174 100644 --- a/draft-4/draft-4/step-valuefrom-wf.json +++ b/draft-4/draft-4/step-valuefrom-wf.json @@ -2,7 +2,7 @@ "in": { "file1": { "class": "File", - "path": "whale.txt" + "location": "whale.txt" } } } diff --git a/draft-4/draft-4/step-valuefrom2-wf.cwl b/draft-4/draft-4/step-valuefrom2-wf.cwl index c5c9b8160..8d085aa37 100644 --- a/draft-4/draft-4/step-valuefrom2-wf.cwl +++ b/draft-4/draft-4/step-valuefrom2-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: StepInputExpressionRequirement - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/step-valuefrom3-wf.cwl b/draft-4/draft-4/step-valuefrom3-wf.cwl index bd1acc8a8..54f70ba85 100644 --- a/draft-4/draft-4/step-valuefrom3-wf.cwl +++ b/draft-4/draft-4/step-valuefrom3-wf.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: Workflow -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: StepInputExpressionRequirement - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/template-tool.cwl b/draft-4/draft-4/template-tool.cwl index 7e3521382..18bf39ad9 100755 --- a/draft-4/draft-4/template-tool.cwl +++ 
b/draft-4/draft-4/template-tool.cwl @@ -1,5 +1,5 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool requirements: - class: DockerRequirement diff --git a/draft-4/draft-4/test-cwl-out.cwl b/draft-4/draft-4/test-cwl-out.cwl index 5ce6331be..884054d74 100644 --- a/draft-4/draft-4/test-cwl-out.cwl +++ b/draft-4/draft-4/test-cwl-out.cwl @@ -1,5 +1,5 @@ class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: ShellCommandRequirement - class: DockerRequirement diff --git a/draft-4/draft-4/tmap-job.json b/draft-4/draft-4/tmap-job.json index b4fec8a09..24cdda68a 100644 --- a/draft-4/draft-4/tmap-job.json +++ b/draft-4/draft-4/tmap-job.json @@ -1,7 +1,7 @@ { "reads": { "class": "File", - "path": "reads.fastq" + "location": "reads.fastq" }, "stages": [ { diff --git a/draft-4/draft-4/tmap-tool.cwl b/draft-4/draft-4/tmap-tool.cwl index 8f8aca16a..1b6273e26 100755 --- a/draft-4/draft-4/tmap-tool.cwl +++ b/draft-4/draft-4/tmap-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner { - "cwlVersion": "cwl:draft-4.dev2", + "cwlVersion": "cwl:draft-4.dev3", "class": "CommandLineTool", @@ -24,7 +24,7 @@ type: File, default: { class: File, - path: args.py + location: args.py }, inputBinding: { position: -1 diff --git a/draft-4/draft-4/wc-job.json b/draft-4/draft-4/wc-job.json index 32ffc842b..598568d38 100644 --- a/draft-4/draft-4/wc-job.json +++ b/draft-4/draft-4/wc-job.json @@ -1,6 +1,6 @@ { "file1": { "class": "File", - "path": "whale.txt" + "location": "whale.txt" } } diff --git a/draft-4/draft-4/wc-tool.cwl b/draft-4/draft-4/wc-tool.cwl index f129aaf0d..08ef2e77b 100755 --- a/draft-4/draft-4/wc-tool.cwl +++ b/draft-4/draft-4/wc-tool.cwl @@ -1,7 +1,7 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 inputs: file1: File diff --git a/draft-4/draft-4/wc2-tool.cwl b/draft-4/draft-4/wc2-tool.cwl index 4e5704e7d..510a6dc5c 
100755 --- a/draft-4/draft-4/wc2-tool.cwl +++ b/draft-4/draft-4/wc2-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/wc3-tool.cwl b/draft-4/draft-4/wc3-tool.cwl index f7fff7e5a..6aa6986f4 100644 --- a/draft-4/draft-4/wc3-tool.cwl +++ b/draft-4/draft-4/wc3-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: InlineJavascriptRequirement diff --git a/draft-4/draft-4/wc4-tool.cwl b/draft-4/draft-4/wc4-tool.cwl index 4a4404579..21e08a530 100644 --- a/draft-4/draft-4/wc4-tool.cwl +++ b/draft-4/draft-4/wc4-tool.cwl @@ -1,6 +1,6 @@ #!/usr/bin/env cwl-runner class: CommandLineTool -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 requirements: - class: InlineJavascriptRequirement From 09abb8b4cacccf0f4ceff5d1f0c5de692cb967d4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Jun 2016 16:52:20 -0400 Subject: [PATCH 24/38] Squashed 'cwltool/schemas/' changes from 4efa80b..107d005 107d005 Allow expression to provide full listing for InitialWorkDirRequirement 5dba12b Add InitialWorkDirRequirement, supercedes CreateFileRequirement. git-subtree-dir: cwltool/schemas git-subtree-split: 107d00537125e3eef9ae065fc61c558d1bb6fc43 --- draft-4/CommandLineTool.yml | 29 +++++++++++++++++++++++++++++ draft-4/Process.yml | 5 +++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index bedb5fc28..8529d0694 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -626,11 +626,40 @@ $graph: Docker container. 
+- name: DirentExt + type: record + fields: + - name: entryname + type: [string, Expression] + jsonldPredicate: + "_id": cwl:entryname + - name: entry + type: [string, Expression] + "_id": cwl:entry + + +- name: InitialWorkDirRequirement + type: record + extends: ProcessRequirement + fields: + - name: listing + type: + - type: array + items: DirentExt + - string + - Expression + jsonldPredicate: + _id: "cwl:listing" + mapSubject: entryname + mapPredicate: entry + - name: CreateFileRequirement type: record extends: "#ProcessRequirement" doc: | + *Deprecated*, superceded by InitialWorkDirRequirement. + Define a list of files that must be created by the workflow platform in the designated output directory prior to executing the command line tool. See `FileDef` for details. diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 908c65a35..b86e8fb35 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -186,7 +186,8 @@ $graph: "_id": cwl:entryname - name: entry type: [File, Directory] - + jsonldPredicate: + "_id": cwl:entry - name: Directory type: record @@ -229,7 +230,7 @@ $graph: doc: List of files or subdirectories contained in this directory jsonldPredicate: _id: "cwl:listing" - mapSubject: basename + mapSubject: entryname mapPredicate: entry - name: SchemaBase From 71794a710b27456f04493c89fc04f29d6905605f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Jun 2016 17:00:03 -0400 Subject: [PATCH 25/38] Squashed 'cwltool/schemas/' changes from 107d005..56ef87a 56ef87a secondaryFiles has Dirent, not Directory. 
git-subtree-dir: cwltool/schemas git-subtree-split: 56ef87ab7a75bc85bf7331b03d54ec6e32be0065 --- draft-4/Process.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/draft-4/Process.yml b/draft-4/Process.yml index b86e8fb35..2eeedb830 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -144,8 +144,7 @@ $graph: type: - "null" - type: array - items: "#File" - - Directory + items: [File, Dirent] jsonldPredicate: "cwl:secondaryFiles" doc: | A list of additional files that are associated with the primary file From d47df4463c224c05238d58a366e0a37bef22ccf1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Jun 2016 17:09:14 -0400 Subject: [PATCH 26/38] Squashed 'cwltool/schemas/' changes from 56ef87a..5239b16 5239b16 Fix syntax error. Update tests to use InitialWorkDirRequirement git-subtree-dir: cwltool/schemas git-subtree-split: 5239b1644417481b5c43379d969762cc2cde6443 --- draft-4/CommandLineTool.yml | 5 +++-- draft-4/draft-4/rename.cwl | 6 +++--- draft-4/draft-4/search.cwl | 7 +++---- draft-4/draft-4/template-tool.cwl | 5 ++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 8529d0694..1723cd9dd 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -632,10 +632,11 @@ $graph: - name: entryname type: [string, Expression] jsonldPredicate: - "_id": cwl:entryname + _id: cwl:entryname - name: entry type: [string, Expression] - "_id": cwl:entry + jsonldPredicate: + _id: cwl:entry - name: InitialWorkDirRequirement diff --git a/draft-4/draft-4/rename.cwl b/draft-4/draft-4/rename.cwl index 663fc82d0..e8707b5aa 100644 --- a/draft-4/draft-4/rename.cwl +++ b/draft-4/draft-4/rename.cwl @@ -3,9 +3,9 @@ cwlVersion: cwl:draft-4.dev3 baseCommand: "true" requirements: CreateFileRequirement: - fileDef: - - filename: $(inputs.newname) - fileContent: $(inputs.srcfile) + listing: + - entryname: $(inputs.newname) + entry: $(inputs.srcfile) inputs: 
srcfile: File newname: string diff --git a/draft-4/draft-4/search.cwl b/draft-4/draft-4/search.cwl index 0db526a4c..dc32c0955 100644 --- a/draft-4/draft-4/search.cwl +++ b/draft-4/draft-4/search.cwl @@ -7,10 +7,9 @@ $graph: - valueFrom: input.txt position: 1 requirements: - - class: CreateFileRequirement - fileDef: - - filename: input.txt - fileContent: $(inputs.file) + - class: InitialWorkDirRequirement + listing: + input.txt: $(inputs.file) - class: InlineJavascriptRequirement inputs: diff --git a/draft-4/draft-4/template-tool.cwl b/draft-4/draft-4/template-tool.cwl index 18bf39ad9..05524033a 100755 --- a/draft-4/draft-4/template-tool.cwl +++ b/draft-4/draft-4/template-tool.cwl @@ -9,9 +9,8 @@ requirements: - { $include: underscore.js } - "var t = function(s) { return _.template(s)({'inputs': inputs}); };" - class: CreateFileRequirement - fileDef: - - filename: foo.txt - fileContent: > + listing: + foo.txt: > $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) inputs: - id: file1 From bacd84174cc93a9d097ecaf4b9143b957f468387 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Jun 2016 22:36:15 -0400 Subject: [PATCH 27/38] Use stageFiles for InitialWorkDir --- cwltool/draft2tool.py | 14 +++++++++----- cwltool/job.py | 24 +++++++++++------------- cwltool/pathmapper.py | 15 +++++++++------ cwltool/process.py | 10 ++++++---- cwltool/update.py | 10 ++++++++++ 5 files changed, 45 insertions(+), 28 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index f362ce85e..fe8daeb94 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -273,11 +273,15 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] j.tmpdir = builder.tmpdir j.stagedir = builder.stagedir - createFiles = self.get_requirement("CreateFileRequirement")[0] - j.generatefiles = {} - if createFiles: - for t in createFiles["fileDef"]: - j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"])) + 
initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0] + j.generatefiles = {"class": "Directory", "listing": []} + if initialWorkdir: + if isinstance(initialWorkdir["listing"], (str, unicode)): + j.generatefiles["listing"] = builder.do_eval(initialWorkdir["listing"]) + else: + for t in initialWorkdir["listing"]: + j.generatefiles["listing"].append({"entryname": builder.do_eval(t["entryname"]), + "entry": copy.deepcopy(builder.do_eval(t["entry"]))}) j.environment = {} evr = self.get_requirement("EnvVarRequirement")[0] diff --git a/cwltool/job.py b/cwltool/job.py index 63a6fb773..78fa4ccb9 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -166,19 +166,17 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, outputs = {} # type: Dict[str,str] try: - for t in self.generatefiles: - entry = self.generatefiles[t] - if isinstance(entry, dict): - src = entry["path"] - dst = os.path.join(self.outdir, t) - if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir: - _logger.debug(u"symlinking %s to %s", dst, src) - os.symlink(src, dst) - elif isinstance(entry, (str, unicode)): - with open(os.path.join(self.outdir, t), "w") as fout: - fout.write(entry.encode("utf-8")) - else: - raise Exception("Unhandled type %s", type(entry)) + if self.generatefiles["listing"]: + generatemapper = PathMapper([self.generatefiles], self.outdir, + self.outdir, separateDirs=False) + def linkoutdir(src, tgt): + # Need to make the link to the staged file (may be inside + # the container) + for item in self.pathmapper.items(): + if src == item.resolved: + os.symlink(item.target, tgt) + break + stageFiles(generatemapper, linkoutdir) if self.stdin: stdin = open(self.pathmapper.reversemap(self.stdin)[1], "rb") diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 3230e08e1..c5d537ff3 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -82,12 +82,15 @@ def visit(obj, base): self._pathmap[obj["location"]] = MapperEnt(obj["location"], 
base, "Directory") for ld in obj["listing"]: tgt = os.path.join(base, ld["entryname"]) - if ld["entry"]["class"] == "Directory": - visit(ld["entry"], tgt) - ab = ld["entry"]["location"] - if ab.startswith("file://"): - ab = ab[7:] - self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) + if isinstance(ld["entry"], (str, unicode)): + self._pathmap[id(ld["entry"])] = MapperEnt(ld["entry"], tgt, "Copy") + else: + if ld["entry"]["class"] == "Directory": + visit(ld["entry"], tgt) + ab = ld["entry"]["location"] + if ab.startswith("file://"): + ab = ab[7:] + self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) visit(fob, stagedir) else: diff --git a/cwltool/process.py b/cwltool/process.py index 42ab22048..8e6536b0a 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -39,14 +39,14 @@ supportedProcessRequirements = ["DockerRequirement", "SchemaDefRequirement", "EnvVarRequirement", - "CreateFileRequirement", "ScatterFeatureRequirement", "SubworkflowFeatureRequirement", "MultipleInputFeatureRequirement", "InlineJavascriptRequirement", "ShellCommandRequirement", "StepInputExpressionRequirement", - "ResourceRequirement"] + "ResourceRequirement", + "InitialWorkDirRequirement"] cwl_files = ("Workflow.yml", "CommandLineTool.yml", @@ -172,12 +172,14 @@ def getListing(fs_access, rec): rec["listing"] = listing def stageFiles(pm, stageFunc): - for f in pm.files(): - p = pm.mapper(f) + for f, p in pm.items(): if not os.path.exists(os.path.dirname(p.target)): os.makedirs(os.path.dirname(p.target), 0755) if p.type == "File": stageFunc(p.resolved, p.target) + elif p.type == "Copy": + with open(p.target, "w") as n: + n.write(p.resolved.encode("utf-8") def collectFilesAndDirs(obj, out): if isinstance(obj, dict): diff --git a/cwltool/update.py b/cwltool/update.py index 9fa4cfd6a..dfe76b14e 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -387,6 +387,16 @@ def _draft4Dev2toDev3(doc, loader, baseuri): for i, sf in 
enumerate(doc["secondaryFiles"]): if "$(" in sf or "${" in sf: doc["secondaryFiles"][i] = sf.replace('"path"', '"location"').replace(".path", ".location") + + if "class" in doc and doc["class"] == "CreateFileRequirement": + doc["class"] = "InitialWorkDirRequirement" + doc["listing"] = [] + for f in doc["fileDef"]: + doc["listing"].append({ + "entryname": f["filename"], + "entry": f["fileContent"] + }) + del doc["fileDef"] for key, value in doc.items(): doc[key] = _draft4Dev2toDev3(value, loader, baseuri) elif isinstance(doc, list): From 4728bf0efbc4817bc39fabed85d34f3dc2222be6 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Jun 2016 23:17:49 -0400 Subject: [PATCH 28/38] Pass tests for initial work dir. --- cwltool/job.py | 21 ++++++++++++--------- cwltool/pathmapper.py | 9 ++++++--- cwltool/process.py | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index 78fa4ccb9..f7e73180b 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -169,10 +169,12 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, if self.generatefiles["listing"]: generatemapper = PathMapper([self.generatefiles], self.outdir, self.outdir, separateDirs=False) + _logger.debug(u"[job %s] initial work dir %s", self.name, + json.dumps({p: generatemapper.mapper(p) for p in generatemapper.files()}, indent=4)) def linkoutdir(src, tgt): # Need to make the link to the staged file (may be inside # the container) - for item in self.pathmapper.items(): + for _, item in self.pathmapper.items(): if src == item.resolved: os.symlink(item.target, tgt) break @@ -201,7 +203,7 @@ def linkoutdir(src, tgt): else: stdout = sys.stderr - sp = subprocess.Popen([str(x) for x in runtime + self.command_line], + sp = subprocess.Popen([unicode(x).encode('utf-8') for x in runtime + self.command_line], shell=False, close_fds=True, stdin=stdin, @@ -235,13 +237,14 @@ def linkoutdir(src, tgt): else: processStatus = "permanentFail" - for t in 
self.generatefiles: - if isinstance(self.generatefiles[t], dict): - src = cast(dict, self.generatefiles[t])["path"] - dst = os.path.join(self.outdir, t) - if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir: - os.remove(dst) - os.symlink(self.pathmapper.reversemap(src)[1], dst) + if self.generatefiles["listing"]: + def linkoutdir(src, tgt): + # Need to make the link to the staged file (may be inside + # the container) + if os.path.exists(tgt): + os.remove(tgt) + os.symlink(src, tgt) + stageFiles(generatemapper, linkoutdir) outputs = self.collect_outputs(self.outdir) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index c5d537ff3..42f6f462a 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -79,11 +79,14 @@ def setup(self, referenced_files, basedir): if fob["class"] == "Directory": def visit(obj, base): - self._pathmap[obj["location"]] = MapperEnt(obj["location"], base, "Directory") + if "location" in obj: + self._pathmap[obj["location"]] = MapperEnt(obj["location"], base, "Directory") + else: + self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), base, "Directory") for ld in obj["listing"]: tgt = os.path.join(base, ld["entryname"]) if isinstance(ld["entry"], (str, unicode)): - self._pathmap[id(ld["entry"])] = MapperEnt(ld["entry"], tgt, "Copy") + self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "Copy") else: if ld["entry"]["class"] == "Directory": visit(ld["entry"], tgt) @@ -109,7 +112,7 @@ def visit(ob): # Dereference symbolic links for path, (ab, tgt, type) in self._pathmap.items(): - if type == "Directory": # or not os.path.exists(ab): + if type != "File": # or not os.path.exists(ab): continue deref = ab st = os.lstat(deref) diff --git a/cwltool/process.py b/cwltool/process.py index 8e6536b0a..141cc6909 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -179,7 +179,7 @@ def stageFiles(pm, stageFunc): stageFunc(p.resolved, p.target) elif p.type == "Copy": with open(p.target, "w") as n: - 
n.write(p.resolved.encode("utf-8") + n.write(p.resolved.encode("utf-8")) def collectFilesAndDirs(obj, out): if isinstance(obj, dict): From e65b384ff2d652a06a94694e0aa079509385ccec Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 08:48:04 -0400 Subject: [PATCH 29/38] Pathmapper wip --- cwltool/pathmapper.py | 71 ++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 42f6f462a..38a4f7075 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -59,14 +59,47 @@ class PathMapper(object): """Mapping of files from relative path provided in the file to a tuple of (absolute local path, absolute container path)""" - def __init__(self, referenced_files, basedir, stagedir, scramble=False, separateDirs=True): + def __init__(self, referenced_files, basedir, stagedir, separateDirs=True): # type: (Set[Any], unicode, unicode) -> None self._pathmap = {} # type: Dict[unicode, Tuple[unicode, unicode]] self.stagedir = stagedir - self.scramble = scramble self.separateDirs = separateDirs self.setup(referenced_files, basedir) + def visit(self, obj, stagedir, basedir, entryname=None): + if ob["class"] == "Directory": + if "location" in obj: + self._pathmap[obj["location"]] = MapperEnt(obj["location"], stagedir, "Directory") + else: + self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), stagedir, "Directory") + for ld in obj["listing"]: + tgt = os.path.join(stagedir, ld["entryname"]) + if isinstance(ld["entry"], (str, unicode)): + self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "Copy") + else: + if ld["entry"]["class"] == "Directory": + self.visit(ld["entry"], tgt) + ab = ld["entry"]["location"] + if ab.startswith("file://"): + ab = ab[7:] + self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) + elif ob["class"] == "File": + path = ob["location"] + if path in self._pathmap: + return + ab = abspath(path, basedir) 
+ if entryname: + tgt = os.path.join(stagedir, entryname) + else: + tgt = os.path.join(stagedir, os.path.basename(path)) + self._pathmap[path] = MapperEnt(ab, tgt, "File") + if ob.get("secondaryFiles"): + for sf in obj["secondaryFiles"]: + if "entryname" in sf: + self.visit(sf["entry"], stagedir, basedir, entryname=sf["entryname"]) + else: + self.visit(sf, stagedir, basedir) + def setup(self, referenced_files, basedir): # type: (Set[Any], unicode) -> None @@ -76,39 +109,7 @@ def setup(self, referenced_files, basedir): for fob in referenced_files: if self.separateDirs: stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) - - if fob["class"] == "Directory": - def visit(obj, base): - if "location" in obj: - self._pathmap[obj["location"]] = MapperEnt(obj["location"], base, "Directory") - else: - self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), base, "Directory") - for ld in obj["listing"]: - tgt = os.path.join(base, ld["entryname"]) - if isinstance(ld["entry"], (str, unicode)): - self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "Copy") - else: - if ld["entry"]["class"] == "Directory": - visit(ld["entry"], tgt) - ab = ld["entry"]["location"] - if ab.startswith("file://"): - ab = ab[7:] - self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) - - visit(fob, stagedir) - else: - def visit(ob): - path = ob["location"] - if path in self._pathmap: - return - ab = abspath(path, basedir) - if self.scramble: - tgt = os.path.join(stagedir, "inp%x.dat" % random.randint(1, 1000000000)) - else: - tgt = os.path.join(stagedir, os.path.basename(path)) - self._pathmap[path] = MapperEnt(ab, tgt, "File") - - adjustFileObjs(fob, visit) + self.visit(fob, stagedir, basedir) # Dereference symbolic links for path, (ab, tgt, type) in self._pathmap.items(): From c332dde31fd6bdee4fe00af9bb89646de1a068c2 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 08:48:08 -0400 Subject: [PATCH 30/38] Squashed 
'cwltool/schemas/' changes from 5239b16..a5976f3 a5976f3 Unify Directory listing, InitialWorkDir and secondaryFiles to accept equivalent type. eb9449f Adding directory feature tests. 9dfbb8e Update tests to use InitialWorkDirRequirement git-subtree-dir: cwltool/schemas git-subtree-split: a5976f32351626f8f3072f598093036fc9e5864a --- draft-4/CommandLineTool.yml | 2 +- draft-4/Process.yml | 5 ++++- draft-4/conformance_test_draft-4.yaml | 26 +++++++++++++++++++++++++- draft-4/draft-4/dir4-job.yml | 10 ++++++++++ draft-4/draft-4/dir4.cwl | 18 ++++++++++++++++++ draft-4/draft-4/dir5.cwl | 18 ++++++++++++++++++ draft-4/draft-4/rename.cwl | 2 +- draft-4/draft-4/template-tool.cwl | 2 +- draft-4/draft-4/testdir/a | 0 draft-4/draft-4/testdir/b | 0 draft-4/draft-4/testdir/c/d | 0 11 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 draft-4/draft-4/dir4-job.yml create mode 100644 draft-4/draft-4/dir4.cwl create mode 100644 draft-4/draft-4/dir5.cwl create mode 100644 draft-4/draft-4/testdir/a create mode 100644 draft-4/draft-4/testdir/b create mode 100644 draft-4/draft-4/testdir/c/d diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 1723cd9dd..91332ef95 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -646,7 +646,7 @@ $graph: - name: listing type: - type: array - items: DirentExt + items: [File, DirentExt] - string - Expression jsonldPredicate: diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 2eeedb830..86bc0302b 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -225,7 +225,10 @@ $graph: _id: "cwl:path" _type: "@id" - name: listing - type: Dirent[]? 
+ type: + - "null" + - type: array + items: [File, Dirent] doc: List of files or subdirectories contained in this directory jsonldPredicate: _id: "cwl:listing" diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index 78f05e1f6..23bb583eb 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -735,4 +735,28 @@ ], } tool: draft-4/dir3.cwl - doc: Test directory input in Docker \ No newline at end of file + doc: Test directory input in Docker + +- job: draft-4/dir4-job.yml + output: + "outlist": { + "size": 20, + "location": "output.txt", + "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", + "class": "File" + } + tool: draft-4/dir4.cwl + doc: Test directories in secondaryFiles + +- job: draft-4/dir-job.yml + output: { + "outlist": { + "checksum": "sha1$907a866a3e0b7f1fc5a2222531c5fb9063704438", + "path": "/home/peter/work/common-workflow-language/draft-4/draft-4/output.txt", + "size": 33, + "location": "file:///home/peter/work/common-workflow-language/draft-4/draft-4/output.txt", + "class": "File" + } + } + tool: draft-4/dir5.cwl + doc: Test dynamic initial work dir diff --git a/draft-4/draft-4/dir4-job.yml b/draft-4/draft-4/dir4-job.yml new file mode 100644 index 000000000..29d5c1116 --- /dev/null +++ b/draft-4/draft-4/dir4-job.yml @@ -0,0 +1,10 @@ +inf: + class: File + location: hello.tar + secondaryFiles: + - class: File + location: index.py + - entryname: testdir + entry: + class: Directory + location: testdir diff --git a/draft-4/draft-4/dir4.cwl b/draft-4/draft-4/dir4.cwl new file mode 100644 index 000000000..07341cf7d --- /dev/null +++ b/draft-4/draft-4/dir4.cwl @@ -0,0 +1,18 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +requirements: + - class: ShellCommandRequirement +inputs: + inf: File +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["cd", "$(inputs.inf.dirname)", + {shellQuote: false, valueFrom: "&&"}, + 
"find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/dir5.cwl b/draft-4/draft-4/dir5.cwl new file mode 100644 index 000000000..27d9ac9e9 --- /dev/null +++ b/draft-4/draft-4/dir5.cwl @@ -0,0 +1,18 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +requirements: + - class: ShellCommandRequirement + - class: InitialWorkDirRequirement + listing: $(inputs.indir.listing) +inputs: + indir: Directory +outputs: + outlist: + type: File + outputBinding: + glob: output.txt +baseCommand: [] +arguments: ["find", ".", + {shellQuote: false, valueFrom: "|"}, + "sort"] +stdout: output.txt \ No newline at end of file diff --git a/draft-4/draft-4/rename.cwl b/draft-4/draft-4/rename.cwl index e8707b5aa..e97743ee0 100644 --- a/draft-4/draft-4/rename.cwl +++ b/draft-4/draft-4/rename.cwl @@ -2,7 +2,7 @@ class: CommandLineTool cwlVersion: cwl:draft-4.dev3 baseCommand: "true" requirements: - CreateFileRequirement: + InitialWorkDirRequirement: listing: - entryname: $(inputs.newname) entry: $(inputs.srcfile) diff --git a/draft-4/draft-4/template-tool.cwl b/draft-4/draft-4/template-tool.cwl index 05524033a..f7a00024f 100755 --- a/draft-4/draft-4/template-tool.cwl +++ b/draft-4/draft-4/template-tool.cwl @@ -8,7 +8,7 @@ requirements: expressionLib: - { $include: underscore.js } - "var t = function(s) { return _.template(s)({'inputs': inputs}); };" - - class: CreateFileRequirement + - class: InitialWorkDirRequirement listing: foo.txt: > $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) diff --git a/draft-4/draft-4/testdir/a b/draft-4/draft-4/testdir/a new file mode 100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/testdir/b b/draft-4/draft-4/testdir/b new file mode 100644 index 000000000..e69de29bb diff --git a/draft-4/draft-4/testdir/c/d b/draft-4/draft-4/testdir/c/d new file mode 100644 index 000000000..e69de29bb From b94c87e83d37f3083cecfb57c56ec45ebbd3bd85 Mon Sep 17 
00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 10:02:18 -0400 Subject: [PATCH 31/38] Pathmapper handles unified listing/secondaryFiles. --- cwltool/pathmapper.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 38a4f7075..43daa914a 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -66,25 +66,33 @@ def __init__(self, referenced_files, basedir, stagedir, separateDirs=True): self.separateDirs = separateDirs self.setup(referenced_files, basedir) - def visit(self, obj, stagedir, basedir, entryname=None): - if ob["class"] == "Directory": - if "location" in obj: - self._pathmap[obj["location"]] = MapperEnt(obj["location"], stagedir, "Directory") - else: - self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), stagedir, "Directory") - for ld in obj["listing"]: + def visitlisting(self, listing, stagedir, basedir): + for ld in listing: + if "entryname" in ld: tgt = os.path.join(stagedir, ld["entryname"]) if isinstance(ld["entry"], (str, unicode)): self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "Copy") else: if ld["entry"]["class"] == "Directory": - self.visit(ld["entry"], tgt) - ab = ld["entry"]["location"] - if ab.startswith("file://"): - ab = ab[7:] - self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) - elif ob["class"] == "File": - path = ob["location"] + self.visit(ld["entry"], tgt, basedir) + else: + self.visit(ld["entry"], stagedir, basedir, entryname=ld["entryname"]) + #ab = ld["entry"]["location"] + #if ab.startswith("file://"): + # ab = ab[7:] + #self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) + elif ld.get("class") == "File": + self.visit(ld, stagedir, basedir) + + def visit(self, obj, stagedir, basedir, entryname=None): + if obj["class"] == "Directory": + if "location" in obj: + self._pathmap[obj["location"]] = MapperEnt(obj["location"], 
stagedir, "Directory") + else: + self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), stagedir, "Directory") + self.visitlisting(obj.get("listing", []), stagedir, basedir) + elif obj["class"] == "File": + path = obj["location"] if path in self._pathmap: return ab = abspath(path, basedir) @@ -93,12 +101,7 @@ def visit(self, obj, stagedir, basedir, entryname=None): else: tgt = os.path.join(stagedir, os.path.basename(path)) self._pathmap[path] = MapperEnt(ab, tgt, "File") - if ob.get("secondaryFiles"): - for sf in obj["secondaryFiles"]: - if "entryname" in sf: - self.visit(sf["entry"], stagedir, basedir, entryname=sf["entryname"]) - else: - self.visit(sf, stagedir, basedir) + self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir) def setup(self, referenced_files, basedir): # type: (Set[Any], unicode) -> None From cb7f18c050334af7b964e84e03f25b975be0880f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 14:58:41 -0400 Subject: [PATCH 32/38] Squashed 'cwltool/schemas/' changes from a5976f3..c6538ab c6538ab Remove CreateFileRequirement. Add documentation to InitialWorkDirRequirement. Add "writable" flag to DirentExt. 8b48da6 Update stderr-shortcut tests. 5b56226 Update test git-subtree-dir: cwltool/schemas git-subtree-split: c6538ab6dab3b49eea14a946a696428336854dc3 --- draft-4/CommandLineTool.yml | 79 +++++++++++++-------------- draft-4/Process.yml | 31 ++++++++--- draft-4/conformance_test_draft-4.yaml | 65 +++++++++++++--------- draft-4/draft-4/dir4-job.yml | 2 +- 4 files changed, 100 insertions(+), 77 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 91332ef95..06eb86042 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -79,30 +79,6 @@ $graph: - {$include: concepts.md} - {$include: invocation.md} -- type: record - name: FileDef - doc: | - Define a file that must be placed in the designated output directory - prior to executing the command line tool. 
May be the result of executing - an expression, such as building a configuration file from a template. - fields: - - name: "filename" - type: ["string", "#Expression"] - doc: "The name of the file to create in the output directory." - - name: "fileContent" - type: ["string", "#Expression"] - doc: | - If the value is a string literal or an expression which evaluates to a - string, a new file must be created with the string as the file contents. - - If the value is an expression that evaluates to a File object, this - indicates the referenced file should be added to the designated output - directory prior to executing the tool. - - Files added in this way may be read-only, and may be provided - by bind mounts or file system links to avoid - unnecessary copying of the input file. - - type: record name: EnvironmentDef @@ -628,20 +604,53 @@ $graph: - name: DirentExt type: record + doc: | + Define a file or subdirectory that must be placed in the designated output + directory prior to executing the command line tool. May be the result of + executing an expression, such as building a configuration file from a + template. fields: - name: entryname type: [string, Expression] jsonldPredicate: _id: cwl:entryname + doc: | + The name of the file or subdirectory to create in the output directory. - name: entry type: [string, Expression] jsonldPredicate: _id: cwl:entry + doc: | + If the value is a string literal or an expression which evaluates to a + string, a new file must be created with the string as the file contents. + If the value is an expression that evaluates to a `File` object, this + indicates the referenced file should be added to the designated output + directory prior to executing the tool. + + If the value is an expression that evaluates to a `Dirent` object, this + indicates that the File or Directory in `entry` should be added to the + designated output directory with the name in `entryname`. 
+ + If `writable` is false, the file may be made available using a bind + mount or file system link to avoid unnecessary copying of the input + file. + - name: writable + type: boolean? + doc: | + If true, the file or directory must be writable by the tool. Changes + to the file or directory must be isolated and not visible by any other + CommandLineTool process. This may be implemented by making a copy of + the original file or directory. Default false (files and directories + read-only by default). - name: InitialWorkDirRequirement type: record extends: ProcessRequirement + doc: + Define a list of files and subdirectories that must be created by the + workflow platform in the designated output directory prior to executing the + command line tool. fields: - name: listing type: @@ -653,24 +662,12 @@ $graph: _id: "cwl:listing" mapSubject: entryname mapPredicate: entry + doc: | + The list of files or subdirectories that must be placed in the + designated output directory prior to executing the command line tool. - -- name: CreateFileRequirement - type: record - extends: "#ProcessRequirement" - doc: | - *Deprecated*, superceded by InitialWorkDirRequirement. - - Define a list of files that must be created by the workflow - platform in the designated output directory prior to executing the command - line tool. See `FileDef` for details. - fields: - - name: fileDef - type: - type: "array" - items: "#FileDef" - doc: The list of files. - + May be an expression. If so, the expression return value must validate + as `{type: array, items: [File, Dirent]}`. - name: EnvVarRequirement type: record diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 86bc0302b..351bab9d2 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -74,6 +74,10 @@ $graph: implementation is unable to retrieve the file content stored at a remote resource (due to unsupported protocol, access denied, or other issue) it must signal an error. 
+ + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. jsonldPredicate: _id: "@id" _type: "@id" @@ -81,9 +85,10 @@ $graph: type: string? doc: | The local path where the File is made available prior to executing a - CommandLineTool. This field must not be used in any other context. The - command line tool being executed must be able to to access the file at - `path` using the POSIX `open(2)` syscall. + CommandLineTool. This must be set by the implementation. This field + must not be used in any other context. The command line tool being + executed must be able to to access the file at `path` using the POSIX + `open(2)` syscall. jsonldPredicate: "_id": "cwl:path" "_type": "@id" @@ -211,16 +216,26 @@ $graph: A URI that identifies the directory resource. This may be a relative reference, in which case it must be resolved using the base URI of the document. The location may refer to a local or remote resource. If - the `listing` field is not set, the implementation must use the URI to - retrieve directory listing. If an implementation is unable to retrieve - the file content stored at a remote resource (due to unsupported - protocol, access denied, or other issue) it must signal an error. + the `listing` field is not set, the implementation must use the + location URI to retrieve directory listing. If an implementation is + unable to retrieve the directory listing stored at a remote resource (due to + unsupported protocol, access denied, or other issue) it must signal an + error. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. jsonldPredicate: _id: "@id" _type: "@id" - name: path type: string? - doc: The path to the directory. 
+ doc: | + The local path where the Directory is made available prior to executing a + CommandLineTool. This must be set by the implementation. This field + must not be used in any other context. The command line tool being + executed must be able to to access the directory at `path` using the POSIX + `opendir(2)` syscall. jsonldPredicate: _id: "cwl:path" _type: "@id" diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index 23bb583eb..7769640b8 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -77,28 +77,39 @@ tool: draft-4/cat3-tool-mediumcut.cwl doc: Test command execution in Docker with stdout redirection -# - args: [egrep] -# stderr: error.txt -# job: -# tool: draft-4/egrep-stderr.cwl -# doc: Test command line with stderr redirection - -# - args: [egrep] -# job: -# tool: draft-4/egrep-stderr-shortcut.cwl -# doc: Test command line with stderr redirection, brief syntax - -# - args: [egrep] -# stderr: std.err -# output: -# output_file: -# class: File -# size: 84 -# checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 -# location: std.err -# job: -# tool: draft-4/egrep-stderr-mediumcut.cwl -# doc: Test command line with stderr redirection, named brief syntax +- job: + output: { + "output_file": { + "checksum": "sha1$cec7b8746a78c42060c96505887449bca0142976", + "size": 84, + "location": "error.txt", + "class": "File" + } + } + tool: draft-4/egrep-stderr.cwl + doc: Test command line with stderr redirection + +- job: + output: { + "output_file": { + "checksum": "sha1$cec7b8746a78c42060c96505887449bca0142976", + "size": 84, + "location": "", + "class": "File" + } + } + tool: draft-4/egrep-stderr-shortcut.cwl + doc: Test command line with stderr redirection, brief syntax + +- output: + output_file: + class: File + size: 84 + checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 + location: std.err + job: + tool: draft-4/egrep-stderr-mediumcut.cwl + doc: Test command line with 
stderr redirection, named brief syntax - job: draft-4/cat-job.json output: @@ -738,13 +749,14 @@ doc: Test directory input in Docker - job: draft-4/dir4-job.yml - output: + output: { "outlist": { - "size": 20, + "checksum": "sha1$2ab6f189e84753c05a23413fbf6b6fbf4c53489f", + "size": 90, "location": "output.txt", - "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592", "class": "File" } + } tool: draft-4/dir4.cwl doc: Test directories in secondaryFiles @@ -752,9 +764,8 @@ output: { "outlist": { "checksum": "sha1$907a866a3e0b7f1fc5a2222531c5fb9063704438", - "path": "/home/peter/work/common-workflow-language/draft-4/draft-4/output.txt", "size": 33, - "location": "file:///home/peter/work/common-workflow-language/draft-4/draft-4/output.txt", + "location": "output.txt", "class": "File" } } diff --git a/draft-4/draft-4/dir4-job.yml b/draft-4/draft-4/dir4-job.yml index 29d5c1116..8da860d27 100644 --- a/draft-4/draft-4/dir4-job.yml +++ b/draft-4/draft-4/dir4-job.yml @@ -4,7 +4,7 @@ inf: secondaryFiles: - class: File location: index.py - - entryname: testdir + - entryname: xtestdir entry: class: Directory location: testdir From 910725babdc95eba39d8ffc5f8ca5ef6945ee7eb Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 15:35:49 -0400 Subject: [PATCH 33/38] Add support for "writable" in InitialWorkDir --- cwltool/pathmapper.py | 15 +++++++++------ cwltool/process.py | 4 +++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 43daa914a..8455d648a 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -71,20 +71,20 @@ def visitlisting(self, listing, stagedir, basedir): if "entryname" in ld: tgt = os.path.join(stagedir, ld["entryname"]) if isinstance(ld["entry"], (str, unicode)): - self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "Copy") + self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "CreateFile") else: if ld["entry"]["class"] == "Directory": - 
self.visit(ld["entry"], tgt, basedir) + self.visit(ld["entry"], tgt, basedir, copy=ld.get("writable", False)) else: - self.visit(ld["entry"], stagedir, basedir, entryname=ld["entryname"]) + self.visit(ld["entry"], stagedir, basedir, entryname=ld["entryname"], copy=ld.get("writable", False)) #ab = ld["entry"]["location"] #if ab.startswith("file://"): # ab = ab[7:] #self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) elif ld.get("class") == "File": - self.visit(ld, stagedir, basedir) + self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) - def visit(self, obj, stagedir, basedir, entryname=None): + def visit(self, obj, stagedir, basedir, entryname=None, copy=False): if obj["class"] == "Directory": if "location" in obj: self._pathmap[obj["location"]] = MapperEnt(obj["location"], stagedir, "Directory") @@ -100,7 +100,10 @@ def visit(self, obj, stagedir, basedir, entryname=None): tgt = os.path.join(stagedir, entryname) else: tgt = os.path.join(stagedir, os.path.basename(path)) - self._pathmap[path] = MapperEnt(ab, tgt, "File") + if copy: + self._pathmap[path] = MapperEnt(ab, tgt, "WritableFile") + else: + self._pathmap[path] = MapperEnt(ab, tgt, "File") self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir) def setup(self, referenced_files, basedir): diff --git a/cwltool/process.py b/cwltool/process.py index 141cc6909..11bc273ef 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -177,7 +177,9 @@ def stageFiles(pm, stageFunc): os.makedirs(os.path.dirname(p.target), 0755) if p.type == "File": stageFunc(p.resolved, p.target) - elif p.type == "Copy": + elif p.type == "WritableFile": + shutil.copy(p.resolved, p.target) + elif p.type == "CreateFile": with open(p.target, "w") as n: n.write(p.resolved.encode("utf-8")) From 2551f6a7229ca7b76d3cea15fdfbad035987143e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 15:35:52 -0400 Subject: [PATCH 34/38] Squashed 'cwltool/schemas/' changes from 
c6538ab..afe31c1 afe31c1 Delete obsolote initialWorkDir record from previous work on Directory implemention. git-subtree-dir: cwltool/schemas git-subtree-split: afe31c1abe44be891a6909f591dd6864dca9f984 --- draft-4/CommandLineTool.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 06eb86042..f33dadfc4 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -224,17 +224,6 @@ $graph: the File objects must include up to the first 64 KiB of file contents in the `contents` field. -- name: initialWorkDir - type: record - doc: | - Setup a working directory based on a number of input files (and/or directories) - fields: - - name: dirDef - type: - type: "array" - items: [ "#FileDef", "#File", "#Directory" ] - doc: list of files and/or directories - - name: CommandInputRecordField type: record From d2270a793d5c4e170c5377854ab9925b018db02c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 29 Jun 2016 16:38:35 -0400 Subject: [PATCH 35/38] Don't reset real files by accident. --- cwltool/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwltool/job.py b/cwltool/job.py index f7e73180b..1055c2dd2 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -241,7 +241,7 @@ def linkoutdir(src, tgt): def linkoutdir(src, tgt): # Need to make the link to the staged file (may be inside # the container) - if os.path.exists(tgt): + if os.path.exists(tgt) and os.path.islink(tgt): os.remove(tgt) os.symlink(src, tgt) stageFiles(generatemapper, linkoutdir) From 229565c50605f3f1bb406232cf594eb0e4c16304 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 30 Jun 2016 08:39:36 -0400 Subject: [PATCH 36/38] cwltest order-independent directory compare. 
--- cwltool/cwltest.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index d4760d1ef..615822b23 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -37,16 +37,30 @@ def compare(a, b): # type: (Any, Any) -> bool # ignore empty collections b = {k: v for k, v in b.iteritems() if not isinstance(v, (list, dict)) or len(v) > 0} + elif a.get("class") == "Directory": + if len(a["listing"]) != len(b["listing"]): + return False + for i in a["listing"]: + found = False + for j in b["listing"]: + try: + if compare(i, j): + found = True + break + except: + pass + if not found: + raise CompareFail(u"%s not in %s" % (json.dumps(i, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) a = {k: v for k, v in a.iteritems() - if k not in ("path", "location")} + if k not in ("path", "location", "listing")} b = {k: v for k, v in b.iteritems() - if k not in ("path", "location")} + if k not in ("path", "location", "listing")} if len(a) != len(b): raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) for c in a: - if a.get("class") != "File" or c not in ("path", "location"): + if a.get("class") != "File" or c not in ("path", "location", "listing"): if c not in b: raise CompareFail(u"%s not in %s" % (c, b)) if not compare(a[c], b[c]): From e70eeb8cf24e65807a069df1c9b2f429551e34e5 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 30 Jun 2016 10:21:34 -0400 Subject: [PATCH 37/38] Update scandeps to produce nested directory listing to correctly capture file structure. --- cwltool/main.py | 16 ++++++++-------- cwltool/process.py | 46 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/cwltool/main.py b/cwltool/main.py index c240ca658..4679d2012 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -26,7 +26,7 @@ from . 
import workflow from .errors import WorkflowException, UnsupportedRequirement from .cwlrdf import printrdf, printdot -from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate +from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate, scandeps from .load_tool import fetch_document, validate_document, make_tool from . import draft2tool from .builder import adjustFileObjs, adjustDirObjs @@ -423,29 +423,29 @@ def pathToLoc(p): return (job_order_object, input_basedir) -def printdeps(obj, document_loader, stdout, relative_deps, basedir=None): +def printdeps(obj, document_loader, stdout, relative_deps, uri, basedir=None): # type: (Dict[unicode, Any], Loader, IO[Any], bool, str) -> None deps = {"class": "File", - "path": obj.get("id", "#")} + "location": uri} def loadref(b, u): return document_loader.resolve_ref(u, base_url=b)[0] - sf = scandeps(basedir if basedir else obj["id"], obj, + sf = scandeps(basedir if basedir else uri, obj, set(("$import", "run")), - set(("$include", "$schemas", "path")), loadref) + set(("$include", "$schemas", "path", "location")), loadref) if sf: deps["secondaryFiles"] = sf if relative_deps: if relative_deps == "primary": - base = basedir if basedir else os.path.dirname(obj["id"]) + base = basedir if basedir else os.path.dirname(uri) elif relative_deps == "cwd": base = "file://" + os.getcwd() else: raise Exception(u"Unknown relative_deps %s" % relative_deps) def makeRelative(ob): - u = ob["location"] + u = ob.get("location", ob.get("path")) if ":" in u.split("/")[0] and not u.startswith("file://"): pass else: @@ -604,7 +604,7 @@ def main(argsl=None, document_loader, workflowobj, uri = fetch_document(args.workflow) if args.print_deps: - printdeps(workflowobj, document_loader, stdout, args.relative_deps) + printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ diff --git a/cwltool/process.py 
b/cwltool/process.py index 11bc273ef..7e125280f 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -549,6 +549,41 @@ def uniquename(stem): # type: (unicode) -> unicode _names.add(u) return u +def nestdir(base, deps): + dirname = os.path.dirname(base) + "/" + subid = deps["location"] + if subid.startswith(dirname): + s2 = subid[len(dirname):] + sp = s2.split('/') + sp.pop() + while sp: + nx = sp.pop() + deps = { + "entryname": nx, + "entry": { + "class": "Directory", + "listing": [deps] + } + } + return deps + +def mergedirs(listing): + r = [] + ents = {} + for e in listing: + if "entryname" in e: + if e["entryname"] not in ents: + ents[e["entryname"]] = e + elif e["entry"]["class"] == "Directory": + ents[e["entryname"]]["entry"]["listing"].extend(e["entry"]["listing"]) + else: + r.append(e) + for e in ents.itervalues(): + if e["entry"]["class"] == "Directory": + e["entry"]["listing"] = mergedirs(e["entry"]["listing"]) + r.extend(ents.itervalues()) + return r + def scandeps(base, doc, reffields, urlfields, loadref): # type: (unicode, Any, Set[str], Set[str], Callable[[unicode, str], Any]) -> List[Dict[str, str]] r = [] @@ -574,20 +609,25 @@ def scandeps(base, doc, reffields, urlfields, loadref): deps = { "class": "File", "location": subid - } # type: Dict[str, Any] + } # type: Dict[str, Any] sf = scandeps(subid, sub, reffields, urlfields, loadref) if sf: deps["secondaryFiles"] = sf + deps = nestdir(base, deps) r.append(deps) elif k in urlfields: for u in aslist(v): - r.append({ + deps = { "class": "File", "location": urlparse.urljoin(base, u) - }) + } + deps = nestdir(base, deps) + r.append(deps) else: r.extend(scandeps(base, v, reffields, urlfields, loadref)) elif isinstance(doc, list): for d in doc: r.extend(scandeps(base, d, reffields, urlfields, loadref)) + + r = mergedirs(r) return r From 6f9c2f8ef6a681bfbe67bf4be61a5890d2f68ef2 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Thu, 30 Jun 2016 10:52:38 -0400 Subject: [PATCH 38/38] s/WHITELIST/ACCEPTLIST/g --- cwltool/draft2tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index fe8daeb94..d74199b31 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -26,7 +26,7 @@ from .pathmapper import PathMapper from .job import CommandLineJob -WHITELIST_RE = re.compile(r"^[a-zA-Z0-9._-]+$") +ACCEPTLIST_RE = re.compile(r"^[a-zA-Z0-9._-]+$") from .flatten import flatten @@ -244,7 +244,7 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] f["dirname"], f["basename"] = os.path.split(f["path"]) if f["class"] == "File": f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) - if not WHITELIST_RE.match(f["basename"]): + if not ACCEPTLIST_RE.match(f["basename"]): raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"])) return f