diff --git a/docs/jq.md b/docs/jq.md index 0927a90fa..6a65ebe5b 100644 --- a/docs/jq.md +++ b/docs/jq.md @@ -144,7 +144,7 @@ Invoke jq with a filter on a set of json input files. | args | Additional args to pass to jq | `[]` | | out | Name of the output json file; defaults to the rule name plus ".json" | `None` | | data | List of additional files. May be empty. | `[]` | -| expand_args | Run bazel's location-expansion on the args. | `False` | +| expand_args | Run bazel's location and make variable expansion on the args. | `False` | | kwargs | Other common named parameters such as `tags` or `visibility` | none | diff --git a/docs/strings.md b/docs/strings.md index 08daf4a82..04b0289c0 100644 --- a/docs/strings.md +++ b/docs/strings.md @@ -80,3 +80,29 @@ Unicode replacement character, U+FFFD. codepoint of `c` argument. + + +## split_args + +
+split_args(s)
+
+ +Split a string into a list space separated arguments + +Unlike the naive `.split(" ")`, this function takes quoted strings +and escapes into account. + + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| s | input string | none | + +**RETURNS** + +list of strings with each an argument found in the input string + + diff --git a/lib/jq.bzl b/lib/jq.bzl index c8b01c5a8..b0993ee5b 100644 --- a/lib/jq.bzl +++ b/lib/jq.bzl @@ -144,7 +144,7 @@ def jq(name, srcs, filter = None, filter_file = None, args = [], out = None, dat filter_file: File containing filter expression (alternative to `filter`) args: Additional args to pass to jq - expand_args: Run bazel's location-expansion on the args. + expand_args: Run bazel's location and make variable expansion on the args. out: Name of the output json file; defaults to the rule name plus ".json" **kwargs: Other common named parameters such as `tags` or `visibility` """ diff --git a/lib/private/BUILD.bazel b/lib/private/BUILD.bazel index e5a4bb986..384aa236e 100644 --- a/lib/private/BUILD.bazel +++ b/lib/private/BUILD.bazel @@ -160,6 +160,8 @@ bzl_library( srcs = ["jq.bzl"], visibility = ["//lib:__subpackages__"], deps = [ + ":expand_variables", + ":strings", "//lib:stamping", ], ) @@ -182,6 +184,10 @@ bzl_library( name = "params_file", srcs = ["params_file.bzl"], visibility = ["//lib:__subpackages__"], + deps = [ + ":expand_variables", + ":strings", + ], ) bzl_library( @@ -204,6 +210,7 @@ bzl_library( visibility = ["//lib:__subpackages__"], deps = [ ":expand_variables", + ":strings", "//lib:stamping", "@bazel_skylib//lib:dicts", ], diff --git a/lib/private/bats.bzl b/lib/private/bats.bzl index f849a9425..65de719a7 100644 --- a/lib/private/bats.bzl +++ b/lib/private/bats.bzl @@ -41,7 +41,7 @@ def _bats_test_impl(ctx): for (key, value) in ctx.attr.env.items(): envs.append(_ENV_SET.format( key = key, - value = " ".join([expand_variables(ctx, exp, attribute_name = "env") for exp in ctx.expand_location(value, targets = ctx.attr.data).split(" ")]), + value = expand_variables(ctx, ctx.expand_location(value, targets = ctx.attr.data), attribute_name = "env"), )) # See https://www.msys2.org/wiki/Porting/: diff --git a/lib/private/expand_template.bzl b/lib/private/expand_template.bzl index c2d99baa3..88b326893 100644 --- a/lib/private/expand_template.bzl +++ b/lib/private/expand_template.bzl @@ -2,15 +2,12 @@ load("@bazel_skylib//lib:dicts.bzl", "dicts") load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") -load(":expand_variables.bzl", _expand_variables = "expand_variables") +load(":expand_variables.bzl", "expand_variables") def _expand_substitutions(ctx, output, substitutions): result = {} for k, v in substitutions.items(): - result[k] = " ".join([ - _expand_variables(ctx, e, outs = [output], attribute_name = "substitutions") - for e in ctx.expand_location(v, targets = ctx.attr.data).split(" ") - ]) + result[k] = expand_variables(ctx, ctx.expand_location(v, targets = ctx.attr.data), outs = [output], attribute_name = "substitutions") return result def _expand_template_impl(ctx): diff --git a/lib/private/jq.bzl b/lib/private/jq.bzl index 4f903b904..97815fac3 100644 --- a/lib/private/jq.bzl +++ b/lib/private/jq.bzl @@ -1,6 +1,8 @@ """Implementation for jq rule""" load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") +load(":expand_variables.bzl", "expand_variables") +load(":strings.bzl", "split_args") _jq_attrs = dict({ "srcs": attr.label_list( @@ -22,12 +24,6 @@ _jq_attrs = dict({ ), }, **STAMP_ATTRS) -def _expand_locations(ctx, s): - # `.split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309 - # TODO: If the string has intentional spaces or if one or more of the expanded file - # locations has a space in the name, we will incorrectly split it into multiple arguments - return ctx.expand_location(s, targets = ctx.attr.data).split(" ") - def _jq_impl(ctx): jq_bin = ctx.toolchains["@aspect_bazel_lib//lib:jq_toolchain_type"].jqinfo.bin @@ -35,7 +31,7 @@ def _jq_impl(ctx): if ctx.attr.expand_args: args = [] for a in ctx.attr.args: - args += _expand_locations(ctx, a) + args += split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.data), outs = [out])) else: args = ctx.attr.args @@ -52,7 +48,7 @@ def _jq_impl(ctx): args = args + ["--null-input"] if ctx.attr.filter_file: - args = args + ["--from-file '%s'" % ctx.file.filter_file.path] + args = args + ["--from-file", ctx.file.filter_file.path] inputs.append(ctx.file.filter_file) stamp = maybe_stamp(ctx) @@ -76,9 +72,16 @@ def _jq_impl(ctx): args = args + ["--slurpfile", "STAMP", stamp_json.path] + # quote args that contain spaces + quoted_args = [] + for a in args: + if " " in a: + a = "'{}'".format(a) + quoted_args.append(a) + cmd = "{jq} {args} {filter} {sources} > {out}".format( jq = jq_bin.path, - args = " ".join(args), + args = " ".join(quoted_args), filter = "'%s'" % ctx.attr.filter if ctx.attr.filter else "", sources = " ".join(["'%s'" % file.path for file in ctx.files.srcs]), out = out.path, diff --git a/lib/private/params_file.bzl b/lib/private/params_file.bzl index 5a279f299..7dc617143 100644 --- a/lib/private/params_file.bzl +++ b/lib/private/params_file.bzl @@ -1,5 +1,8 @@ "params_file rule" +load(":expand_variables.bzl", "expand_variables") +load(":strings.bzl", "split_args") + _ATTRS = { "args": attr.string_list(), "data": attr.label_list(allow_files = True), @@ -11,12 +14,6 @@ _ATTRS = { "_windows_constraint": attr.label(default = "@platforms//os:windows"), } -def _expand_locations(ctx, s): - # `.split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309 - # TODO: If the string has intentional spaces or if one or more of the expanded file - # locations has a space in the name, we will incorrectly split it into multiple arguments - return ctx.expand_location(s, targets = ctx.attr.data).split(" ") - def _params_file_impl(ctx): is_windows = ctx.target_platform_has_constraint(ctx.attr._windows_constraint[platform_common.ConstraintValueInfo]) @@ -29,12 +26,9 @@ def _params_file_impl(ctx): expanded_args = [] - # First expand predefined source/output path variables + # Expand predefined source/output path && predefined variables & custom variables for a in ctx.attr.args: - expanded_args += _expand_locations(ctx, a) - - # Next expand predefined variables & custom variables - expanded_args = [ctx.expand_make_variables("args", e, {}) for e in expanded_args] + expanded_args += split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.data), outs = [ctx.outputs.out])) # ctx.actions.write creates a FileWriteAction which uses UTF-8 encoding. ctx.actions.write( diff --git a/lib/private/run_binary.bzl b/lib/private/run_binary.bzl index 77f3875f5..41e611f18 100644 --- a/lib/private/run_binary.bzl +++ b/lib/private/run_binary.bzl @@ -17,6 +17,7 @@ load("@bazel_skylib//lib:dicts.bzl", "dicts") load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") load(":expand_variables.bzl", "expand_variables") +load(":strings.bzl", "split_args") def _run_binary_impl(ctx): args = ctx.actions.args() @@ -46,15 +47,11 @@ Possible fixes: rule_kind = str(ctx.attr.tool.label), )) - # `expand_locations(...).split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309 - # _expand_locations returns an array of args to support $(execpaths) expansions. - # TODO: If the string has intentional spaces or if one or more of the expanded file - # locations has a space in the name, we will incorrectly split it into multiple arguments for a in ctx.attr.args: - args.add_all([expand_variables(ctx, e, outs = outputs) for e in ctx.expand_location(a, targets = ctx.attr.srcs).split(" ")]) + args.add_all(split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.srcs), outs = outputs))) envs = {} for k, v in ctx.attr.env.items(): - envs[k] = " ".join([expand_variables(ctx, e, outs = outputs, attribute_name = "env") for e in ctx.expand_location(v, targets = ctx.attr.srcs).split(" ")]) + envs[k] = expand_variables(ctx, ctx.expand_location(v, targets = ctx.attr.srcs), outs = outputs, attribute_name = "env") stamp = maybe_stamp(ctx) if stamp: diff --git a/lib/private/strings.bzl b/lib/private/strings.bzl index c0fe54119..acf5157ff 100644 --- a/lib/private/strings.bzl +++ b/lib/private/strings.bzl @@ -583,3 +583,73 @@ def hex(number): hex_string = "0" return "{}0x{}".format("-" if is_signed else "", hex_string) + +def split_args(s): + """Split a string into a list space separated arguments + + Unlike the naive `.split(" ")`, this function takes quoted strings + and escapes into account. + + Args: + s: input string + + Returns: + list of strings with each an argument found in the input string + """ + args = [] + arg = "" + single_quote = False + double_quote = False + escape = False + for c in s.elems(): + if c == "\\": + escape = True + continue + if escape: + # this is an escaped character + if c == " ": + # a dangling escape is not an escape, put the backslack back + arg = arg + "\\" + else: + escape = False + else: + # not an escaped character, look for quotes & spaces + if c == "'": + # single quote char + if double_quote: + # we're in a double quote so single quotes are just chars + pass + elif single_quote: + # end of single quote + single_quote = False + continue + else: + # start of single quote + single_quote = True + continue + elif c == "\"": + # double quote char + if single_quote: + # we're in a single quote so double quotes are just chars + pass + elif double_quote: + # end of double quote + double_quote = False + continue + else: + # start of double quote + double_quote = True + continue + if c == " ": + if not single_quote and not double_quote: + # splitting space + if arg != "": + args.append(arg) + arg = "" + continue + arg = arg + c + + # final arg? + if arg != "": + args.append(arg) + return args diff --git a/lib/strings.bzl b/lib/strings.bzl index 8ae55f0ce..12d4850a3 100644 --- a/lib/strings.bzl +++ b/lib/strings.bzl @@ -1,7 +1,8 @@ "Utilities for strings" -load("//lib/private:strings.bzl", _chr = "chr", _hex = "hex", _ord = "ord") +load("//lib/private:strings.bzl", _chr = "chr", _hex = "hex", _ord = "ord", _split_args = "split_args") chr = _chr ord = _ord hex = _hex +split_args = _split_args diff --git a/lib/tests/strings_tests.bzl b/lib/tests/strings_tests.bzl index a156dde80..177dae286 100644 --- a/lib/tests/strings_tests.bzl +++ b/lib/tests/strings_tests.bzl @@ -2,7 +2,7 @@ load("@bazel_skylib//lib:partial.bzl", "partial") load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest") -load("//lib/private:strings.bzl", "chr", "hex", "ord") +load("//lib/private:strings.bzl", "chr", "hex", "ord", "split_args") def _ord_test_impl(ctx): env = unittest.begin(ctx) @@ -56,10 +56,38 @@ def _hex_test_impl(ctx): hex_test = unittest.make(_hex_test_impl) +def _split_args_test_impl(ctx): + env = unittest.begin(ctx) + + asserts.equals(env, ["a", "b", "c", "d"], split_args("a b c d")) + + # sinle quotes + asserts.equals(env, ["a", "b c", "d"], split_args("a 'b c' d")) + + # double quotes + asserts.equals(env, ["a", "b c", "d"], split_args("a \"b c\" d")) + + # escaped single quotes + asserts.equals(env, ["a", "'b", "c'", "d"], split_args("a \\'b c\\' d")) + + # escaped double quotes + asserts.equals(env, ["a", "\"b", "c\"", "d"], split_args("a \\\"b c\\\" d")) + + # sinle quotes containing escaped quotes + asserts.equals(env, ["a", "b'\" c", "d"], split_args("a 'b\\'\\\" c' d")) + + # double quotes containing escaped quotes + asserts.equals(env, ["a", "b'\" c", "d"], split_args("a \"b\\'\\\" c\" d")) + + return unittest.end(env) + +split_args_test = unittest.make(_split_args_test_impl) + def strings_test_suite(): unittest.suite( "strings_tests", partial.make(ord_test, timeout = "short"), partial.make(chr_test, timeout = "short"), partial.make(hex_test, timeout = "short"), + partial.make(split_args_test, timeout = "short"), )