From 3abd5c0fcb4330eedcc130e55115162815360bb3 Mon Sep 17 00:00:00 2001 From: Anders Eknert Date: Wed, 17 Jul 2024 16:41:48 +0200 Subject: [PATCH] Rule: unused-output-variable (#922) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the rules I've wanted since the project was started, but not until now would this have been wildly expensive to add. With the recent fixes that allow us to reuse the result of `walk`ing the AST, that's no longer the case. Still, this required quite some time and effort to get right! NOTE: this rule currently only considers output variables as found in references, i.e. `x` in `input[x]` and NOT `x` in `some x, _ in input`. While that would be easy to add, I'm not sure if those are really "output variables" or should be considered as such... 🤔 or if we should just have another rules for "unused iteration variable" or something like that. Either way, this is a big step forward! Fixes #60 Signed-off-by: Anders Eknert --- README.md | 3 +- bundle/regal/ast/ast.rego | 58 +++++++--- bundle/regal/ast/search.rego | 25 +++- bundle/regal/config/provided/data.yaml | 2 + .../rules/bugs/unused_output_variable.rego | 84 ++++++++++++++ .../bugs/unused_output_variable_test.rego | 109 ++++++++++++++++++ docs/rules/bugs/unused-output-variable.md | 103 +++++++++++++++++ e2e/testdata/violations/most_violations.rego | 5 + internal/lsp/examples/examples.go | 3 +- 9 files changed, 371 insertions(+), 21 deletions(-) create mode 100644 bundle/regal/rules/bugs/unused_output_variable.rego create mode 100644 bundle/regal/rules/bugs/unused_output_variable_test.rego create mode 100644 docs/rules/bugs/unused-output-variable.md diff --git a/README.md b/README.md index 7d2ab7a2..c18ea75b 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,7 @@ The following rules are currently available: | bugs | [rule-shadows-builtin](https://docs.styra.com/regal/rules/bugs/rule-shadows-builtin) | Rule name shadows built-in | | bugs | [top-level-iteration](https://docs.styra.com/regal/rules/bugs/top-level-iteration) | Iteration in top-level assignment | | bugs | [unassigned-return-value](https://docs.styra.com/regal/rules/bugs/unassigned-return-value) | Non-boolean return value unassigned | +| bugs | [unused-output-variable](https://docs.styra.com/regal/rules/bugs/unused-output-variable) | Unused output variable | | bugs | [var-shadows-builtin](https://docs.styra.com/regal/rules/bugs/var-shadows-builtin) | Variable name shadows built-in | | bugs | [zero-arity-function](https://docs.styra.com/regal/rules/bugs/zero-arity-function) | Avoid functions without args | | custom | [forbidden-function-call](https://docs.styra.com/regal/rules/custom/forbidden-function-call) | Forbidden function call | @@ -696,7 +697,7 @@ in the near future: - [ ] Allow remediation of more `style` category rules using the `regal fix` command - [ ] Add [unused-rule](https://github.com/StyraInc/regal/issues/358) linter -- [ ] Add [unused-output-variable](https://github.com/StyraInc/regal/issues/60) linter +- [x] Add [unused-output-variable](https://github.com/StyraInc/regal/issues/60) linter ### Language Server diff --git a/bundle/regal/ast/ast.rego b/bundle/regal/ast/ast.rego index 46efe394..c1219374 100644 --- a/bundle/regal/ast/ast.rego +++ b/bundle/regal/ast/ast.rego @@ -31,12 +31,7 @@ is_constant(value) if value.type in scalar_types is_constant(value) if { value.type in {"array", "object"} - not has_var(value) -} - -has_var(value) if { - walk(value.value, [_, term]) - term.type == "var" + not has_term_var(value.value) } builtin_names := object.keys(config.capabilities.builtins) @@ -119,7 +114,7 @@ functions := [rule | ] # METADATA -# description: a list of the names for the giiven rule (if function) +# description: a list of the argument names for the given rule (if function) function_arg_names(rule) := [arg.value | some arg in rule.head.args] # METADATA @@ -134,11 +129,6 @@ identifiers := rule_and_function_names | imported_identifiers # description: all rule names in the input AST (excluding functions) rule_names contains ref_to_string(rule.head.ref) if some rule in rules -_function_arg_names(rule) := {arg.value | - some arg in rule.head.args - arg.type == "var" -} - # METADATA # description: | # determine if var in ref (e.g. `x` in `input[x]`) is used as input or output @@ -157,12 +147,52 @@ is_ref(value) if value.type == "ref" is_ref(value) if value[0].type == "ref" -all_rules_refs contains value if { - walk(input.rules, [_, value]) +refs[rule_index] contains value if { + some i, rule in _rules + + # converting to string until https://github.com/open-policy-agent/opa/issues/6736 is fixed + rule_index := sprintf("%d", [i]) + + walk(rule, [_, value]) is_ref(value) } +# METADATA +# description: | +# a map containing all function calls (built-in and custom) in the input AST +# keyed by rule index +function_calls[rule_index] contains call if { + some i, rule in _rules + + # converting to string until https://github.com/open-policy-agent/opa/issues/6736 is fixed + rule_index := sprintf("%d", [i]) + + some ref in refs[rule_index] + + name := ref_to_string(ref[0].value) + args := [arg | + some i, arg in array.slice(ref, 1, 100) + + not _exclude_arg(name, i, arg) + ] + + call := { + "name": ref_to_string(ref[0].value), + "location": ref[0].location, + "args": args, + } +} + +# these will be aggregated as calls anyway, so let's try and keep this flat +_exclude_arg(_, _, arg) if arg.type == "call" + +# first "arg" of assign is the variable to assign to.. special case we simply +# ignore here, as it's covered elsewhere +_exclude_arg("assign", 0, _) + +all_rules_refs contains refs[_][_] + # METADATA # title: all_refs # description: set containing all references found in the input AST diff --git a/bundle/regal/ast/search.rego b/bundle/regal/ast/search.rego index 1c1623c0..2adf1875 100644 --- a/bundle/regal/ast/search.rego +++ b/bundle/regal/ast/search.rego @@ -71,14 +71,24 @@ _find_every_vars(value) := var if { # METADATA # description: | -# traverses all nodes in provided term (using `walk`), and returns an array with -# all variables declared in term, i,e [x, y] or {x: y}, etc. -find_term_vars(term) := [value | - walk(term, [_, value]) +# traverses all nodes in provided terms (using `walk`), and returns an array with +# all variables declared in terms, i,e [x, y] or {x: y}, etc. +find_term_vars(terms) := [term | + walk(terms, [_, term]) - value.type == "var" + term.type == "var" ] +# METADATA +# description: | +# traverses all nodes in provided terms (using `walk`), and returns true if any variable +# is found in terms, with early exit (as opposed to find_term_vars) +has_term_var(terms) if { + walk(terms, [_, term]) + + term.type == "var" +} + _find_vars(value, last) := {"term": find_term_vars(function_ret_args(fn_name, value))} if { last == "terms" value[0].type == "ref" @@ -223,6 +233,11 @@ find_names_in_local_scope(rule, location) := names if { names := fn_arg_names | var_names } +_function_arg_names(rule) := {arg.value | + some arg in rule.head.args + arg.type == "var" +} + # METADATA # description: | # similar to `find_vars_in_local_scope`, but returns all variable names in scope diff --git a/bundle/regal/config/provided/data.yaml b/bundle/regal/config/provided/data.yaml index 593f6b74..c36e54b4 100644 --- a/bundle/regal/config/provided/data.yaml +++ b/bundle/regal/config/provided/data.yaml @@ -40,6 +40,8 @@ rules: level: error unassigned-return-value: level: error + unused-output-variable: + level: error var-shadows-builtin: level: error zero-arity-function: diff --git a/bundle/regal/rules/bugs/unused_output_variable.rego b/bundle/regal/rules/bugs/unused_output_variable.rego new file mode 100644 index 00000000..76d7bf8b --- /dev/null +++ b/bundle/regal/rules/bugs/unused_output_variable.rego @@ -0,0 +1,84 @@ +# METADATA +# description: Unused output variable +package regal.rules.bugs["unused-output-variable"] + +import rego.v1 + +import data.regal.ast +import data.regal.result + +# METADATA +# description: | +# The `report` set contains unused output vars from `some` declarations. Using a +# stricter ruleset than OPA, Regal considers an output var "unused" if it's used +# only once in a ref, as that usage may just as well be replaced by a wildcard. +# ``` +# some y +# x := data.foo.bar[y] +# # y not used later +# ``` +# Would better be written as: +# ``` +# some x in data.foo.bar +# ``` +report contains violation if { + some rule_index, name + + var_refs := _ref_vars[rule_index][name] + + count(var_refs) == 1 + + some var in var_refs + + not _var_in_head(input.rules[to_number(rule_index)].head, var.value) + not _var_in_call(ast.function_calls, rule_index, var.value) + + violation := result.fail(rego.metadata.chain(), result.ranged_location_from_text(var)) +} + +_ref_vars[rule_index][var.value] contains var if { + some rule_index + var := ast.vars[rule_index].ref[_] + + not startswith(var.value, "$") +} + +_var_in_head(head, name) if head.value.value == name + +_var_in_head(head, name) if { + some var in ast.find_term_vars(head.value.value) + + var.value == name +} + +_var_in_head(head, name) if head.key.value == name + +_var_in_head(head, name) if { + some var in ast.find_term_vars(head.key.value) + + var.value == name +} + +_var_in_call(calls, rule_index, name) if _var_in_arg(calls[rule_index][_].args[_], name) + +_var_in_arg(arg, name) if { + arg.type == "var" + arg.value == name +} + +_var_in_arg(arg, name) if { + arg.type == "ref" + + some val in arg.value + + val.type == "var" + val.value == name +} + +_var_in_arg(arg, name) if { + arg.type in {"array", "object"} + + some var in ast.find_term_vars(arg) + + var.value == name +} diff --git a/bundle/regal/rules/bugs/unused_output_variable_test.rego b/bundle/regal/rules/bugs/unused_output_variable_test.rego new file mode 100644 index 00000000..217ffedd --- /dev/null +++ b/bundle/regal/rules/bugs/unused_output_variable_test.rego @@ -0,0 +1,109 @@ +package regal.rules.bugs["unused-output-variable_test"] + +import rego.v1 + +import data.regal.ast +import data.regal.config + +import data.regal.rules.bugs["unused-output-variable"] as rule + +test_fail_unused_output_variable if { + module := ast.with_rego_v1(` + fail if { + input[x] + } + `) + + r := rule.report with input as module + r == {{ + "category": "bugs", + "description": "Unused output variable", + "level": "error", + "location": {"col": 9, "end": {"col": 10, "row": 7}, "file": "policy.rego", "row": 7, "text": "\t\tinput[x]"}, + "related_resources": [{ + "description": "documentation", + "ref": config.docs.resolve_url("$baseUrl/$category/unused-output-variable", "bugs"), + }], + "title": "unused-output-variable", + }} +} + +test_fail_unused_output_variable_some if { + module := ast.with_rego_v1(` + fail if { + some xx + input[xx] + } + `) + + r := rule.report with input as module + r == {{ + "category": "bugs", + "description": "Unused output variable", + "level": "error", + "location": {"col": 9, "end": {"col": 11, "row": 8}, "file": "policy.rego", "row": 8, "text": "\t\tinput[xx]"}, + "related_resources": [{ + "description": "documentation", + "ref": config.docs.resolve_url("$baseUrl/$category/unused-output-variable", "bugs"), + }], + "title": "unused-output-variable", + }} +} + +test_success_unused_wildcard if { + module := ast.with_rego_v1(` + success if { + input[_] + } + `) + + r := rule.report with input as module + r == set() +} + +test_success_not_unused_variable_in_head_value if { + module := ast.with_rego_v1(` + success := x if { + input[x] + } + `) + + r := rule.report with input as module + r == set() +} + +test_success_not_unused_variable_in_head_key if { + module := ast.with_rego_v1(` + success contains x if { + input[x] + } + `) + + r := rule.report with input as module + r == set() +} + +test_success_not_unused_output_variable_function_call if { + module := ast.with_rego_v1(` + success if { + some x + input[x] + regex.match("[x]", x) + } + `) + + r := rule.report with input as module + r == set() +} + +test_success_not_unused_output_variable_other_ref if { + module := ast.with_rego_v1(` + success if { + some x + input[x] == data.foo[x] + } + `) + + r := rule.report with input as module + r == set() +} diff --git a/docs/rules/bugs/unused-output-variable.md b/docs/rules/bugs/unused-output-variable.md new file mode 100644 index 00000000..8b4e42f1 --- /dev/null +++ b/docs/rules/bugs/unused-output-variable.md @@ -0,0 +1,103 @@ +# unused-output-variable + +**Summary**: Unused output variable + +**Category**: Bugs + +**Avoid** +```rego +package policy + +allow if { + some x + role := input.user.roles[x] + + # do something with "role", but not "x" +} +``` + +**Prefer** +```rego +package policy + +allow if { + # don't declare `x` output var as it is redundant + role := input.user.roles[_] + + # do something with "role" +} + +# or better (see prefer-some-in-iteration rule) + +allow if { + some role in input.user.roles + + # do something with "role" +} + +# or actually _use_ value bound to `x` somewhere, like in another +# reference, function call, etc + +allow if { + some x + input.user.roles[x] == data.required_roles[x] +} +``` + +## Rationale + +Output variables are variables "automatically" bound to values during evaluation, most commonly in iteration. This is +a powerful feature of Rego that when used correctly can create concise but readable policies. However, output variables +that are declared but not later referenced are _effectively_ unused and should be replaced by wildcard variables (`_`), +or the use of `some .. in` iteration. + +OPA itself has two methods for detecting and reporting unused variables as errors — one when using `some`: + +```rego +allow if { + # `x` is never used in the body — this is a compiler error + some x + input.user.roles[role] + + role == "admin" +} +``` + +And a [strict mode](https://www.openpolicyagent.org/docs/latest/policy-language/#strict-mode) check for unused +variables defined in assignment (`:=`), or as a function arguments: + +```rego +allow(role, required) { + required_roles := data.required_roles + + role == "admin" + + # `required` never used in body, and neither is `required_roles` + # both would be errors when strict mode is enabled +} +``` + +Neither of these methods however considers an unused output variable as "unused". + +## Configuration Options + +This linter rule provides the following configuration options: + +```yaml +rules: + bugs: + unused-output-variable: + # one of "error", "warning", "ignore" + level: error +``` + +## Related Resources + +- Regal Docs: [prefer-some-in-iteration](https://docs.styra.com/regal/rules/style/prefer-some-in-iteration) +- OPA Docs: [Strict Mode](https://www.openpolicyagent.org/docs/latest/policy-language/#strict-mode) + +## Community + +If you think you've found a problem with this rule or its documentation, would like to suggest improvements, new rules, +or just talk about Regal in general, please join us in the `#regal` channel in the Styra Community +[Slack](https://communityinviter.com/apps/styracommunity/signup)! diff --git a/e2e/testdata/violations/most_violations.rego b/e2e/testdata/violations/most_violations.rego index 13daf2d9..46e483b4 100644 --- a/e2e/testdata/violations/most_violations.rego +++ b/e2e/testdata/violations/most_violations.rego @@ -100,6 +100,11 @@ some_rule := true var_shadows_builtin if http := true +unused_output_variable if { + some x + input[x] +} + ### Idiomatic ### custom_has_key_construct(map, key) if { diff --git a/internal/lsp/examples/examples.go b/internal/lsp/examples/examples.go index e04415b0..d3749b96 100644 --- a/internal/lsp/examples/examples.go +++ b/internal/lsp/examples/examples.go @@ -1,9 +1,10 @@ package examples import ( - _ "embed" "encoding/json" "fmt" + + _ "embed" ) // GetBuiltInLink returns the URL for the built-in function documentation