From b3c2281219893ee422e18f9d3917e8cc8216114d Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 20 Sep 2023 21:08:13 +0200 Subject: [PATCH 1/9] lib.fileset: Order noEval last --- lib/fileset/internal.nix | 12 ++++++++---- lib/fileset/tests.sh | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index b3dbc5b33dc3f..43e6dd31b7110 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -114,8 +114,10 @@ rec { # The one and only! _internalIsEmptyWithoutBase = true; - # Double __ to make it be evaluated and ordered first - __noEval = throw _noEvalMessage; + # Due to alphabetical ordering, this is evaluated last, + # which makes the nix repl output nicer than if it would be ordered first. + # It also allows evaluating it strictly up to this error, which could be useful + _noEval = throw _noEvalMessage; }; # Create a fileset, see ./README.md#fileset @@ -137,8 +139,10 @@ rec { _internalBaseComponents = components parts.subpath; _internalTree = tree; - # Double __ to make it be evaluated and ordered first - __noEval = throw _noEvalMessage; + # Due to alphabetical ordering, this is evaluated last, + # which makes the nix repl output nicer than if it would be ordered first. + # It also allows evaluating it strictly up to this error, which could be useful + _noEval = throw _noEvalMessage; }; # Coerce a value to a fileset, erroring when the value cannot be coerced. diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 525b8aaa91707..1b53e33073ef1 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -84,14 +84,14 @@ expectStorePath() { crudeUnquoteJSON <<< "$result" } -# Check that a nix expression fails to evaluate (strictly, coercing to json, read-write-mode). +# Check that a nix expression fails to evaluate (strictly, read-write-mode). # And check the received stderr against a regex # The expression has `lib.fileset` in scope. # Usage: expectFailure NIX REGEX expectFailure() { local expr=$1 local expectedErrorRegex=$2 - if result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace 2>"$tmp/stderr" \ + if result=$(nix-instantiate --eval --strict --read-write-mode --show-trace 2>"$tmp/stderr" \ --expr "$prefixExpression $expr"); then die "$expr evaluated successfully to $result, but it was expected to fail" fi From 86802e19ceffbdff6a2a80dbec2d5f14f9f40efa Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 21 Sep 2023 00:59:09 +0200 Subject: [PATCH 2/9] lib.fileset: _simplifyTree -> _normaliseTreeFilter --- lib/fileset/internal.nix | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 43e6dd31b7110..ad10b9ac0bbed 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -241,22 +241,22 @@ rec { // value; /* - Simplify a filesetTree recursively: - - Replace all directories that have no files with `null` + A normalisation of a filesetTree suitable for use in builtins.path-based filtering: + - Replace all directories that have no files with `null`. This removes directories that would be empty - - Replace all directories with all files with `"directory"` + - Replace all directories with all files with `"directory"`. This speeds up the source filter function Note that this function is strict, it evaluates the entire tree Type: Path -> filesetTree -> filesetTree */ - _simplifyTree = path: tree: + _normaliseTreeFilter = path: tree: if tree == "directory" || isAttrs tree then let entries = _directoryEntries path tree; - simpleSubtrees = mapAttrs (name: _simplifyTree (path + "/${name}")) entries; - subtreeValues = attrValues simpleSubtrees; + normalisedSubtrees = mapAttrs (name: _normaliseTreeFilter (path + "/${name}")) entries; + subtreeValues = attrValues normalisedSubtrees; in # This triggers either when all files in a directory are filtered out # Or when the directory doesn't contain any files at all @@ -266,7 +266,7 @@ rec { else if all isString subtreeValues then "directory" else - simpleSubtrees + normalisedSubtrees else tree; @@ -277,7 +277,7 @@ rec { let # Simplify the tree, necessary to make sure all empty directories are null # which has the effect that they aren't included in the result - tree = _simplifyTree fileset._internalBase fileset._internalTree; + tree = _normaliseTreeFilter fileset._internalBase fileset._internalTree; # The base path as a string with a single trailing slash baseString = From efbcf5938fca42f004e34c3f56f9ab1a12a95559 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 21 Sep 2023 01:00:59 +0200 Subject: [PATCH 3/9] lib.fileset: Add internal helpers for pretty-printing --- lib/fileset/internal.nix | 109 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index ad10b9ac0bbed..ebfe69b010def 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -7,11 +7,14 @@ let isString pathExists readDir - typeOf + seq split + trace + typeOf ; inherit (lib.attrsets) + attrNames attrValues mapAttrs setAttrByPath @@ -241,7 +244,7 @@ rec { // value; /* - A normalisation of a filesetTree suitable for use in builtins.path-based filtering: + A normalisation of a filesetTree suitable filtering with `builtins.path`: - Replace all directories that have no files with `null`. This removes directories that would be empty - Replace all directories with all files with `"directory"`. @@ -270,6 +273,108 @@ rec { else tree; + /* + A minimal normalisation of a filesetTree, intended for pretty-printing: + - If all children of a path are recursively included or empty directories, the path itself is also recursively included + - If all children of a path are fully excluded or empty directories, the path itself is an empty directory + - Other empty directories are represented with the special "emptyDir" string + While these could be replaced with `null`, that would take another mapAttrs + + Note that this function is partially lazy. + + Type: Path -> filesetTree -> filesetTree (with "emptyDir"'s) + */ + _normaliseTreeMinimal = path: tree: + if tree == "directory" || isAttrs tree then + let + entries = _directoryEntries path tree; + normalisedSubtrees = mapAttrs (name: _normaliseTreeMinimal (path + "/${name}")) entries; + subtreeValues = attrValues normalisedSubtrees; + in + # If there are no entries, or all entries are empty directories, return "emptyDir". + # After this branch we know that there's at least one file + if all (value: value == "emptyDir") subtreeValues then + "emptyDir" + + # If all subtrees are fully included or empty directories + # (both of which are coincidentally represented as strings), return "directory". + # This takes advantage of the fact that empty directories can be represented as included directories. + # Note that the tree == "directory" check allows avoiding recursion + else if tree == "directory" || all (value: isString value) subtreeValues then + "directory" + + # If all subtrees are fully excluded or empty directories, return null. + # This takes advantage of the fact that empty directories can be represented as excluded directories + else if all (value: isNull value || value == "emptyDir") subtreeValues then + null + + # Mix of included and excluded entries + else + normalisedSubtrees + else + tree; + + # Trace a filesetTree in a pretty way when the resulting value is evaluated. + # This can handle both normal filesetTree's, and ones returned from _normaliseTreeMinimal + # Type: Path -> filesetTree (with "emptyDir"'s) -> Null + _printMinimalTree = base: tree: + let + treeSuffix = tree: + if isAttrs tree then + "" + else if tree == "directory" then + " (all files in directory)" + else + # This does "leak" the file type strings of the internal representation, + # but this is the main reason these file type strings even are in the representation! + # TODO: Consider removing that information from the internal representation for performance. + # The file types can still be printed by querying them only during tracing + " (${tree})"; + + # Only for attribute set trees + traceTreeAttrs = prevLine: indent: tree: + foldl' (prevLine: name: + let + subtree = tree.${name}; + + # Evaluating this prints the line for this subtree + thisLine = + trace "${indent}- ${name}${treeSuffix subtree}" prevLine; + in + if subtree == null || subtree == "emptyDir" then + # Don't print anything at all if this subtree is empty + prevLine + else if isAttrs subtree then + # A directory with explicit entries + # Do print this node, but also recurse + traceTreeAttrs thisLine "${indent} " subtree + else + # Either a file, or a recursively included directory + # Do print this node but no further recursion needed + thisLine + ) prevLine (attrNames tree); + + # Evaluating this will print the first line + firstLine = + if tree == null || tree == "emptyDir" then + trace "(empty)" null + else + trace "${toString base}${treeSuffix tree}" null; + in + if isAttrs tree then + traceTreeAttrs firstLine "" tree + else + firstLine; + + # Pretty-print a file set in a pretty way when the resulting value is evaluated + # Type: fileset -> Null + _printFileset = fileset: + if fileset._internalIsEmptyWithoutBase then + trace "(empty)" null + else + _printMinimalTree fileset._internalBase + (_normaliseTreeMinimal fileset._internalBase fileset._internalTree); + # Turn a fileset into a source filter function suitable for `builtins.path` # Only directories recursively containing at least one files are recursed into # Type: Path -> fileset -> (String -> String -> Bool) From ac2c8d321c879aa36046d404661e8a3e9c7a93fe Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 21 Sep 2023 01:24:27 +0200 Subject: [PATCH 4/9] lib.fileset: Make expectEqual check more --- lib/fileset/tests.sh | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 1b53e33073ef1..13e984806c7db 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -57,18 +57,35 @@ with lib.fileset;' expectEqual() { local actualExpr=$1 local expectedExpr=$2 - if ! actualResult=$(nix-instantiate --eval --strict --show-trace \ + if actualResult=$(nix-instantiate --eval --strict --show-trace 2>"$tmp"/actualStderr \ --expr "$prefixExpression ($actualExpr)"); then - die "$actualExpr failed to evaluate, but it was expected to succeed" + actualExitCode=$? + else + actualExitCode=$? fi - if ! expectedResult=$(nix-instantiate --eval --strict --show-trace \ + actualStderr=$(< "$tmp"/actualStderr) + + if expectedResult=$(nix-instantiate --eval --strict --show-trace 2>"$tmp"/expectedStderr \ --expr "$prefixExpression ($expectedExpr)"); then - die "$expectedExpr failed to evaluate, but it was expected to succeed" + expectedExitCode=$? + else + expectedExitCode=$? + fi + expectedStderr=$(< "$tmp"/expectedStderr) + + if [[ "$actualExitCode" != "$expectedExitCode" ]]; then + echo "$actualStderr" >&2 + echo "$actualResult" >&2 + die "$actualExpr should have exited with $expectedExitCode, but it exited with $actualExitCode" fi if [[ "$actualResult" != "$expectedResult" ]]; then die "$actualExpr should have evaluated to $expectedExpr:\n$expectedResult\n\nbut it evaluated to\n$actualResult" fi + + if [[ "$actualStderr" != "$expectedStderr" ]]; then + die "$actualExpr should have had this on stderr:\n$expectedStderr\n\nbut it was\n$actualStderr" + fi } # Check that a nix expression evaluates successfully to a store path and returns it (without quotes). From 467e428f0061bc25841ab7e555aee25bfa24c39b Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 21 Sep 2023 01:01:45 +0200 Subject: [PATCH 5/9] lib.fileset.trace: init --- lib/fileset/default.nix | 37 ++++++++++ lib/fileset/tests.sh | 151 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index ab26112c94709..8eee7cf70003a 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -6,12 +6,14 @@ let _coerceMany _toSourceFilter _unionMany + _printFileset ; inherit (builtins) isList isPath pathExists + seq typeOf ; @@ -275,4 +277,39 @@ If a directory does not recursively contain any file, it is omitted from the sto _unionMany ]; + /* + Incrementally evaluate and trace a file set in a pretty way. + This function is only intended for debugging purposes. + The exact tracing format is unspecified and may change. + + Type: + trace :: FileSet -> Any -> Any + + Example: + trace (unions [ ./Makefile ./src ./tests/run.sh ]) null + => + trace: /home/user/src/myProject + trace: - Makefile (regular) + trace: - src (all files in directory) + trace: - tests + trace: - run.sh (regular) + null + */ + trace = + /* + The file set to trace. + + This argument can also be a path, + which gets [implicitly coerced to a file set](#sec-fileset-path-coercion). + */ + fileset: + let + # "fileset" would be a better name, but that would clash with the argument name, + # and we cannot change that because of https://github.com/nix-community/nixdoc/issues/76 + actualFileset = _coerce "lib.fileset.trace: argument" fileset; + in + seq + (_printFileset actualFileset) + (x: x); + } diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 13e984806c7db..1b70ad433d931 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -118,6 +118,24 @@ expectFailure() { fi } +# Check that the traces of a Nix expression are as expected when evaluated. +# The expression has `lib.fileset` in scope. +# Usage: expectTrace NIX STR +expectTrace() { + local expr=$1 + local expectedTrace=$2 + + nix-instantiate --eval --show-trace >/dev/null 2>"$tmp"/stderrTrace \ + --expr "$prefixExpression trace ($expr)" || true + + actualTrace=$(sed -n 's/^trace: //p' "$tmp/stderrTrace") + + if [[ "$actualTrace" != "$expectedTrace" ]]; then + cat "$tmp"/stderrTrace >&2 + die "$expr should have traced this:\n\n$expectedTrace\n\nbut this was actually traced:\n\n$actualTrace" + fi +} + # We conditionally use inotifywait in checkFileset. # Check early whether it's available # TODO: Darwin support, though not crucial since we have Linux CI @@ -518,6 +536,139 @@ done # So, just using 1000 files for now. checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.readDir ./.))' +## Tracing + +# The second trace argument is returned +expectEqual 'trace ./. "some value"' 'builtins.trace "(empty)" "some value"' + +# The tracing happens before the final argument is needed +expectEqual 'trace ./.' 'builtins.trace "(empty)" (x: x)' + +# Tracing an empty directory shows it as such +expectTrace './.' '(empty)' + +# This also works if there are directories, but all recursively without files +mkdir -p a/b/c +expectTrace './.' '(empty)' +rm -rf -- * + +# The empty file set without a base also prints as empty +expectTrace '_emptyWithoutBase' '(empty)' +expectTrace 'unions [ ]' '(empty)' + +# If a directory is fully included, print it as such +touch a +expectTrace './.' "$work"' (all files in directory)' +rm -rf -- * + +# If a directory is not fully included, recurse +mkdir a b +touch a/{x,y} b/{x,y} +expectTrace 'union ./a/x ./b' "$work"' +- a + - x (regular) +- b (all files in directory)' +rm -rf -- * + +# If an included path is a file, print its type +touch a x +ln -s a b +mkfifo c +expectTrace 'unions [ ./a ./b ./c ]' "$work"' +- a (regular) +- b (symlink) +- c (unknown)' +rm -rf -- * + +# Do not print directories without any files recursively +mkdir -p a/b/c +touch b x +expectTrace 'unions [ ./a ./b ]' "$work"' +- b (regular)' +rm -rf -- * + +# If all children are either fully included or empty directories, +# the parent should be printed as fully included +touch a +mkdir b +expectTrace 'union ./a ./b' "$work"' (all files in directory)' +rm -rf -- * + +mkdir -p x/b x/c +touch x/a +touch a +# If all children are either fully excluded or empty directories, +# the parent should be shown (or rather not shown) as fully excluded +expectTrace 'unions [ ./a ./x/b ./x/c ]' "$work"' +- a (regular)' +rm -rf -- * + +# Completely filtered out directories also print as empty +touch a +expectTrace '_create ./. {}' '(empty)' +rm -rf -- * + +# A general test to make sure the resulting format makes sense +# Such as indentation and ordering +mkdir -p bar/{qux,someDir} +touch bar/{baz,qux,someDir/a} foo +touch bar/qux/x +ln -s x bar/qux/a +mkfifo bar/qux/b +expectTrace 'unions [ + ./bar/baz + ./bar/qux/a + ./bar/qux/b + ./bar/someDir/a + ./foo +]' "$work"' +- bar + - baz (regular) + - qux + - a (symlink) + - b (unknown) + - someDir (all files in directory) +- foo (regular)' +rm -rf -- * + +# For recursively included directories, +# `(all files in directory)` should only be used if there's at least one file (otherwise it would be `(empty)`) +# and this should be determined without doing a full search +# +# Create a 100 level deep path, which would cause a stack overflow with the below limit +# if recursed into to figure out if the current directory is empty +mkdir -p "b/$(seq -s/ 100)" +# But that can be avoided by short-circuiting if the file a (here intentionally ordered before b) is checked first. +# In a more realistic scenario, some directories might need to be recursed into, +# but a file would be quickly found to trigger the short-circuit. +touch a +( + # Locally limit the stack to 100 * 1024 bytes, this would cause a stack overflow if the short-circuiting isn't implemented + ulimit -s 100 + expectTrace './.' "$work"' (all files in directory)' +) +rm -rf -- * + +# Partially included directories trace entries as they are evaluated +touch a b c +expectTrace '_create ./. { a = null; b = "regular"; c = throw "b"; }' "$work"' +- b (regular)' + +# Except entries that need to be evaluated to even figure out if it's only partially included: +# Here the directory could be fully excluded or included just from seeing a and b, +# so c needs to be evaluated before anything can be traced +expectTrace '_create ./. { a = null; b = null; c = throw "c"; }' '' +expectTrace '_create ./. { a = "regular"; b = "regular"; c = throw "c"; }' '' +rm -rf -- * + +# We can trace large directories (10000 here) without any problems +filesToCreate=({0..9}{0..9}{0..9}{0..9}) +expectedTrace=$work$'\n'$(printf -- '- %s (regular)\n' "${filesToCreate[@]}") +# We need an excluded file so it doesn't print as `(all files in directory)` +touch 0 "${filesToCreate[@]}" +expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace" +rm -rf -- * + # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets echo >&2 tests ok From 704452f29cc632f1d9fbc651993eed5d69ee8549 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 21 Sep 2023 01:43:39 +0200 Subject: [PATCH 6/9] lib.fileset.traceVal: init --- lib/fileset/default.nix | 54 +++++++++++++++++++++++++++++++++++++++++ lib/fileset/tests.sh | 14 +++++++++++ 2 files changed, 68 insertions(+) diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index 8eee7cf70003a..eb3b8dcb1d9b5 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -282,6 +282,12 @@ If a directory does not recursively contain any file, it is omitted from the sto This function is only intended for debugging purposes. The exact tracing format is unspecified and may change. + This function takes a final argument to return. + In comparison, [`traceVal`](#function-library-lib.fileset.traceVal) returns + the given file set argument. + + This variant is useful for tracing file sets in the Nix repl. + Type: trace :: FileSet -> Any -> Any @@ -312,4 +318,52 @@ If a directory does not recursively contain any file, it is omitted from the sto (_printFileset actualFileset) (x: x); + /* + Incrementally evaluate and trace a file set in a pretty way. + This function is only intended for debugging purposes. + The exact tracing format is unspecified and may change. + + This function returns the given file set. + In comparison, [`trace`](#function-library-lib.fileset.trace) takes another argument to return. + + This variant is useful for tracing file sets passed as arguments to other functions. + + Type: + traceVal :: FileSet -> FileSet + + Example: + toSource { + root = ./.; + fileset = traceVal (unions [ + ./Makefile + ./src + ./tests/run.sh + ]); + } + => + trace: /home/user/src/myProject + trace: - Makefile (regular) + trace: - src (all files in directory) + trace: - tests + trace: - run.sh (regular) + "/nix/store/...-source" + */ + traceVal = + /* + The file set to trace and return. + + This argument can also be a path, + which gets [implicitly coerced to a file set](#sec-fileset-path-coercion). + */ + fileset: + let + # "fileset" would be a better name, but that would clash with the argument name, + # and we cannot change that because of https://github.com/nix-community/nixdoc/issues/76 + actualFileset = _coerce "lib.fileset.traceVal: argument" fileset; + in + seq + (_printFileset actualFileset) + # We could also return the original fileset argument here, + # but that would then duplicate work for consumers of the fileset, because then they have to coerce it again + actualFileset; } diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 1b70ad433d931..80fa21961ffb9 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -130,6 +130,17 @@ expectTrace() { actualTrace=$(sed -n 's/^trace: //p' "$tmp/stderrTrace") + nix-instantiate --eval --show-trace >/dev/null 2>"$tmp"/stderrTraceVal \ + --expr "$prefixExpression traceVal ($expr)" || true + + actualTraceVal=$(sed -n 's/^trace: //p' "$tmp/stderrTraceVal") + + # Test that traceVal returns the same trace as trace + if [[ "$actualTrace" != "$actualTraceVal" ]]; then + cat "$tmp"/stderrTrace >&2 + die "$expr traced this for lib.fileset.trace:\n\n$actualTrace\n\nand something different for lib.fileset.traceVal:\n\n$actualTraceVal" + fi + if [[ "$actualTrace" != "$expectedTrace" ]]; then cat "$tmp"/stderrTrace >&2 die "$expr should have traced this:\n\n$expectedTrace\n\nbut this was actually traced:\n\n$actualTrace" @@ -541,6 +552,9 @@ checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.rea # The second trace argument is returned expectEqual 'trace ./. "some value"' 'builtins.trace "(empty)" "some value"' +# The fileset traceVal argument is returned +expectEqual 'traceVal ./.' 'builtins.trace "(empty)" (_create ./. "directory")' + # The tracing happens before the final argument is needed expectEqual 'trace ./.' 'builtins.trace "(empty)" (x: x)' From 5bbe67bf67b5728bf2c0ed8228853c661a39606d Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Tue, 26 Sep 2023 19:12:58 +0200 Subject: [PATCH 7/9] lib.fileset: Mention trace functions in the file set evaluation error --- lib/fileset/README.md | 1 - lib/fileset/internal.nix | 4 +++- lib/fileset/tests.sh | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/fileset/README.md b/lib/fileset/README.md index 2ef7b81e88503..d4c80e9433e4f 100644 --- a/lib/fileset/README.md +++ b/lib/fileset/README.md @@ -207,6 +207,5 @@ Here's a list of places in the library that need to be updated in the future: - > The file set library is currently somewhat limited but is being expanded to include more functions over time. in [the manual](../../doc/functions/fileset.section.md) -- Once a tracing function exists, `__noEval` in [internal.nix](./internal.nix) should mention it - If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed - If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index ebfe69b010def..d18e37e9d6e6c 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -106,7 +106,9 @@ rec { ]; _noEvalMessage = '' - lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.''; + lib.fileset: Directly evaluating a file set is not supported. + To turn it into a usable source, use `lib.fileset.toSource`. + To pretty-print the contents, use `lib.fileset.trace` or `lib.fileset.traceVal`.''; # The empty file set without a base path _emptyWithoutBase = { diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 80fa21961ffb9..6c6379d073491 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -327,8 +327,12 @@ expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.to expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) does not exist.' # File sets cannot be evaluated directly -expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.' -expectFailure '_emptyWithoutBase' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.' +expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is not supported. +\s*To turn it into a usable source, use `lib.fileset.toSource`. +\s*To pretty-print the contents, use `lib.fileset.trace` or `lib.fileset.traceVal`.' +expectFailure '_emptyWithoutBase' 'lib.fileset: Directly evaluating a file set is not supported. +\s*To turn it into a usable source, use `lib.fileset.toSource`. +\s*To pretty-print the contents, use `lib.fileset.trace` or `lib.fileset.traceVal`.' # Past versions of the internal representation are supported expectEqual '_coerce ": value" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; }' \ From 692b75257e415f46e477f473e65779b80a6e125c Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Mon, 2 Oct 2023 18:49:24 +0200 Subject: [PATCH 8/9] lib.fileset: Refactor inotify testing to be reusable --- lib/fileset/tests.sh | 138 +++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 51 deletions(-) diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 6c6379d073491..d1780af79e69d 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -147,16 +147,83 @@ expectTrace() { fi } -# We conditionally use inotifywait in checkFileset. +# We conditionally use inotifywait in withFileMonitor. # Check early whether it's available # TODO: Darwin support, though not crucial since we have Linux CI if type inotifywait 2>/dev/null >/dev/null; then - canMonitorFiles=1 + canMonitor=1 else - echo "Warning: Not checking that excluded files don't get accessed since inotifywait is not available" >&2 - canMonitorFiles= + echo "Warning: Cannot check for paths not getting read since the inotifywait command (from the inotify-tools package) is not available" >&2 + canMonitor= fi +# Run a function while monitoring that it doesn't read certain paths +# Usage: withFileMonitor FUNNAME PATH... +# - FUNNAME should be a bash function that: +# - Performs some operation that should not read some paths +# - Delete the paths it shouldn't read without triggering any open events +# - PATH... are the paths that should not get read +# +# This function outputs the same as FUNNAME +withFileMonitor() { + local funName=$1 + shift + + # If we can't monitor files or have none to monitor, just run the function directly + if [[ -z "$canMonitor" ]] || (( "$#" == 0 )); then + "$funName" + else + + # Use a subshell to start the coprocess in and use a trap to kill it when exiting the subshell + ( + # Assigned by coproc, makes shellcheck happy + local watcher watcher_PID + + # Start inotifywait in the background to monitor all excluded paths + coproc watcher { + # inotifywait outputs a string on stderr when ready + # Redirect it to stdout so we can access it from the coproc's stdout fd + # exec so that the coprocess is inotify itself, making the kill below work correctly + # See below why we listen to both open and delete_self events + exec inotifywait --format='%e %w' --event open,delete_self --monitor "$@" 2>&1 + } + + # This will trigger when this subshell exits, no matter if successful or not + # After exiting the subshell, the parent shell will continue executing + trap 'kill "${watcher_PID}"' exit + + # Synchronously wait until inotifywait is ready + while read -r -u "${watcher[0]}" line && [[ "$line" != "Watches established." ]]; do + : + done + + # Call the function that should not read the given paths and delete them afterwards + "$funName" + + # Get the first event + read -r -u "${watcher[0]}" event file + + # With funName potentially reading files first before deleting them, + # there's only these two possible event timelines: + # - open*, ..., open*, delete_self, ..., delete_self: If some excluded paths were read + # - delete_self, ..., delete_self: If no excluded paths were read + # So by looking at the first event we can figure out which one it is! + # This also means we don't have to wait to collect all events. + case "$event" in + OPEN*) + die "$funName opened excluded file $file when it shouldn't have" + ;; + DELETE_SELF) + # Expected events + ;; + *) + die "During $funName, Unexpected event type '$event' on file $file that should be excluded" + ;; + esac + ) + fi +} + # Check whether a file set includes/excludes declared paths as expected, usage: # # tree=( @@ -166,7 +233,7 @@ fi # ) # checkFileset './a' # Pass the fileset as the argument declare -A tree -checkFileset() ( +checkFileset() { # New subshell so that we can have a separate trap handler, see `trap` below local fileset=$1 @@ -214,54 +281,21 @@ checkFileset() ( touch "${filesToCreate[@]}" fi - # Start inotifywait in the background to monitor all excluded files (if any) - if [[ -n "$canMonitorFiles" ]] && (( "${#excludedFiles[@]}" != 0 )); then - coproc watcher { - # inotifywait outputs a string on stderr when ready - # Redirect it to stdout so we can access it from the coproc's stdout fd - # exec so that the coprocess is inotify itself, making the kill below work correctly - # See below why we listen to both open and delete_self events - exec inotifywait --format='%e %w' --event open,delete_self --monitor "${excludedFiles[@]}" 2>&1 - } - # This will trigger when this subshell exits, no matter if successful or not - # After exiting the subshell, the parent shell will continue executing - # shellcheck disable=SC2154 - trap 'kill "${watcher_PID}"' exit - - # Synchronously wait until inotifywait is ready - while read -r -u "${watcher[0]}" line && [[ "$line" != "Watches established." ]]; do - : - done - fi - - # Call toSource with the fileset, triggering open events for all files that are added to the store expression="toSource { root = ./.; fileset = $fileset; }" - storePath=$(expectStorePath "$expression") - # Remove all files immediately after, triggering delete_self events for all of them - rm -rf -- * + # We don't have lambda's in bash unfortunately, + # so we just define a function instead and then pass its name + # shellcheck disable=SC2317 + run() { + # Call toSource with the fileset, triggering open events for all files that are added to the store + expectStorePath "$expression" + if (( ${#excludedFiles[@]} != 0 )); then + rm "${excludedFiles[@]}" + fi + } - # Only check for the inotify events if we actually started inotify earlier - if [[ -v watcher ]]; then - # Get the first event - read -r -u "${watcher[0]}" event file - - # There's only these two possible event timelines: - # - open, ..., open, delete_self, ..., delete_self: If some excluded files were read - # - delete_self, ..., delete_self: If no excluded files were read - # So by looking at the first event we can figure out which one it is! - case "$event" in - OPEN) - die "$expression opened excluded file $file when it shouldn't have" - ;; - DELETE_SELF) - # Expected events - ;; - *) - die "Unexpected event type '$event' on file $file that should be excluded" - ;; - esac - fi + # Runs the function while checking that the given excluded files aren't read + storePath=$(withFileMonitor run "${excludedFiles[@]}") # For each path that should be included, make sure it does occur in the resulting store path for p in "${included[@]}"; do @@ -276,7 +310,9 @@ checkFileset() ( die "$expression included path $p when it shouldn't have" fi done -) + + rm -rf -- * +} #### Error messages ##### From 5b4e53a30074a98621681142b4698f7c45ebc716 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Mon, 2 Oct 2023 18:49:50 +0200 Subject: [PATCH 9/9] lib.fileset: Don't use ulimit for testing tracing --- lib/fileset/tests.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index d1780af79e69d..2dec97ab953d1 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -689,18 +689,23 @@ rm -rf -- * # `(all files in directory)` should only be used if there's at least one file (otherwise it would be `(empty)`) # and this should be determined without doing a full search # -# Create a 100 level deep path, which would cause a stack overflow with the below limit -# if recursed into to figure out if the current directory is empty -mkdir -p "b/$(seq -s/ 100)" -# But that can be avoided by short-circuiting if the file a (here intentionally ordered before b) is checked first. +# a is intentionally ordered first here in order to allow triggering the short-circuit behavior +# We then check that b is not read # In a more realistic scenario, some directories might need to be recursed into, # but a file would be quickly found to trigger the short-circuit. touch a -( - # Locally limit the stack to 100 * 1024 bytes, this would cause a stack overflow if the short-circuiting isn't implemented - ulimit -s 100 +mkdir b +# We don't have lambda's in bash unfortunately, +# so we just define a function instead and then pass its name +# shellcheck disable=SC2317 +run() { + # This shouldn't read b/ expectTrace './.' "$work"' (all files in directory)' -) + # Remove all files immediately after, triggering delete_self events for all of them + rmdir b +} +# Runs the function while checking that b isn't read +withFileMonitor run b rm -rf -- * # Partially included directories trace entries as they are evaluated