Skip to content

Commit

Permalink
Merge pull request #4997 from edolstra/nix-develop-arrays
Browse files Browse the repository at this point in the history
nix develop: Make bash environment parsing more robust
  • Loading branch information
edolstra authored Jul 9, 2021
2 parents d2b8b23 + e50408b commit 223e056
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 95 deletions.
174 changes: 81 additions & 93 deletions src/nix/develop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "affinity.hh"
#include "progress-bar.hh"

#include <regex>
#include <nlohmann/json.hpp>

using namespace nix;

Expand All @@ -25,94 +25,98 @@ static DevelopSettings developSettings;

static GlobalConfig::Register rDevelopSettings(&developSettings);

/* One shell variable as read from the environment file by
   readEnvironment(). The value is kept in its original bash-quoted
   form and is written back verbatim when the rc script is generated. */
struct Var
{
bool exported = true; // when set, an `export NAME` line is emitted after the assignment
bool associative = false; // when set, the variable is emitted as `declare -A NAME=(...)`
std::string quoted; // quoted string or array, exactly as matched in the environment file
};

struct BuildEnvironment
{
std::map<std::string, Var> env;
std::string bashFunctions;
};

BuildEnvironment readEnvironment(const Path & path)
{
BuildEnvironment res;

std::set<std::string> exported;

debug("reading environment file '%s'", path);

auto file = readFile(path);

auto pos = file.cbegin();

static std::string varNameRegex =
R"re((?:[a-zA-Z_][a-zA-Z0-9_]*))re";

static std::string simpleStringRegex =
R"re((?:[a-zA-Z0-9_/:\.\-\+=@%]*))re";

static std::string dquotedStringRegex =
R"re((?:\$?"(?:[^"\\]|\\[$`"\\\n])*"))re";
struct String
{
bool exported;
std::string value;
};

static std::string squotedStringRegex =
R"re((?:\$?(?:'(?:[^'\\]|\\[abeEfnrtv\\'"?])*'|\\')+))re";
using Array = std::vector<std::string>;

static std::string indexedArrayRegex =
R"re((?:\(( *\[[0-9]+\]="(?:[^"\\]|\\.)*")*\)))re";
using Associative = std::map<std::string, std::string>;

static std::regex declareRegex(
"^declare -a?x (" + varNameRegex + ")(=(" +
dquotedStringRegex + "|" + indexedArrayRegex + "))?\n");
using Value = std::variant<String, Array, Associative>;

static std::regex varRegex(
"^(" + varNameRegex + ")=(" + simpleStringRegex + "|" + squotedStringRegex + "|" + indexedArrayRegex + ")\n");
std::map<std::string, Value> vars;
std::map<std::string, std::string> bashFunctions;

/* Note: we distinguish between an indexed and associative array
using the space before the closing parenthesis. Will
undoubtedly regret this some day. */
static std::regex assocArrayRegex(
"^(" + varNameRegex + ")=" + R"re((?:\(( *\[[^\]]+\]="(?:[^"\\]|\\.)*")* *\)))re" + "\n");
static BuildEnvironment fromJSON(const Path & path)
{
BuildEnvironment res;

static std::regex functionRegex(
"^" + varNameRegex + " \\(\\) *\n");
std::set<std::string> exported;

while (pos != file.end()) {
debug("reading environment file '%s'", path);

std::smatch match;
auto json = nlohmann::json::parse(readFile(path));

if (std::regex_search(pos, file.cend(), match, declareRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
exported.insert(match[1]);
for (auto & [name, info] : json["variables"].items()) {
std::string type = info["type"];
if (type == "var" || type == "exported")
res.vars.insert({name, BuildEnvironment::String { .exported = type == "exported", .value = info["value"] }});
else if (type == "array")
res.vars.insert({name, (Array) info["value"]});
else if (type == "associative")
res.vars.insert({name, (Associative) info["value"]});
}

else if (std::regex_search(pos, file.cend(), match, varRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
res.env.insert({match[1], Var { .exported = exported.count(match[1]) > 0, .quoted = match[2] }});
for (auto & [name, def] : json["bashFunctions"].items()) {
res.bashFunctions.insert({name, def});
}

else if (std::regex_search(pos, file.cend(), match, assocArrayRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
res.env.insert({match[1], Var { .associative = true, .quoted = match[2] }});
}
return res;
}

else if (std::regex_search(pos, file.cend(), match, functionRegex, std::regex_constants::match_continuous)) {
res.bashFunctions = std::string(pos, file.cend());
break;
void toBash(std::ostream & out, const std::set<std::string> & ignoreVars) const
{
for (auto & [name, value] : vars) {
if (!ignoreVars.count(name)) {
if (auto str = std::get_if<String>(&value)) {
out << fmt("%s=%s\n", name, shellEscape(str->value));
if (str->exported)
out << fmt("export %s\n", name);
}
else if (auto arr = std::get_if<Array>(&value)) {
out << "declare -a " << name << "=(";
for (auto & s : *arr)
out << shellEscape(s) << " ";
out << ")\n";
}
else if (auto arr = std::get_if<Associative>(&value)) {
out << "declare -A " << name << "=(";
for (auto & [n, v] : *arr)
out << "[" << shellEscape(n) << "]=" << shellEscape(v) << " ";
out << ")\n";
}
}
}

else throw Error("shell environment '%s' has unexpected line '%s'",
path, file.substr(pos - file.cbegin(), 60));
for (auto & [name, def] : bashFunctions) {
out << name << " ()\n{\n" << def << "}\n";
}
}

res.env.erase("__output");
/* Return the text of a plain string variable. Throws Error when
   `value` holds an array or an associative array instead. */
static std::string getString(const Value & value)
{
    const auto * plain = std::get_if<String>(&value);
    if (!plain)
        throw Error("bash variable is not a string");
    return plain->value;
}

return res;
}
/* Return the elements of a variable as a list: an indexed array is
   returned as-is, a plain string is split with tokenizeString().
   Throws Error for associative arrays. */
static Array getStrings(const Value & value)
{
    if (const auto * list = std::get_if<Array>(&value))
        return *list;

    if (const auto * plain = std::get_if<String>(&value))
        return tokenizeString<Array>(plain->value);

    throw Error("bash variable is not a string or array");
}
};

const static std::string getEnvSh =
#include "get-env.sh.gen.hh"
Expand Down Expand Up @@ -185,19 +189,15 @@ StorePath getDerivationEnvironment(ref<Store> store, const StorePath & drvPath)

struct Common : InstallableCommand, MixProfile
{
std::set<string> ignoreVars{
std::set<std::string> ignoreVars{
"BASHOPTS",
"EUID",
"HOME", // FIXME: don't ignore in pure mode?
"HOSTNAME",
"NIX_BUILD_TOP",
"NIX_ENFORCE_PURITY",
"NIX_LOG_FD",
"NIX_REMOTE",
"PPID",
"PWD",
"SHELLOPTS",
"SHLVL",
"SSL_CERT_FILE", // FIXME: only want to ignore /no-cert-file.crt
"TEMP",
"TEMPDIR",
Expand Down Expand Up @@ -233,22 +233,10 @@ struct Common : InstallableCommand, MixProfile

out << "nix_saved_PATH=\"$PATH\"\n";

for (auto & i : buildEnvironment.env) {
if (!ignoreVars.count(i.first) && !hasPrefix(i.first, "BASH_")) {
if (i.second.associative)
out << fmt("declare -A %s=(%s)\n", i.first, i.second.quoted);
else {
out << fmt("%s=%s\n", i.first, i.second.quoted);
if (i.second.exported)
out << fmt("export %s\n", i.first);
}
}
}
buildEnvironment.toBash(out, ignoreVars);

out << "PATH=\"$PATH:$nix_saved_PATH\"\n";

out << buildEnvironment.bashFunctions << "\n";

out << "export NIX_BUILD_TOP=\"$(mktemp -d -t nix-shell.XXXXXX)\"\n";
for (auto & i : {"TMP", "TMPDIR", "TEMP", "TEMPDIR"})
out << fmt("export %s=\"$NIX_BUILD_TOP\"\n", i);
Expand All @@ -258,16 +246,16 @@ struct Common : InstallableCommand, MixProfile
auto script = out.str();

/* Substitute occurrences of output paths. */
auto outputs = buildEnvironment.env.find("outputs");
assert(outputs != buildEnvironment.env.end());
auto outputs = buildEnvironment.vars.find("outputs");
assert(outputs != buildEnvironment.vars.end());

// FIXME: properly unquote 'outputs'.
StringMap rewrites;
for (auto & outputName : tokenizeString<std::vector<std::string>>(replaceStrings(outputs->second.quoted, "'", ""))) {
auto from = buildEnvironment.env.find(outputName);
assert(from != buildEnvironment.env.end());
for (auto & outputName : BuildEnvironment::getStrings(outputs->second)) {
auto from = buildEnvironment.vars.find(outputName);
assert(from != buildEnvironment.vars.end());
// FIXME: unquote
rewrites.insert({from->second.quoted, outputsDir + "/" + outputName});
rewrites.insert({BuildEnvironment::getString(from->second), outputsDir + "/" + outputName});
}

/* Substitute redirects. */
Expand Down Expand Up @@ -321,7 +309,7 @@ struct Common : InstallableCommand, MixProfile

updateProfile(shellOutPath);

return {readEnvironment(strPath), strPath};
return {BuildEnvironment::fromJSON(strPath), strPath};
}
};

Expand Down
106 changes: 104 additions & 2 deletions src/nix/get-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,112 @@ if [[ -n $stdenv ]]; then
source $stdenv/setup
fi

# Better to use compgen, but stdenv bash doesn't have it.
__vars="$(declare -p)"
__functions="$(declare -F)"

# Write the shell state captured in $__functions (`declare -F` output)
# and $__vars (`declare -p` output) to stdout as a JSON object of the
# form
#
#   { "bashFunctions": { NAME: BODY, ... },
#     "variables": { NAME: { "type": T, "value": V }, ... } }
#
# T is "exported", "var", "array", "associative" or "unknown" (the
# latter carries no value). Returns 1, after printing a message to
# stderr, if the body of a function cannot be extracted from the
# output of `type`.
#
# Values are looked up by name (indirect expansion / nameref) from
# inside this function, so every scratch variable here is local and
# `__`-prefixed: a scratch variable with an ordinary name would shadow
# a user variable of the same name and cause the wrong value to be
# dumped.
__dumpEnv() {
    local __line __fun_name __fun_body __type __var_name __i
    local __first __first2

    printf '{\n'

    printf ' "bashFunctions": {\n'
    __first=1
    # `IFS= read -r` keeps each line intact (no backslash processing,
    # no trimming) so only lines that really start with `declare` match.
    while IFS= read -r __line; do
        if ! [[ $__line =~ ^declare\ -f\ (.*) ]]; then continue; fi
        __fun_name="${BASH_REMATCH[1]}"
        __fun_body="$(type "$__fun_name")"
        # Keep only what is between the outermost braces of the definition.
        if [[ $__fun_body =~ \{(.*)\} ]]; then
            if [[ -z $__first ]]; then printf ',\n'; else __first=; fi
            __fun_body="${BASH_REMATCH[1]}"
            printf " "
            __escapeString "$__fun_name"
            printf ':'
            __escapeString "$__fun_body"
        else
            printf "Cannot parse definition of function '%s'.\n" "$__fun_name" >&2
            return 1
        fi
    done < <(printf "%s\n" "$__functions")
    printf '\n },\n'

    printf ' "variables": {\n'
    __first=1
    while IFS= read -r __line; do
        # Only single-flag declarations are recognised; anything else
        # (including continuation lines of multi-line values) is skipped.
        if ! [[ $__line =~ ^declare\ (-[^ ])\ ([^=]*) ]]; then continue; fi
        __type="${BASH_REMATCH[1]}"
        __var_name="${BASH_REMATCH[2]}"

        # Variables excluded from the dump.
        if [[ $__var_name =~ ^BASH_ || \
              $__var_name = _ || \
              $__var_name = DIRSTACK || \
              $__var_name = EUID || \
              $__var_name = FUNCNAME || \
              $__var_name = HISTCMD || \
              $__var_name = HOSTNAME || \
              $__var_name = PIPESTATUS || \
              $__var_name = PWD || \
              $__var_name = RANDOM || \
              $__var_name = SHLVL || \
              $__var_name = SECONDS \
           ]]; then continue; fi

        if [[ -z $__first ]]; then printf ',\n'; else __first=; fi

        printf " "
        __escapeString "$__var_name"
        printf ': {'

        # FIXME: handle -i, -r, -n.
        if [[ $__type == -x ]]; then
            printf '"type": "exported", "value": '
            __escapeString "${!__var_name}"
        elif [[ $__type == -- ]]; then
            printf '"type": "var", "value": '
            __escapeString "${!__var_name}"
        elif [[ $__type == -a ]]; then
            printf '"type": "array", "value": ['
            __first2=1
            # Indirect expansion of "NAME[@]" yields the array elements.
            __var_name="$__var_name[@]"
            for __i in "${!__var_name}"; do
                if [[ -z $__first2 ]]; then printf ', '; else __first2=; fi
                __escapeString "$__i"
                printf ' '
            done
            printf ']'
        elif [[ $__type == -A ]]; then
            printf '"type": "associative", "value": {\n'
            __first2=1
            # A nameref is needed to enumerate the keys of the array.
            declare -n __var_name2="$__var_name"
            for __i in "${!__var_name2[@]}"; do
                if [[ -z $__first2 ]]; then printf ',\n'; else __first2=; fi
                printf " "
                __escapeString "$__i"
                printf ": "
                __escapeString "${__var_name2[$__i]}"
            done
            printf '\n }'
        else
            printf '"type": "unknown"'
        fi

        printf "}"
    done < <(printf "%s\n" "$__vars")
    printf '\n }\n}'
}

# Print $1 to stdout as a JSON string literal (including the
# surrounding double quotes).
#
# Backslash, double quote, newline, carriage return and tab get their
# short escapes. Every other control character in the range
# U+0001..U+001F is written as \u00XX, because JSON forbids raw
# control characters inside strings and the consumer would otherwise
# reject the whole document (e.g. for variables holding ANSI colour
# sequences).
__escapeString() {
    local __s="$1"
    # Backslash must be handled first so the escapes added below are
    # not escaped a second time.
    __s="${__s//\\/\\\\}"
    __s="${__s//\"/\\\"}"
    __s="${__s//$'\n'/\\n}"
    __s="${__s//$'\r'/\\r}"
    __s="${__s//$'\t'/\\t}"
    # Slow path, taken only when some control character is still
    # present after the common ones above have been replaced.
    if [[ $__s == *[[:cntrl:]]* ]]; then
        local __code __oct __char __esc
        for (( __code = 1; __code < 32; __code++ )); do
            printf -v __oct '%03o' "$__code"
            printf -v __char "\\$__oct"          # the raw control character
            printf -v __esc '\\u%04x' "$__code"  # its JSON escape
            __s="${__s//$__char/$__esc}"
        done
    fi
    printf '"%s"' "$__s"
}

# Dump the bash environment as JSON.
for __output in $outputs; do
if [[ -z $__done ]]; then
export > ${!__output}
set >> ${!__output}
__dumpEnv > ${!__output}
__done=1
else
echo -n >> ${!__output}
Expand Down
4 changes: 4 additions & 0 deletions tests/nix-shell.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,7 @@ nix_develop -f shell.nix shellDrv -c echo foo |& grep -q foo
# Test 'nix print-dev-env'.
source <(nix print-dev-env -f shell.nix shellDrv)
[[ -n $stdenv ]]
[[ ${arr1[2]} = "3 4" ]]
[[ ${arr2[1]} = $'\n' ]]
[[ ${arr2[2]} = $'x\ny' ]]
[[ $(fun) = blabla ]]
5 changes: 5 additions & 0 deletions tests/shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ let pkgs = rec {
for pkg in $buildInputs; do
export PATH=$PATH:$pkg/bin
done
declare -a arr1=(1 2 "3 4" 5)
declare -a arr2=(x $'\n' $'x\ny')
fun() {
echo blabla
}
'';

stdenv = mkDerivation {
Expand Down

0 comments on commit 223e056

Please sign in to comment.