Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions containers/agent/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,54 @@ echo "[entrypoint] Switching to awfuser (UID: $(id -u awfuser), GID: $(id -g awf
echo "[entrypoint] Executing command: $@"
echo ""

# Default comma-separated list of sensitive token environment variable names.
# scrub_sensitive_tokens uses this list when AWF_ONE_SHOT_TOKENS is empty or unset.
# Must stay aligned with DEFAULT_SENSITIVE_TOKENS in one-shot-token.c
# and SENSITIVE_ENV_NAMES in docker-manager.ts.
DEFAULT_SENSITIVE_TOKENS="COPILOT_GITHUB_TOKEN,GITHUB_TOKEN,GH_TOKEN,GITHUB_API_TOKEN,GITHUB_PAT,GH_ACCESS_TOKEN,GITHUB_PERSONAL_ACCESS_TOKEN,OPENAI_API_KEY,OPENAI_KEY,ANTHROPIC_API_KEY,CLAUDE_API_KEY,CODEX_API_KEY"

# scrub_sensitive_tokens CACHE_FILE_PATH
#
# Writes the values of sensitive token environment variables to CACHE_FILE_PATH
# (one NAME=VALUE per line, mode 0600, chown'ed to awfuser when possible) and
# then unsets them, so that processes exec'd afterwards do not inherit them.
# The LD_PRELOAD one-shot-token library reads this file on load to populate
# its in-memory cache so that getenv() still returns the values.
#
# The cache file is NOT deleted by the library (it must survive the full exec
# chain: capsh → gosu → user command). Cleanup is handled by the EXIT trap.
#
# Token names come from AWF_ONE_SHOT_TOKENS (comma-separated, surrounding
# whitespace trimmed). If that is unset, empty, or contains no usable name
# (e.g. ",,,"), DEFAULT_SENSITIVE_TOKENS is used instead, matching the
# documented fallback of the library.
#
# Best-effort behaviour (the function always returns 0):
#   - If the cache file cannot be created exclusively, a warning is printed
#     to stderr and the environment is left untouched so the library's
#     environment fallback can still pick the tokens up.
#   - A token is only unset once its value has been written (or it had no
#     value). Values containing a newline cannot be represented in the
#     line-oriented cache format and are left in the environment.
#
# Globals:   AWF_ONE_SHOT_TOKENS (read), DEFAULT_SENSITIVE_TOKENS (read),
#            the listed token variables (read, then unset)
# Arguments: $1 - path of the cache file to create
# Outputs:   progress messages on stdout, warnings on stderr
scrub_sensitive_tokens() {
  local cache_file="$1"
  local token_list raw_name token_name token_value
  local -a raw_names=() token_names=()

  # Resolve the list of names: configured list first, defaults as fallback.
  for token_list in "${AWF_ONE_SHOT_TOKENS:-}" "${DEFAULT_SENSITIVE_TOKENS:-}"; do
    raw_names=()
    IFS=',' read -ra raw_names <<< "${token_list}" || true
    for raw_name in ${raw_names[@]+"${raw_names[@]}"}; do
      # Trim leading/trailing whitespace without forking echo|xargs
      # (xargs would also interpret quotes and backslashes).
      raw_name="${raw_name#"${raw_name%%[![:space:]]*}"}"
      raw_name="${raw_name%"${raw_name##*[![:space:]]}"}"
      if [ -n "${raw_name}" ]; then
        token_names+=("${raw_name}")
      fi
    done
    if [ "${#token_names[@]}" -gt 0 ]; then
      break
    fi
  done

  # Create the file fresh and exclusively: remove any stale/planted entry,
  # then let noclobber refuse to reuse an existing file or follow a symlink.
  # umask 077 guarantees mode 0600 from the moment the file exists.
  rm -f -- "${cache_file}" 2>/dev/null || true
  if ! (umask 077 && set -o noclobber && : > "${cache_file}") 2>/dev/null; then
    echo "[entrypoint] WARNING: could not create token cache file ${cache_file}; tokens left in environment" >&2
    return 0
  fi

  for token_name in ${token_names[@]+"${token_names[@]}"}; do
    # printenv only reports exported variables, i.e. what a child would inherit.
    token_value=$(printenv "${token_name}" 2>/dev/null || true)

    case "${token_value}" in
      *$'\n'*)
        # A multi-line value would be split into several cache lines.
        echo "[entrypoint] WARNING: Token ${token_name} contains a newline and cannot be cached; left in environment" >&2
        continue
        ;;
    esac

    if [ -n "${token_value}" ]; then
      if ! printf '%s=%s\n' "${token_name}" "${token_value}" >> "${cache_file}"; then
        echo "[entrypoint] WARNING: could not write token ${token_name} to cache file; left in environment" >&2
        continue
      fi
      echo "[entrypoint] Token ${token_name} written to cache file and will be scrubbed from environ"
    fi

    # Unset so the variable is not passed on to the exec'd process.
    unset -v "${token_name}" 2>/dev/null || true
  done

  # Hand the file to awfuser; ignored when the user does not exist or we lack
  # the privilege to chown.
  chown "$(id -u awfuser 2>/dev/null):$(id -g awfuser 2>/dev/null)" "${cache_file}" 2>/dev/null || true
  return 0
}

# If chroot mode is enabled, run user command INSIDE the chroot /host
# This provides transparent host binary access - user command sees host filesystem as /
if [ "${AWF_CHROOT_ENABLED}" = "true" ]; then
Expand Down Expand Up @@ -389,9 +437,23 @@ AWFEOF
LD_PRELOAD_CMD="export LD_PRELOAD=${ONE_SHOT_TOKEN_LIB};"
fi

# Scrub sensitive tokens from the environment before exec to prevent
# /proc/self/environ exposure. Write values to a cache file so the
# LD_PRELOAD library can still serve them via getenv().
SCRUB_CMD=""
if [ -n "${ONE_SHOT_TOKEN_LIB}" ]; then
TOKEN_CACHE_FILE="/tmp/.awf-token-cache-$$"
# In chroot mode, the file lives under /host but the chroot sees it at TOKEN_CACHE_FILE
scrub_sensitive_tokens "/host${TOKEN_CACHE_FILE}"
SCRUB_CMD="export AWF_TOKEN_CACHE_FILE=${TOKEN_CACHE_FILE};"
# Also add cache file cleanup to the exit trap (chroot perspective, no /host prefix)
CLEANUP_CMD="${CLEANUP_CMD}; rm -f ${TOKEN_CACHE_FILE} 2>/dev/null || true"
Copy link

Copilot AI Feb 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The token cache file is created at /host${TOKEN_CACHE_FILE} (/host/tmp/.awf-token-cache-$$) from the chroot context, but the cleanup command uses ${TOKEN_CACHE_FILE} without the /host prefix. Since the trap runs within the chroot, this path should work, but if the cleanup trap executes after the chroot context has changed or if there's an error, the file might not be deleted. Consider making the cleanup path more explicit or documenting why the /host prefix is intentionally omitted in the cleanup command.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good observation. In e767a90 this is clarified with a comment: the cleanup trap uses ${TOKEN_CACHE_FILE} (without /host prefix) because it runs inside the chroot where the file is at that path. The chroot exec chain always runs within the chroot context, so the path is correct.

fi

exec chroot /host /bin/bash -c "
cd '${CHROOT_WORKDIR}' 2>/dev/null || cd /
trap '${CLEANUP_CMD}' EXIT
${SCRUB_CMD}
${LD_PRELOAD_CMD}
exec capsh --drop=${CAPS_TO_DROP} --user=${HOST_USER} -- -c 'exec ${SCRIPT_FILE}'
"
Expand All @@ -409,5 +471,13 @@ else
# Enable one-shot token protection - tokens are cached in memory and
# unset from the environment so /proc/self/environ is cleared
export LD_PRELOAD=/usr/local/lib/one-shot-token.so

# Scrub sensitive tokens from the environment before exec to prevent
# /proc/self/environ exposure. Write values to a cache file so the
# LD_PRELOAD library can still serve them via getenv().
TOKEN_CACHE_FILE="/tmp/.awf-token-cache-$$"
scrub_sensitive_tokens "${TOKEN_CACHE_FILE}"
export AWF_TOKEN_CACHE_FILE="${TOKEN_CACHE_FILE}"

exec capsh --drop=$CAPS_TO_DROP -- -c "exec gosu awfuser $(printf '%q ' "$@")"
fi
21 changes: 19 additions & 2 deletions containers/agent/one-shot-token/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

## Overview

The one-shot token library is an `LD_PRELOAD` shared library that provides **cached access** to sensitive environment variables containing GitHub, OpenAI, Anthropic/Claude, and Codex API tokens. When a process reads a protected token via `getenv()`, the library caches the value in memory and immediately unsets the environment variable. Subsequent `getenv()` calls return the cached value, allowing the process to read tokens multiple times while `/proc/self/environ` is cleared.
The one-shot token library is an `LD_PRELOAD` shared library that provides **cached access** to sensitive environment variables containing GitHub, OpenAI, Anthropic/Claude, and Codex API tokens. On library load, a constructor eagerly caches all sensitive tokens and removes them from the process environment, ensuring `/proc/self/environ` never exposes secrets to user code.

The library supports two token loading mechanisms:
1. **Token cache file** (preferred): `entrypoint.sh` writes token values to a temporary file (`AWF_TOKEN_CACHE_FILE`) and unsets the env vars before `exec`. The constructor reads the file and populates the cache. The file is NOT deleted by the library — it must survive the full exec chain (`capsh → gosu → user command`) since each `exec()` resets static data. Cleanup is handled by the EXIT trap in `entrypoint.sh`.
2. **Environment fallback**: If no cache file exists, tokens are read from the environment on library load, cached, and unset.

This protects against exfiltration via `/proc/self/environ` inspection while allowing legitimate multi-read access patterns that programs like the Copilot CLI require.

Expand All @@ -19,6 +23,7 @@ By default, the library protects these token variables:
- `GITHUB_API_TOKEN`
- `GITHUB_PAT`
- `GH_ACCESS_TOKEN`
- `GITHUB_PERSONAL_ACCESS_TOKEN`

**OpenAI:**
- `OPENAI_API_KEY`
Expand Down Expand Up @@ -48,9 +53,21 @@ LD_PRELOAD=/usr/local/lib/one-shot-token.so ./your-program
- If `AWF_ONE_SHOT_TOKENS` is set but contains only whitespace or commas (e.g., `" "` or `",,,"`), the library falls back to the default token list to maintain protection
- Use comma-separated token names (whitespace is automatically trimmed)
- Maximum of 100 tokens can be protected
- The configuration is read once at library initialization (first `getenv()` call)
- The configuration is read once at library initialization (constructor on library load)
- Uses `strtok_r()` internally, which is thread-safe and won't interfere with application code using `strtok()`

### Token Cache File

The `AWF_TOKEN_CACHE_FILE` environment variable specifies a file containing pre-cached token values. This is set automatically by `entrypoint.sh` to eliminate the `/proc/self/environ` exposure window across `exec` chains.

**Format:** One `NAME=VALUE` pair per line:
```
GITHUB_TOKEN=ghp_abc123...
ANTHROPIC_API_KEY=sk-ant-...
```

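The file is read by the library constructor but is **not** deleted by the library, because every process in the exec chain must be able to read it; cleanup is handled by the EXIT trap in `entrypoint.sh`. This is an internal mechanism managed by AWF's entrypoint — users should not set this variable manually.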

## How It Works

### The LD_PRELOAD Mechanism
Expand Down
135 changes: 131 additions & 4 deletions containers/agent/one-shot-token/one-shot-token.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
/**
* One-Shot Token LD_PRELOAD Library
*
* Intercepts getenv() calls for sensitive token environment variables.
* On first access, caches the value in memory and unsets from environment.
* Subsequent calls return the cached value, so the process can read tokens
* multiple times while /proc/self/environ no longer exposes them.
* Protects sensitive token environment variables from exposure via
* /proc/self/environ and limits access via getenv().
*
* When loaded, the library constructor reads cached token values from
* AWF_TOKEN_CACHE_FILE (written by entrypoint.sh) and populates an
* in-memory cache. The file is NOT deleted by the library because the
* constructor runs in every process in the exec chain (capsh → gosu →
* user command), and each exec() resets static data. The file is
* cleaned up by the EXIT trap in entrypoint.sh instead.
*
* The sensitive variables are never present in the process environment
* (entrypoint.sh unsets them before exec), so /proc/self/environ is clean.
* Subsequent getenv() calls return the cached values from memory.
*
* Fallback: If no cache file is found, tokens are read from the environment
* on library load, cached, and unset (original behavior).
*
* Configuration:
* AWF_ONE_SHOT_TOKENS - Comma-separated list of token names to protect
* If not set, uses built-in defaults
* AWF_TOKEN_CACHE_FILE - Path to the token cache file (set by entrypoint.sh)
*
* Compile: gcc -shared -fPIC -o one-shot-token.so one-shot-token.c -ldl
Expand All @@ -20,6 +33,7 @@
#include <string.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <ctype.h>

/* Default sensitive token environment variable names */
Expand All @@ -31,6 +45,7 @@ static const char *DEFAULT_SENSITIVE_TOKENS[] = {
"GITHUB_API_TOKEN",
"GITHUB_PAT",
"GH_ACCESS_TOKEN",
"GITHUB_PERSONAL_ACCESS_TOKEN",
/* OpenAI tokens */
"OPENAI_API_KEY",
"OPENAI_KEY",
Expand Down Expand Up @@ -75,6 +90,10 @@ static char *(*real_secure_getenv)(const char *name) = NULL;
static pthread_once_t getenv_init_once = PTHREAD_ONCE_INIT;
static pthread_once_t secure_getenv_init_once = PTHREAD_ONCE_INIT;

/* Forward declarations */
static int get_token_index(const char *name);
static const char *format_token_value(const char *value);

/* Initialize the real getenv pointer (called exactly once via pthread_once) */
static void init_real_getenv_once(void) {
real_getenv = dlsym(RTLD_NEXT, "getenv");
Expand Down Expand Up @@ -183,6 +202,114 @@ static void init_token_list(void) {

tokens_initialized = 1;
}

/**
 * Load cached token values from AWF_TOKEN_CACHE_FILE.
 *
 * The file format is one NAME=VALUE per line. Only names that
 * get_token_index() recognises as sensitive tokens are loaded; if a name
 * appears more than once, the first occurrence wins.
 *
 * Lines that do not fit into the fixed-size line buffer are skipped in their
 * entirety (with a warning) instead of being split: a split line would load a
 * truncated value and its tail would be parsed as a separate, bogus line.
 *
 * The file is NOT deleted here because the LD_PRELOAD constructor runs in
 * every process in the exec chain (capsh → gosu → user command), and each
 * exec() creates a fresh process image with reset static data. Deleting here
 * would cause subsequent processes to lose access to the cached tokens. The
 * file is cleaned up by the EXIT trap in entrypoint.sh instead. entrypoint.sh
 * creates the file with a restrictive umask and chowns it to awfuser.
 *
 * Must be called with token_mutex held and after init_token_list().
 */
static void load_token_cache_file(void) {
    const char *cache_path = real_getenv("AWF_TOKEN_CACHE_FILE");
    if (cache_path == NULL || cache_path[0] == '\0') {
        return;
    }

    FILE *f = fopen(cache_path, "r");
    if (f == NULL) {
        fprintf(stderr, "[one-shot-token] WARNING: Could not open token cache file: %s\n", cache_path);
        return;
    }

    char line[8192];
    int loaded = 0;
    while (fgets(line, sizeof(line), f) != NULL) {
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            /* Strip trailing newline */
            line[len - 1] = '\0';
            len--;
        } else if (len == sizeof(line) - 1) {
            /* Buffer is full and no newline was seen. Peek at the next
             * character: EOF or '\n' means the line fit exactly; anything
             * else means the line was truncated by fgets(). */
            int c = fgetc(f);
            if (c != EOF && c != '\n') {
                /* Discard the remainder of the overlong line. Nothing from
                 * the line is logged, since it may contain secret material. */
                while ((c = fgetc(f)) != EOF && c != '\n') {
                    /* drain */
                }
                fprintf(stderr, "[one-shot-token] WARNING: Skipping overlong line in token cache file\n");
                continue;
            }
        }

        /* Find the '=' separator; skip lines without one or with an empty name */
        char *eq = strchr(line, '=');
        if (eq == NULL || eq == line) continue;

        *eq = '\0';
        const char *name = line;
        const char *value = eq + 1;

        /* Find if this name matches a sensitive token (first-wins for duplicates) */
        int idx = get_token_index(name);
        if (idx >= 0 && !token_accessed[idx]) {
            token_cache[idx] = strdup(value);
            if (token_cache[idx] != NULL) {
                token_accessed[idx] = 1;
                loaded++;
                fprintf(stderr, "[one-shot-token] Loaded cached token %s from file\n", name);
            }
        }
    }

    fclose(f);

    if (loaded > 0) {
        fprintf(stderr, "[one-shot-token] Loaded %d token(s) from cache file\n", loaded);
    }
}

/**
* Library constructor - runs when the library is loaded (before main()).
*
* If AWF_TOKEN_CACHE_FILE is set (by entrypoint.sh), loads cached token
* values from the file. The sensitive variables are never present in
* /proc/self/environ because entrypoint.sh unsets them before exec.
*
* If no cache file exists, tokens remain in the environment and will be
* cached + unset on first getenv() call (original fallback behavior).
*/
__attribute__((constructor))
static void one_shot_token_init(void) {
/* Initialize the real getenv pointer first */
init_real_getenv_once();

pthread_mutex_lock(&token_mutex);
if (!tokens_initialized) {
init_token_list();
}

/* Load tokens from cache file if available (set by entrypoint.sh) */
load_token_cache_file();

/* Eagerly cache any remaining sensitive tokens still in the environment
* (fallback for when no cache file was used) */
for (int i = 0; i < num_tokens; i++) {
if (!token_accessed[i]) {
char *value = real_getenv(sensitive_tokens[i]);
if (value != NULL) {
token_cache[i] = strdup(value);
if (token_cache[i] != NULL) {
unsetenv(sensitive_tokens[i]);
fprintf(stderr, "[one-shot-token] Token %s eagerly cached and scrubbed from environ\n",
sensitive_tokens[i]);
}
token_accessed[i] = 1;
}
}
}
pthread_mutex_unlock(&token_mutex);

fprintf(stderr, "[one-shot-token] Library initialized: %d token(s) protected, /proc/self/environ scrubbed\n",
num_tokens);
}

/* Ensure real_getenv is initialized (thread-safe) */
static void init_real_getenv(void) {
pthread_once(&getenv_init_once, init_real_getenv_once);
Expand Down
10 changes: 10 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading