diff --git a/.gitignore b/.gitignore index 729bae19..16b7385d 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,7 @@ scripts/rebase-github-difc.sh # Serena cache directories .serena/ + +# WASM guard build artifacts +examples/guards/*/guard.wasm +*.wasm diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 00000000..14d86ad6 --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1 @@ +/cache diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 00000000..934d61ad --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,89 @@ +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp +# csharp_omnisharp dart elixir elm erlang +# fortran fsharp go groovy haskell +# java julia kotlin lua markdown +# matlab nix pascal perl php +# powershell python python_jedi r rego +# ruby ruby_solargraph rust scala swift +# terraform toml typescript typescript_vts vue +# yaml zig +# (This list may be outdated. For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# - csharp: Requires the presence of a .sln file in the project folder. +# - pascal: Requires Free Pascal Compiler (fpc) and optionally Lazarus. +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. 
+languages: +- go + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# whether to use project's .gitignore files to ignore files +ignore_all_files_in_gitignore: true + +# list of additional paths to ignore in all projects +# same syntax as gitignore, so you can use * and ** +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details. +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project by name. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_lines`: Deletes a range of lines within a file. +# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. +# * `execute_shell_command`: Executes a shell command. +# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. +# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). +# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. 
+# * `initial_instructions`: Gets the initial instructions for the current project. +# Should only be used in settings where the system prompt cannot be set, +# e.g. in clients you have no control over, like Claude Desktop. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_at_line`: Inserts content at a given line in a file. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. 
+# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +project_name: "gh-aw-mcpg" +included_optional_tools: [] diff --git a/AGENTS.md b/AGENTS.md index e2cac4cd..540a53d8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -364,6 +364,18 @@ DEBUG_COLORS=0 DEBUG=* ./awmg --config config.toml - `DEBUG_COLORS` - Control colored output (0 to disable, auto-disabled when piping) - `MCP_GATEWAY_LOG_DIR` - Log file directory (sets default for `--log-dir` flag, default: `/tmp/gh-aw/mcp-logs`) - `MCP_GATEWAY_PAYLOAD_DIR` - Large payload storage directory (sets default for `--payload-dir` flag, default: `/tmp/jq-payloads`) +- `MCP_GATEWAY_CONFIG_EXTENSIONS` - Enable config extensions for DIFC features (sets default for `--enable-config-extensions` flag) +- `MCP_GATEWAY_ENABLE_DIFC` - Enable DIFC enforcement (requires `MCP_GATEWAY_CONFIG_EXTENSIONS=1`) +- `MCP_GATEWAY_DIFC_FILTER` - Enable DIFC response filtering (requires DIFC enabled) +- `MCP_GATEWAY_SESSION_SECRECY` - Initial secrecy labels for sessions (requires config extensions) +- `MCP_GATEWAY_SESSION_INTEGRITY` - Initial integrity labels for sessions (requires config extensions) + +**Config Extensions:** +Config extensions (guards, session labels) require `--enable-config-extensions` or `MCP_GATEWAY_CONFIG_EXTENSIONS=1`. 
+This gates all DIFC-related configuration including: +- `guards` section in config +- `gateway.session` section in config +- `--enable-difc`, `--difc-filter`, `--session-secrecy`, `--session-integrity` flags **File Logging:** - Operational logs are always written to `mcp-gateway.log` in the configured log directory diff --git a/Makefile b/Makefile index 4f07b058..be4c36a3 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build lint test test-unit test-integration test-all test-serena test-serena-gateway coverage test-ci format clean install release help agent-finished +.PHONY: build lint test test-unit test-integration test-all test-serena test-serena-gateway coverage test-ci format clean install release help agent-finished echo-guard-demo echo-guard-build echo-guard-test echo-guard-codex echo-guard-tmux # Default target .DEFAULT_GOAL := help @@ -21,15 +21,15 @@ build: lint: @echo "Running linters..." @go mod tidy - @go vet ./... + @go vet $$(go list ./... | grep -v '/examples/guards/') @echo "Running gofmt check..." - @test -z "$$(gofmt -l .)" || (echo "The following files are not formatted:"; gofmt -l .; exit 1) + @test -z "$$(gofmt -l $$(find . -name '*.go' -not -path './examples/guards/*'))" || (echo "The following files are not formatted:"; gofmt -l $$(find . -name '*.go' -not -path './examples/guards/*'); exit 1) @echo "Running golangci-lint..." @GOPATH=$$(go env GOPATH); \ if [ -f "$$GOPATH/bin/golangci-lint" ]; then \ - $$GOPATH/bin/golangci-lint run --timeout=5m || echo "⚠ Warning: golangci-lint failed (compatibility issue with Go 1.25.0). Continuing with other checks..."; \ + $$GOPATH/bin/golangci-lint run --timeout=5m --skip-dirs examples/guards || echo "⚠ Warning: golangci-lint failed (compatibility issue with Go 1.25.0). Continuing with other checks..."; \ elif command -v golangci-lint >/dev/null 2>&1; then \ - golangci-lint run --timeout=5m || echo "⚠ Warning: golangci-lint failed (compatibility issue with Go 1.25.0). 
Continuing with other checks..."; \ + golangci-lint run --timeout=5m --skip-dirs examples/guards || echo "⚠ Warning: golangci-lint failed (compatibility issue with Go 1.25.0). Continuing with other checks..."; \ else \ echo "⚠ Warning: golangci-lint not found. Run 'make install' to install it."; \ echo " Skipping golangci-lint checks..."; \ @@ -261,4 +261,43 @@ help: @echo " install - Install required toolchains and dependencies" @echo " release - Create and push a release tag (usage: make release patch|minor|major)" @echo " agent-finished - Run format, build, lint, and all tests (for agents before completion)" + @echo " echo-guard-demo - Run quick echo guard test demo (shows guard I/O)" + @echo " echo-guard-codex - Start gateway with echo guard for Codex integration" + @echo " echo-guard-tmux - Interactive tmux demo (gateway + instructions)" + @echo " echo-guard-build - Build the echo guard WASM file" + @echo " echo-guard-test - Run echo guard integration tests" @echo " help - Display this help message" + +# Echo Guard Demo targets +echo-guard-demo: + @echo "" + @echo "╔═══════════════════════════════════════════════════════════════╗" + @echo "║ ECHO GUARD DEMO ║" + @echo "║ Demonstrates guard I/O for debugging guard implementations ║" + @echo "╚═══════════════════════════════════════════════════════════════╝" + @echo "" + @./scripts/echo-guard-demo.sh all + +echo-guard-codex: + @echo "" + @echo "╔═══════════════════════════════════════════════════════════════╗" + @echo "║ ECHO GUARD END-TO-END DEMO WITH CODEX ║" + @echo "╚═══════════════════════════════════════════════════════════════╝" + @echo "" + @./scripts/echo-guard-demo.sh codex + +echo-guard-tmux: + @echo "" + @echo "╔═══════════════════════════════════════════════════════════════╗" + @echo "║ ECHO GUARD INTERACTIVE TMUX DEMO ║" + @echo "╚═══════════════════════════════════════════════════════════════╝" + @echo "" + @./scripts/echo-guard-demo.sh tmux + +echo-guard-build: + @echo "Building echo guard 
WASM..." + @./scripts/echo-guard-demo.sh build + +echo-guard-test: + @echo "Running echo guard tests..." + @go test -v -run "TestEchoGuard" ./test/integration/... diff --git a/config.example.toml b/config.example.toml index b0de6c7c..fa28f64d 100644 --- a/config.example.toml +++ b/config.example.toml @@ -104,7 +104,7 @@ args = [ # ============================================================================ # Enable Data Information Flow Control (DIFC) security model (default: false) -# When true, requires sys___init call before tool access +# When true, enables DIFC enforcement. Sessions are auto-created from Authorization header. # This is an experimental feature - keep disabled for standard MCP compatibility # enable_difc = false diff --git a/docs/github-difc.md b/docs/github-difc.md new file mode 100644 index 00000000..8985d368 --- /dev/null +++ b/docs/github-difc.md @@ -0,0 +1,2356 @@ +# Proposal: DIFC Integrity and Secrecy Labels for GitHub Data + +This document proposes a principled scheme for assigning and enforcing **Decentralized Information Flow Control (DIFC)** integrity and secrecy labels for GitHub objects. The proposal is designed to be: + +- **Precise enough** to guide a prototype implementation by a coding agent, and +- **Principled enough** to withstand scrutiny by an SOSP/OSDI audience. + +The design adheres to the following core insights: +1. Integrity reflects **current endorsement**, not authorship or workflow history. +2. Integrity labels grow **monotonically** over an object’s lifecycle. +3. Secrecy labels constrain **information release**, not authority. +4. AI agents are **not integrity principals** and must attenuate integrity. +5. Workflow- and plan-level controls mediate promotion of integrity and enforcement of secrecy. +6. Labels are **derived, not stored**: because GitHub does not support first-class security labels, all labels must be *reconstructible from Git history and GitHub API–visible metadata*. + +--- + +## 1. 
Scope and Objects + +This proposal applies to the following GitHub objects: + +- Commits +- Pull requests (PRs) +- Branches +- Issues and comments +- Repository-visible artifacts (e.g., PR descriptions, commit messages) +- Sensitive artifacts (e.g., authentication tokens, CI logs, internal analysis output) + +The scheme assumes an external system that observes and mediates interactions with GitHub via APIs or MCP servers; it does not require modifications to GitHub itself. + +--- + +## 2. Label Model + +Each object `o` is associated with: + +- An **integrity label** `I(o)` +- A **secrecy label** `S(o)` + +Labels are elements of fixed, finite lattices defined below. Labels are *logical properties* inferred from repository state and metadata, not persistent fields stored in GitHub. + +--- + +## 3. Integrity Lattice + +Integrity labels represent *endorsement and trust*, not provenance. + +Integrity classes are ordered from lowest to highest: + +``` +∅ (empty) +≤ contributor:<repo> +≤ project:<repo> +``` + +where `<repo>` is the repository identifier in `owner/name` format (e.g., `github/github-mcp-server`). + +Interpretation: + +- `∅` (empty): No trusted party endorses correctness. An empty integrity label indicates the absence of endorsement. +- `contributor:<repo>`: Endorsed as originating from a known contributor role in the specified repository. +- `project:<repo>`: Endorsed as part of the trusted project history (e.g., merged into a protected branch of the specified repository). + +### 3.1 Guard Responsibility for Hierarchical Expansion + +The DIFC evaluator treats labels as opaque strings and does **not** understand the hierarchical relationship between GitHub integrity tags. Therefore, the GitHub guard **must explicitly expand** integrity labels to include all implied lower-level tags: + +| When assigning... | Guard must include... 
| +|-------------------|----------------------| +| `contributor:<repo>` | `contributor:<repo>` | +| `project:<repo>` | `contributor:<repo>`, `project:<repo>` | + +**Example:** When labeling a commit merged to a protected branch: + +```json +{ + "integrity": [ + "contributor:github/github-mcp-server", + "project:github/github-mcp-server" + ] +} +``` + +This explicit expansion ensures that DIFC flow checks work correctly. An agent with `project:<repo>` clearance can write to resources labeled with `contributor:<repo>` because both tags are present in the agent's expanded integrity label. + +**Rationale:** This design keeps the DIFC evaluator domain-agnostic while allowing domain-specific guards (like the GitHub guard) to encode hierarchical trust relationships through label expansion. + +Integrity labels **must grow monotonically**. Demotion is not permitted. + +--- + +## 4. Secrecy Lattice + +Secrecy labels represent *information sensitivity and release constraints*. + +``` +∅ (empty) +≤ private:<repo> +≤ secret +``` + +where `<repo>` is the repository identifier in `owner/name` format (e.g., `github/github-mcp-server`). + +Interpretation: + +- `∅` (empty): May be disclosed publicly. An empty secrecy label indicates no sensitivity restrictions. +- `private:<repo>`: Restricted to collaborators of the specified repository. +- `secret`: Must not be disclosed via GitHub-visible state. + +Secrecy labels are enforced strictly: information may flow only to objects with secrecy labels greater than or equal to the source. + +--- + +## 5. Label Derivation Principle (Derived Labels) + +GitHub does not provide first-class support for security labels. Therefore, **all integrity and secrecy labels must be computable from Git history and GitHub API–visible metadata**. 
+ +### 5.1 Derivation Requirements + +For any object `o`, the system must be able to compute `I(o)` and `S(o)` using only: + +- Git commit graph and branch structure +- PR state (open, approved, merged) +- Review metadata (reviewer roles, approval status) +- Repository configuration (protected branches, visibility) +- CI and check results +- Static configuration (e.g., role definitions) + +No label state is stored back into GitHub. + +--- + +### 5.2 Determinism Requirement + +Label derivation functions must be **deterministic**: + +``` +I(o) = f_I(git_history, github_metadata, config) +S(o) = f_S(git_history, github_metadata, config) +``` + +Given the same repository state and configuration, label computation must yield identical results. This supports auditing, replay, and verification. + +--- + +## 6. Initial Label Assignment + +### 6.1 Commits + +When a commit exists but is not merged into a protected branch: + +- `I(commit) = ∅` (empty — no endorsement) +- `S(commit) = ∅` (empty — public repo, no sensitivity) +- `S(commit) = private:<repo>` (private repo) + +Authorship information is treated as provenance metadata only. + +--- + +### 6.2 Pull Requests + +When a PR is opened: + +- `I(PR) = ∅` (empty — no endorsement) +- `S(PR)` depends on repository visibility + +Review comments and CI results do not automatically promote integrity. + +--- + +### 6.3 Issues and Discussions + +Issues and comments are treated as low-integrity inputs: + +- `I(issue) = ∅` (empty — no endorsement) +- `S(issue)` depends on repository visibility + +--- + +### 6.4 Sensitive Artifacts + +Objects containing credentials or internal secrets are labeled: + +- `S(object) = secret` + +Such objects must never influence GitHub-visible artifacts. + +--- + +## 7. Integrity Promotion Rules + +Integrity promotion is derived from **observable repository events**. + +### 7.1 Promotion Predicates + +Examples: + +- `∅ → contributor:<repo>` + If the PR author is a known contributor to the repository. 
+ +- `contributor:<repo> → project:<repo>` + If at least one project-approved review exists and required checks pass. + +- `project:<repo> → project:<repo>` + If the commit is merged into a protected branch by a project. + +These predicates operate on metadata; only the resulting integrity class is recorded logically. + +--- + +### 7.2 Monotonicity Invariant + +For any object `o`: + +``` +I_new(o) ≥ I_old(o) +``` + +Integrity never decreases as repository state evolves. + +--- + +## 8. Secrecy Enforcement Rules + +### 8.1 No-Secret-Export Invariant + +For any flow from object `o1` to `o2`: + +``` +S(o1) ≤ S(o2) +``` + +Objects with `S = secret` must not affect GitHub-visible state. + +--- + +## 9. AI Agents and Integrity + +### 9.1 Agents Are Not Integrity Principals + +AI agents are not treated as sources of integrity. + +For any agent-produced artifact: + +``` +I = ∅ (empty) +``` + +regardless of the user on whose behalf the agent operates. + +--- + +### 9.2 Integrity Attenuation + +For combined inputs: + +``` +I(output) ≤ min(I(human_input), I(agent_input)) +``` + +Since agent input always has empty integrity (`∅`), agent outputs require external endorsement to gain integrity. + +--- + +## 10. Workflow and Enforcement Context + +Integrity and secrecy promotion occur only at explicit workflow transitions driven by observable repository state and policy. This document does not assume that GitHub enforces these policies directly; instead, enforcement is performed by an external mediator that evaluates requests and repository state. + +--- + +## 11. Implementation Guidance + +This proposal is intended to guide the implementation of a GitHub mediation module that enforces DIFC over GitHub API interactions. + +### 11.1 Request Classification + +The module receives GitHub MCP requests issued by an agent. 
Each request must first be classified as one of: + +- **Read-only**: retrieves GitHub state (e.g., listing PRs, reading commits) +- **Write-only**: mutates GitHub state (e.g., creating PRs, pushing commits) +- **Read–write**: reads GitHub state and conditionally writes new state + +This classification is determined by the module’s understanding of the MCP request semantics. + +--- + +### 11.2 Resource Label Determination + +For each MCP request, the module must identify the GitHub resources accessed and compute their integrity and secrecy labels using the derivation rules in this document. This may require issuing auxiliary GitHub API calls to inspect repository state, metadata, and configuration. + +--- + +### 11.3 DIFC Enforcement + +After classifying the request and deriving resource labels, the module invokes a DIFC decision procedure that determines whether the request is permitted. + +The decision considers: +- The agent’s integrity and secrecy labels (provided with the request) +- The derived integrity and secrecy labels of GitHub resources +- The operation type (read, write, or read–write) +- Standard DIFC flow rules: + - Reads must not violate secrecy constraints + - Writes must not violate integrity constraints + - Combined operations must satisfy both + +The module does not need to know *how* the agent’s labels were assigned; it treats them as authoritative inputs to the DIFC engine. + +--- + +### 11.4 Enforcement Outcomes + +If the request is permitted, it is forwarded to GitHub. If not, the module must block the request and return a policy violation error. All decisions should be auditable by replaying label derivation and DIFC checks against recorded repository state. 
+ +--- + +### 11.5 Session Initialization with DIFC Labels + +When an agent connects to the gateway, it must be assigned initial secrecy and integrity labels that define: +- **Secrecy clearance**: What sensitive data the agent is allowed to read +- **Integrity clearance**: What trust level the agent operates at for writes + +These initial labels are associated with the session ID provided in the `Authorization` header. + +> **Prerequisite:** Session label configuration requires enabling config extensions: +> ```bash +> ./awmg --enable-config-extensions --config config.toml ... +> ``` +> Or via environment variable: `MCP_GATEWAY_CONFIG_EXTENSIONS=1` + +#### 11.5.1 Configuration via Flags + +The gateway accepts flags to specify initial session labels: + +```bash +# Specify initial secrecy clearance (agent can read private repo data) +./awmg --enable-config-extensions --config config.toml \ + --session-secrecy "private:github/my-private-repo" + +# Specify initial integrity clearance (agent operates at project level) +./awmg --enable-config-extensions --config config.toml \ + --session-integrity "contributor:github/my-repo,project:github/my-repo" + +# Combined: agent can read private data and write as project +./awmg --enable-config-extensions --config config.toml \ + --session-secrecy "private:github/my-private-repo" \ + --session-integrity "contributor:github/my-repo,project:github/my-repo" + +# Multiple repos (comma-separated tags) +./awmg --enable-config-extensions --config config.toml \ + --session-secrecy "private:github/repo-a,private:github/repo-b" \ + --session-integrity "contributor:github/repo-a,contributor:github/repo-b,project:github/repo-b" +``` + +**Flag Reference:** + +| Flag | Description | Example | +|------|-------------|---------| +| `--session-secrecy` | Comma-separated secrecy tags for agent clearance | `private:owner/repo,secret` | +| `--session-integrity` | Comma-separated integrity tags for agent clearance | `contributor:owner/repo,project:owner/repo` | + +#### 11.5.2 Configuration via Environment Variables + +The same 
configuration can be provided via environment variables: + +```bash +# Environment variable equivalents +export MCP_GATEWAY_SESSION_SECRECY="private:github/my-private-repo" +export MCP_GATEWAY_SESSION_INTEGRITY="contributor:github/my-repo,project:github/my-repo" + +MCP_GATEWAY_CONFIG_EXTENSIONS=1 ./awmg --config config.toml +``` + +**Environment Variable Reference:** + +| Variable | Description | Equivalent Flag | +|----------|-------------|-----------------| +| `MCP_GATEWAY_SESSION_SECRECY` | Initial secrecy clearance tags | `--session-secrecy` | +| `MCP_GATEWAY_SESSION_INTEGRITY` | Initial integrity clearance tags | `--session-integrity` | + +#### 11.5.3 Configuration via Config File + +For more complex setups, session labels can be specified in the configuration file: + +**TOML Format:** +```toml +[gateway] +port = 3000 +domain = "localhost" + +[gateway.session] +secrecy = ["private:github/my-private-repo"] +integrity = ["contributor:github/my-repo", "project:github/my-repo"] +``` + +**JSON Format (stdin):** +```json +{ + "mcpServers": { ... 
}, + "gateway": { + "port": 3000, + "session": { + "secrecy": ["private:github/my-private-repo"], + "integrity": ["contributor:github/my-repo", "project:github/my-repo"] + } + } +} +``` + +#### 11.5.4 Label Semantics for Sessions + +**Secrecy Clearance:** +- An agent with `private:<repo>` clearance can read resources labeled with `private:<repo>` or lower (empty/public) +- An agent with `secret` clearance can read any resource +- An agent with no secrecy clearance (empty) can only read public resources + +**Integrity Clearance:** +- An agent with `contributor:<repo>` clearance can write to resources requiring contributor-level integrity +- An agent with `project:<repo>` clearance can write to resources requiring project-level integrity (and contributor by hierarchical inclusion) +- **Important:** Integrity labels must be properly expanded (see Section 3.1) + +#### 11.5.5 Example: GitHub Copilot Agent for Private Repo + +A typical setup for an agent working on a private GitHub repository: + +```bash +# Agent working on github/private-project as a project +./awmg --enable-config-extensions --config config.toml \ + --session-secrecy "private:github/private-project" \ + --session-integrity "contributor:github/private-project,project:github/private-project" +``` + +This configuration: +1. Allows the agent to **read** issues, PRs, and code from `github/private-project` +2. Allows the agent to **write** (create issues, submit PRs) at project level +3. Prevents the agent from accessing other private repos +4. Prevents the agent from performing project-level operations (e.g., branch protection changes) + +#### 11.5.6 Dynamic Label Assignment (Future) + +A future enhancement could derive session labels dynamically from the GitHub token: + +```bash +# Auto-derive labels from token permissions (proposed) +./awmg --config config.toml --session-from-token +``` + +This would: +1. Introspect the GitHub token to determine accessible repos +2. Query GitHub API to determine user's role in each repo +3. 
Automatically assign appropriate secrecy and integrity labels + +**Note:** This requires the gateway to have access to the GitHub token and make API calls at session initialization time. + +--- + +### 11.6 GitHub MCP Interface and Operation Classification + +The mediator relies on the GitHub MCP server interface to observe and effect GitHub operations. The current open-source GitHub MCP server implementation and interface definition are available at: + +- **GitHub MCP Server Repository:** + https://github.com/github/github-mcp-server +- **Complete Tool Reference:** + https://github.com/github/github-mcp-server#tools + +This section classifies GitHub MCP server tools according to whether they **read**, **write**, or **read and write** GitHub state. This classification is used as input to DIFC enforcement. + +#### 11.6.1 Read-Only Operations + +These operations retrieve GitHub state and do not mutate repository data. + +**Context Toolset:** +- `get_me` — Get authenticated user profile +- `get_teams` — Get teams for user +- `get_team_members` — Get team members + +**Repository Toolset:** +- `get_file_contents` — Get file or directory contents +- `get_commit` — Get commit details +- `list_commits` — List commits +- `list_branches` — List branches +- `list_tags` — List tags +- `get_tag` — Get tag details +- `get_repository_tree` (git toolset) — Get repository tree +- `search_repositories` — Search repositories +- `search_code` — Search code +- `list_releases` — List releases +- `get_latest_release` — Get latest release +- `get_release_by_tag` — Get release by tag + +**Pull Request Toolset:** +- `list_pull_requests` — List pull requests +- `pull_request_read` — Get PR details, diff, status, files, reviews, comments +- `search_pull_requests` — Search pull requests + +**Issues Toolset:** +- `list_issues` — List issues +- `issue_read` — Get issue details, comments, sub-issues, labels +- `search_issues` — Search issues +- `get_label` — Get label +- `list_issue_types` — List 
issue types + +**Actions Toolset:** +- `list_workflows` — List workflows +- `list_workflow_runs` — List workflow runs +- `get_workflow_run` — Get workflow run +- `list_workflow_jobs` — List workflow jobs +- `get_job_logs` — Get job logs +- `get_workflow_run_logs` — Get workflow run logs +- `get_workflow_run_usage` — Get workflow usage +- `list_workflow_run_artifacts` — List workflow artifacts +- `download_workflow_run_artifact` — Download workflow artifact + +**Notifications Toolset:** +- `list_notifications` — List notifications +- `get_notification_details` — Get notification details + +**Discussions Toolset:** +- `list_discussions` — List discussions +- `get_discussion` — Get discussion +- `get_discussion_comments` — Get discussion comments +- `list_discussion_categories` — List discussion categories + +**Gists Toolset:** +- `list_gists` — List gists +- `get_gist` — Get gist content + +**Projects Toolset:** +- `list_projects` — List projects +- `get_project` — Get project +- `list_project_items` — List project items +- `get_project_item` — Get project item +- `list_project_fields` — List project fields +- `get_project_field` — Get project field + +**Organizations Toolset:** +- `search_orgs` — Search organizations + +**Users Toolset:** +- `search_users` — Search users + +**Stargazers Toolset:** +- `list_starred_repositories` — List starred repositories + +**Security Toolsets:** +- `list_code_scanning_alerts` — List code scanning alerts +- `get_code_scanning_alert` — Get code scanning alert +- `list_dependabot_alerts` — List Dependabot alerts +- `get_dependabot_alert` — Get Dependabot alert +- `list_secret_scanning_alerts` — List secret scanning alerts +- `get_secret_scanning_alert` — Get secret scanning alert +- `list_global_security_advisories` — List global security advisories +- `get_global_security_advisory` — Get global security advisory +- `list_repository_security_advisories` — List repository security advisories +- 
`list_org_repository_security_advisories` — List org repository security advisories + +**Labels Toolset:** +- `list_label` — List labels from repository + +These operations must satisfy **secrecy flow constraints** but do not impose integrity constraints on the caller. + +--- + +#### 11.6.2 Write-Only Operations + +These operations mutate GitHub state without requiring prior reads as part of their semantics. + +**Repository Toolset:** +- `create_repository` — Create repository +- `create_branch` — Create branch +- `create_or_update_file` — Create or update file +- `push_files` — Push files to repository +- `delete_file` — Delete file +- `fork_repository` — Fork repository + +**Pull Request Toolset:** +- `create_pull_request` — Open new pull request +- `add_comment_to_pending_review` — Add review comment to pending review +- `request_copilot_review` — Request Copilot review + +**Issues Toolset:** +- `add_issue_comment` — Add comment to issue +- `assign_copilot_to_issue` — Assign Copilot to issue + +**Actions Toolset:** +- `run_workflow` — Run workflow +- `rerun_workflow_run` — Rerun workflow run +- `rerun_failed_jobs` — Rerun failed jobs +- `cancel_workflow_run` — Cancel workflow run +- `delete_workflow_run_logs` — Delete workflow logs + +**Gists Toolset:** +- `create_gist` — Create gist + +**Notifications Toolset:** +- `dismiss_notification` — Dismiss notification +- `mark_all_notifications_read` — Mark all notifications as read +- `manage_notification_subscription` — Manage notification subscription +- `manage_repository_notification_subscription` — Manage repository notification subscription + +**Projects Toolset:** +- `add_project_item` — Add project item +- `delete_project_item` — Delete project item + +**Stargazers Toolset:** +- `star_repository` — Star repository +- `unstar_repository` — Unstar repository + +**Labels Toolset:** +- `label_write` — Create, update, or delete labels + +These operations must satisfy **integrity flow constraints** with respect to 
the target resource. + +--- + +#### 11.6.3 Read–Write Operations + +These operations read existing GitHub state and conditionally write new state. + +**Pull Request Toolset:** +- `merge_pull_request` — Merge pull request +- `update_pull_request` — Edit pull request +- `update_pull_request_branch` — Update pull request branch +- `pull_request_review_write` — Create, submit, or delete PR reviews + +**Issues Toolset:** +- `issue_write` — Create or update issue (update reads existing state) +- `sub_issue_write` — Add, remove, or reprioritize sub-issues + +**Gists Toolset:** +- `update_gist` — Update gist + +**Projects Toolset:** +- `update_project_item` — Update project item field values + +**Copilot Toolset (Remote Server Only):** +- `create_pull_request_with_copilot` — Perform task with Copilot coding agent + +These operations must satisfy **both secrecy and integrity constraints**, as they may propagate information from read objects into written objects. + +--- + +#### 11.6.4 GitHub Objects Subject to DIFC Labeling + +The following GitHub objects can be read or modified by the MCP tools listed above. These are the objects for which integrity and secrecy labels must be computed. 
+ +**Identity and Access Objects:** +- **User** — GitHub user profile and identity +- **Team** — Organization team and membership +- **Organization** — GitHub organization + +**Repository Structure Objects:** +- **Repository** — Repository metadata and configuration +- **Branch** — Git branch reference +- **Tag** — Git tag reference +- **Commit** — Git commit object +- **Tree** — Git tree object (directory structure) +- **File** — Repository file content +- **Release** — GitHub release with assets + +**Collaboration Objects:** +- **Pull Request** — Pull request with metadata +- **PR Review** — Pull request review (approval, changes requested, comment) +- **PR Review Comment** — Review comment on specific code lines +- **PR Comment** — General comment on a pull request +- **Issue** — GitHub issue +- **Issue Comment** — Comment on an issue +- **Sub-Issue** — Child issue linked to parent issue +- **Label** — Repository label applied to issues/PRs +- **Issue Type** — Organization-defined issue type + +**Discussion Objects:** +- **Discussion** — GitHub Discussion thread +- **Discussion Comment** — Comment on a discussion +- **Discussion Category** — Category for organizing discussions + +**Project Management Objects:** +- **Project** — GitHub Project (v2) +- **Project Item** — Item in a project (linked issue or PR) +- **Project Field** — Custom field in a project + +**CI/CD Objects:** +- **Workflow** — GitHub Actions workflow definition +- **Workflow Run** — Execution instance of a workflow +- **Workflow Job** — Individual job within a workflow run +- **Workflow Log** — Logs from workflow/job execution +- **Workflow Artifact** — Build artifact from workflow run + +**Notification Objects:** +- **Notification** — User notification +- **Notification Subscription** — Subscription to repository/thread notifications + +**Gist Objects:** +- **Gist** — GitHub Gist (code snippet) + +**Security Objects:** +- **Code Scanning Alert** — Alert from code scanning analysis +- 
**Dependabot Alert** — Dependency vulnerability alert +- **Secret Scanning Alert** — Exposed secret alert +- **Security Advisory** — Repository or global security advisory + +**Interaction Objects:** +- **Star** — Repository star (user-to-repository relationship) + +Each object type requires label derivation rules as specified in Sections 5 and 6. Objects that can be modified (via write or read-write operations) are subject to integrity flow constraints; objects that can be read are subject to secrecy flow constraints. + +--- + +#### 11.6.5 Label Derivation by Object Type + +This section specifies how to compute integrity and secrecy labels for each GitHub object type using MCP tool calls. All derivations follow the principles in Sections 3–6. + +--- + +##### Identity and Access Objects + +**User** +- **Integrity Derivation:** + - Use `get_me` to retrieve authenticated user profile + - Use `search_users` to get user metadata + - Integrity is contextual: a user's role relative to a repository determines integrity + - Check repository collaborator status via `get_file_contents` on `.github/CODEOWNERS` or repository settings + - `I(user) = project:` if user has admin/maintain permissions on repository + - `I(user) = contributor:` if user has write/triage permissions + - `I(user) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - User profiles are generally public: `S(user) = ∅` (empty) + - Private user data (email, settings): `S = private:` + +**Team** +- **Integrity Derivation:** + - Use `get_teams` and `get_team_members` to enumerate team membership + - Team integrity derives from organization role assignments + - `I(team) = project:` if team has maintain permissions on repositories + - `I(team) = contributor:` if team has write permissions +- **Secrecy Derivation:** + - Use `search_orgs` to check organization visibility + - Public organizations: `S(team) = ∅` (empty) + - Private organizations: `S(team) = private:` + +**Organization** +- **Integrity Derivation:** 
+ - Use `search_orgs` to retrieve organization metadata + - Organizations themselves are not integrity principals; members inherit roles + - `I(org) = project:` (organizations define the trust boundary) +- **Secrecy Derivation:** + - Check organization visibility settings + - `S(org) = ∅` (empty) for public organizations + - `S(org) = private:` for private organizations + +--- + +##### Repository Structure Objects + +**Repository** +- **Integrity Derivation:** + - Use `search_repositories` to get repository metadata + - Repository integrity reflects its protected branch configuration + - `I(repo) = project:` (repositories define trust boundaries) +- **Secrecy Derivation:** + - Use `search_repositories` to check visibility field + - `S(repo) = ∅` (empty) for public repositories + - `S(repo) = private:` for private repositories + +**Branch** +- **Integrity Derivation:** + - Use `list_branches` to enumerate branches + - Check if branch is protected (default branch, protection rules) + - `I(branch) = project:` if branch is protected + - `I(branch) = project:` if branch requires project approval + - `I(branch) = ∅` (empty) for unprotected feature branches +- **Secrecy Derivation:** + - Inherits from repository: `S(branch) = S(repo)` + +**Tag** +- **Integrity Derivation:** + - Use `list_tags` and `get_tag` to retrieve tag metadata + - Use `get_commit` on tagged commit to check author/committer + - `I(tag) = project:` if tag points to commit on protected branch + - `I(tag) = project:` if created by project + - `I(tag) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from repository: `S(tag) = S(repo)` + +**Commit** +- **Integrity Derivation:** + - Use `get_commit` to retrieve commit details + - Use `list_commits` to check branch membership + - Check if commit is reachable from protected branch + - `I(commit) = project:` if merged into protected branch + - `I(commit) = project:` if approved by project review + - `I(commit) = contributor:` if authored by 
contributor + - `I(commit) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from repository: `S(commit) = S(repo)` + - Check commit message for sensitive patterns: promote to `S = secret` if found + +**Tree** +- **Integrity Derivation:** + - Use `get_repository_tree` to retrieve tree structure + - Integrity inherits from the commit containing the tree + - `I(tree) = I(commit)` where commit references this tree +- **Secrecy Derivation:** + - Inherits from repository: `S(tree) = S(repo)` + +**File** +- **Integrity Derivation:** + - Use `get_file_contents` to retrieve file content and metadata + - Use `list_commits` with path filter to get file history + - File integrity derives from the commit that last modified it + - `I(file) = I(last_modifying_commit)` +- **Secrecy Derivation:** + - Base: `S(file) = S(repo)` + - Scan file path and content for sensitive patterns: + - Files matching `*.env`, `*.key`, `*.pem`, secrets patterns: `S = secret` + - Files in `.github/workflows/` may contain secrets: `S = secret` if secrets detected + +**Release** +- **Integrity Derivation:** + - Use `list_releases`, `get_latest_release`, or `get_release_by_tag` to retrieve release + - Check release author and associated tag + - `I(release) = project:` if created by project and tag is on protected branch + - `I(release) = project:` if created by project + - `I(release) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from repository: `S(release) = S(repo)` + +--- + +##### Collaboration Objects + +**Pull Request** +- **Integrity Derivation:** + - Use `pull_request_read` with `method: get` to retrieve PR metadata + - Use `pull_request_read` with `method: get_reviews` to check approvals + - Use `pull_request_read` with `method: get_status` to check CI status + - Check merge status and target branch + - `I(PR) = project:` if merged into protected branch + - `I(PR) = project:` if approved by project with passing checks + - `I(PR) = contributor:` if author is 
contributor + - `I(PR) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Base: `S(PR) = S(repo)` + - Scan PR body and diff for sensitive content: promote to `S = secret` if found + +**PR Review** +- **Integrity Derivation:** + - Use `pull_request_read` with `method: get_reviews` to retrieve reviews + - Check reviewer's role relative to repository + - `I(review) = project:` if reviewer is project + - `I(review) = contributor:` if reviewer is contributor + - `I(review) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from PR: `S(review) = S(PR)` + +**PR Review Comment** +- **Integrity Derivation:** + - Use `pull_request_read` with `method: get_review_comments` to retrieve comments + - Check comment author's role + - `I(comment) = I(author_role)` +- **Secrecy Derivation:** + - Inherits from PR: `S(comment) = S(PR)` + - Scan content for secrets: promote to `S = secret` if found + +**PR Comment** +- **Integrity Derivation:** + - Use `pull_request_read` with `method: get_comments` to retrieve comments + - Check comment author's role + - `I(comment) = I(author_role)` +- **Secrecy Derivation:** + - Inherits from PR: `S(comment) = S(PR)` + +**Issue** +- **Integrity Derivation:** + - Use `issue_read` with `method: get` to retrieve issue metadata + - Issues are user-submitted content, generally low integrity + - `I(issue) = contributor:` if author is contributor + - `I(issue) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from repository: `S(issue) = S(repo)` + - Scan issue body for sensitive content + +**Issue Comment** +- **Integrity Derivation:** + - Use `issue_read` with `method: get_comments` to retrieve comments + - Check comment author's role + - `I(comment) = I(author_role)` +- **Secrecy Derivation:** + - Inherits from issue: `S(comment) = S(issue)` + +**Sub-Issue** +- **Integrity Derivation:** + - Use `issue_read` with `method: get_sub_issues` to retrieve sub-issues + - Inherits from parent issue and own author + - `I(sub_issue) = 
min(I(parent_issue), I(author_role))` +- **Secrecy Derivation:** + - Inherits from parent: `S(sub_issue) = S(parent_issue)` + +**Label** +- **Integrity Derivation:** + - Use `list_label` or `get_label` to retrieve labels + - Labels are repository configuration, created by projects + - `I(label) = project:` (labels require write access to create) +- **Secrecy Derivation:** + - Inherits from repository: `S(label) = S(repo)` + +**Issue Type** +- **Integrity Derivation:** + - Use `list_issue_types` to retrieve organization issue types + - `I(issue_type) = project:` (organization-level configuration) +- **Secrecy Derivation:** + - Inherits from organization: `S(issue_type) = S(org)` + +--- + +##### Discussion Objects + +**Discussion** +- **Integrity Derivation:** + - Use `get_discussion` to retrieve discussion metadata + - Use `list_discussions` to enumerate discussions + - Discussions are community content, similar to issues + - `I(discussion) = contributor:` if author is contributor + - `I(discussion) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Inherits from repository: `S(discussion) = S(repo)` + +**Discussion Comment** +- **Integrity Derivation:** + - Use `get_discussion_comments` to retrieve comments + - Check comment author's role + - `I(comment) = I(author_role)` +- **Secrecy Derivation:** + - Inherits from discussion: `S(comment) = S(discussion)` + +**Discussion Category** +- **Integrity Derivation:** + - Use `list_discussion_categories` to retrieve categories + - Categories are repository configuration + - `I(category) = project:` +- **Secrecy Derivation:** + - Inherits from repository: `S(category) = S(repo)` + +--- + +##### Project Management Objects + +**Project** +- **Integrity Derivation:** + - Use `get_project` and `list_projects` to retrieve project metadata + - Projects are organizational/repository configuration + - `I(project) = project:` (requires write access to create) +- **Secrecy Derivation:** + - Check project visibility 
(public/private) + - `S(project) = ∅` (empty) for public projects + - `S(project) = private:` for private projects + +**Project Item** +- **Integrity Derivation:** + - Use `get_project_item` and `list_project_items` to retrieve items + - Items link to issues/PRs; integrity derives from linked object + - `I(item) = I(linked_issue_or_PR)` +- **Secrecy Derivation:** + - Inherits from project: `S(item) = S(project)` + +**Project Field** +- **Integrity Derivation:** + - Use `get_project_field` and `list_project_fields` to retrieve fields + - Fields are project configuration + - `I(field) = project:` +- **Secrecy Derivation:** + - Inherits from project: `S(field) = S(project)` + +--- + +##### CI/CD Objects + +**Workflow** +- **Integrity Derivation:** + - Use `list_workflows` to enumerate workflows + - Workflows are code in `.github/workflows/`, integrity derives from commit + - Use `get_file_contents` on workflow file to trace to commit + - `I(workflow) = I(commit_containing_workflow)` +- **Secrecy Derivation:** + - Base: `S(workflow) = S(repo)` + - Workflows may reference secrets: `S = secret` if secrets are used + +**Workflow Run** +- **Integrity Derivation:** + - Use `get_workflow_run` and `list_workflow_runs` to retrieve runs + - Workflow runs execute on specific commits + - `I(run) = I(triggering_commit)` +- **Secrecy Derivation:** + - `S(run) = secret` (runs may access repository secrets and produce sensitive output) + +**Workflow Job** +- **Integrity Derivation:** + - Use `list_workflow_jobs` to retrieve jobs within a run + - Inherits from workflow run + - `I(job) = I(workflow_run)` +- **Secrecy Derivation:** + - `S(job) = secret` (jobs may access secrets) + +**Workflow Log** +- **Integrity Derivation:** + - Use `get_job_logs` or `get_workflow_run_logs` to retrieve logs + - Logs are outputs of jobs + - `I(log) = I(job)` +- **Secrecy Derivation:** + - `S(log) = secret` (logs may contain secrets, credentials, internal paths) + - **Critical:** Logs must never flow to 
public-visible outputs + +**Workflow Artifact** +- **Integrity Derivation:** + - Use `list_workflow_run_artifacts` and `download_workflow_run_artifact` to retrieve artifacts + - Inherits from workflow run + - `I(artifact) = I(workflow_run)` +- **Secrecy Derivation:** + - `S(artifact) = secret` (artifacts may contain sensitive build outputs) + +--- + +##### Notification Objects + +**Notification** +- **Integrity Derivation:** + - Use `list_notifications` and `get_notification_details` to retrieve notifications + - Notifications reference other objects (issues, PRs, etc.) + - `I(notification) = I(referenced_object)` +- **Secrecy Derivation:** + - Notifications are user-private: `S(notification) = private:` + +**Notification Subscription** +- **Integrity Derivation:** + - Use `manage_notification_subscription` to manage subscriptions + - Subscriptions are user preferences + - `I(subscription) = contributor:` (user controls own subscriptions) +- **Secrecy Derivation:** + - `S(subscription) = private:` (user preferences are private) + +--- + +##### Gist Objects + +**Gist** +- **Integrity Derivation:** + - Use `get_gist` and `list_gists` to retrieve gists + - Gists are user-created content + - `I(gist) = contributor:` if owner is contributor + - `I(gist) = ∅` (empty) otherwise +- **Secrecy Derivation:** + - Check gist visibility (public/secret) + - `S(gist) = ∅` (empty) for public gists + - `S(gist) = private:` for secret gists + - Scan content for credentials: promote to `S = secret` if found + +--- + +##### Security Objects + +**Code Scanning Alert** +- **Integrity Derivation:** + - Use `list_code_scanning_alerts` and `get_code_scanning_alert` to retrieve alerts + - Alerts are generated by security tools + - `I(alert) = project:` (tool output, not user-controlled) +- **Secrecy Derivation:** + - `S(alert) = private:` (security findings are sensitive) + - For critical vulnerabilities: `S = secret` + +**Dependabot Alert** +- **Integrity Derivation:** + - Use 
`list_dependabot_alerts` and `get_dependabot_alert` to retrieve alerts + - `I(alert) = project:` (automated dependency analysis) +- **Secrecy Derivation:** + - `S(alert) = private:` (vulnerability information is sensitive) + +**Secret Scanning Alert** +- **Integrity Derivation:** + - Use `list_secret_scanning_alerts` and `get_secret_scanning_alert` to retrieve alerts + - `I(alert) = project:` (automated secret detection) +- **Secrecy Derivation:** + - `S(alert) = secret` (alerts reference actual secrets) + - **Critical:** Must never be disclosed publicly + +**Security Advisory** +- **Integrity Derivation:** + - Use `list_global_security_advisories`, `get_global_security_advisory`, + `list_repository_security_advisories`, `list_org_repository_security_advisories` to retrieve advisories + - Global advisories: `I(advisory) = project:github` (curated by GitHub) + - Repository advisories: `I(advisory) = project:` (created by projects) +- **Secrecy Derivation:** + - Published advisories: `S(advisory) = ∅` (empty) + - Draft advisories: `S(advisory) = private:` + +--- + +##### Interaction Objects + +**Star** +- **Integrity Derivation:** + - Use `list_starred_repositories` to retrieve stars + - Stars are user preferences, not integrity-bearing + - `I(star) = ∅` (empty, no integrity significance) +- **Secrecy Derivation:** + - Stars are public: `S(star) = ∅` (empty) + +--- + +#### 11.6.6 Summary of Label Derivation Tools + +| Object Category | Primary MCP Tools for Derivation | +|-----------------|----------------------------------| +| Identity/Access | `get_me`, `get_teams`, `get_team_members`, `search_users`, `search_orgs` | +| Repository | `search_repositories`, `list_branches`, `get_file_contents` | +| Commits/Trees | `get_commit`, `list_commits`, `get_repository_tree` | +| Tags/Releases | `list_tags`, `get_tag`, `list_releases`, `get_release_by_tag` | +| Pull Requests | `pull_request_read`, `list_pull_requests`, `search_pull_requests` | +| Issues | `issue_read`, `list_issues`, `search_issues` 
| +| Discussions | `get_discussion`, `list_discussions`, `get_discussion_comments` | +| Projects | `get_project`, `list_project_items`, `get_project_field` | +| Actions/CI | `list_workflows`, `get_workflow_run`, `list_workflow_jobs`, `get_job_logs` | +| Security | `list_code_scanning_alerts`, `list_dependabot_alerts`, `list_secret_scanning_alerts` | +| Notifications | `list_notifications`, `get_notification_details` | +| Gists | `get_gist`, `list_gists` | + +--- + +### 11.7 Guard Interface Implementation + +The MCP Gateway enforces DIFC policies through a **Guard** interface. Each backend MCP server (e.g., GitHub) can have a custom guard that handles resource labeling. This section specifies the interface that a GitHub DIFC guard must implement. + +#### 11.7.1 Guard Interface Definition + +A guard must implement the following interface: + +```go +type Guard interface { + // Name returns the identifier for this guard (e.g., "github") + Name() string + + // LabelResource determines the resource being accessed and its labels + // Called BEFORE the backend operation to perform coarse-grained access control + LabelResource(ctx context.Context, toolName string, args interface{}, + backend BackendCaller, caps *Capabilities) (*LabeledResource, OperationType, error) + + // LabelResponse labels the response data after a successful backend call + // Called AFTER the backend returns to enable fine-grained filtering + LabelResponse(ctx context.Context, toolName string, result interface{}, + backend BackendCaller, caps *Capabilities) (LabeledData, error) +} +``` + +#### 11.7.2 Method Specifications + +| Method | Purpose | Invocation Phase | Return Value | +|--------|---------|------------------|--------------| +| `Name()` | Returns guard identifier | Registration, logging | `string` (e.g., `"github"`) | +| `LabelResource()` | Labels target resource **before** operation | Phase 1: Pre-execution | `*LabeledResource`, `OperationType`, `error` | +| `LabelResponse()` | Labels 
response **after** operation | Phase 5: Post-execution | `LabeledData` or `nil`, `error` | + +#### 11.7.3 Operation Types + +The guard must classify each tool call into one of three operation types: + +```go +type OperationType int + +const ( + OperationRead OperationType = iota // Read-only operation + OperationWrite // Write-only operation + OperationReadWrite // Combined read-write operation +) +``` + +This classification determines which DIFC flow rules apply: +- **Read**: Secrecy constraints only (agent must have required secrecy clearance) +- **Write**: Integrity constraints only (agent must have required integrity endorsement) +- **ReadWrite**: Both constraints apply + +#### 11.7.4 Labeled Resource Structure + +The `LabeledResource` type represents a GitHub resource with its computed labels: + +```go +type LabeledResource struct { + Description string // Human-readable description (e.g., "repo:owner/name") + Secrecy SecrecyLabel // Secrecy requirements for this resource + Integrity IntegrityLabel // Integrity requirements for this resource + Structure *ResourceStructure // Optional: fine-grained field labels +} +``` + +For simple resources, `Structure` is `nil` and the labels apply uniformly. For complex responses (e.g., collections), `Structure` enables per-field or per-item labeling. 
+ +#### 11.7.5 Labeled Data Types for Response Filtering + +The `LabelResponse` method returns one of several `LabeledData` implementations: + +**SimpleLabeledData** — Uniform labels for entire response: +```go +type SimpleLabeledData struct { + Data interface{} // The response data (unwrapped JSON) + Labels *LabeledResource // Labels for the entire response +} +``` + +**CollectionLabeledData** — Per-item labels for collections: +```go +type CollectionLabeledData struct { + Items []LabeledItem // Each item with its own labels +} + +type LabeledItem struct { + Data interface{} // Individual item data (unwrapped JSON) + Labels *LabeledResource // Labels specific to this item +} +``` + +**FilteredCollectionLabeledData** — Collection with filtered items: +```go +type FilteredCollectionLabeledData struct { + Accessible []LabeledItem // Items the agent can access + Filtered []LabeledItem // Items filtered due to DIFC policy + TotalCount int // Original collection size + FilterReason string // Why items were filtered +} +``` + +##### 11.7.5.1 MCP Response Format Contract + +**Critical:** The gateway handles MCP protocol wrapping/unwrapping transparently so guards work with clean JSON data. + +**MCP Content Format:** +Backend MCP servers return responses wrapped in the MCP content format: +```json +{ + "content": [ + { + "type": "text", + "text": "{\"items\": [{\"id\": 1, ...}, {\"id\": 2, ...}]}" + } + ] +} +``` + +The `text` field contains a JSON-encoded string with the actual response data. + +**Gateway Responsibility (Unwrap Before Guard):** + +Before calling `guard.LabelResponse()`, the gateway: +1. Detects if the response is MCP-wrapped (has `content[0].text` structure) +2. Extracts and parses the JSON from the `text` field +3. Passes the **unwrapped** data to the guard + +```go +// Gateway unwraps before calling guard +unwrappedData, wasMCPWrapped := unwrapMCPResponse(backendResult) +labeledData := guard.LabelResponse(ctx, toolName, unwrappedData, ...) 
+``` + +**Guard Responsibility (Work with Clean JSON):** + +The guard receives **unwrapped** JSON data and returns labeled items: +```json +// Guard receives unwrapped data: +{ + "tool_name": "list_issues", + "tool_result": [ + {"id": 1, "title": "Bug report", "private": false}, + {"id": 2, "title": "Security issue", "private": true} + ] +} + +// Guard returns labels for each item: +{ + "items": [ + {"index": 0, "secrecy": [], "integrity": ["contributor:owner/repo"]}, + {"index": 1, "secrecy": ["private:owner/repo"], "integrity": ["project:owner/repo"]} + ] +} +``` + +**Gateway Responsibility (Rewrap After Filtering):** + +After DIFC filtering, the gateway rewraps the result: +1. Filters items based on agent clearance and item labels +2. Reconstructs the MCP content format for the filtered result +3. Returns the MCP-wrapped response to the client + +```go +// After filtering, ToResult() rewraps as MCP if originally wrapped +result, err := labeledData.ToResult() +// Returns: {"content":[{"type":"text","text":"[{...filtered items...}]"}]} +``` + +**Contract Summary:** + +| Phase | Data Format | Responsibility | +|-------|-------------|----------------| +| Backend → Gateway | MCP-wrapped | Backend produces MCP format | +| Gateway → Guard | **Unwrapped JSON** | Gateway unwraps before guard call | +| Guard → Gateway | Labels only | Guard labels unwrapped items | +| Gateway → Client | MCP-wrapped | Gateway rewraps after filtering | + +**Rationale:** +- Guards should not need to understand MCP protocol details +- Clean JSON makes guard implementation simpler and more testable +- Gateway handles all protocol-level concerns transparently +- Preserves MCP compatibility with clients expecting wrapped responses + +#### 11.7.6 Backend Caller Interface + +Guards may need to make auxiliary read-only calls to the backend to gather metadata for label derivation (e.g., fetching repository visibility, checking user roles): + +```go +type BackendCaller interface { + // CallTool makes 
a read-only call to the backend MCP server + CallTool(ctx context.Context, toolName string, args interface{}) (interface{}, error) +} +``` + +For example, to label an issue, the guard might call `issue_read` to fetch the issue author, then determine if the author is a project. + +#### 11.7.7 DIFC Enforcement Flow + +The gateway's reference monitor uses guards in this seven-phase flow: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ DIFC Enforcement Flow │ +├─────────────────────────────────────────────────────────────────────┤ +│ Phase 1: guard.LabelResource() │ +│ → Labels target resource, classifies operation type │ +│ │ +│ Phase 2: Reference Monitor coarse-grained check │ +│ → Compares agent labels vs resource labels │ +│ → DENY if flow rules violated │ +│ │ +│ Phase 3: Execute backend call (if Phase 2 allowed) │ +│ → Forward request to GitHub MCP server │ +│ → Backend returns MCP-wrapped response │ +│ │ +│ Phase 4: Gateway unwraps MCP response │ +│ → Extract JSON from {"content":[{"type":"text","text":…}]}│ +│ → Store wrapper for later rewrapping │ +│ │ +│ Phase 5: guard.LabelResponse(unwrapped_data) │ +│ → Guard receives clean JSON, not MCP format │ +│ → Labels response data for fine-grained filtering │ +│ │ +│ Phase 6: Reference Monitor fine-grained filtering │ +│ → Filter collection items based on per-item labels │ +│ → Remove items agent cannot access │ +│ → Rewrap filtered result in MCP format │ +│ │ +│ Phase 7: Return result (labels unchanged) │ +│ → Agent labels remain fixed at initial session values │ +│ → Future: explicit primitives for label changes │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +> **Note:** Automatic label accumulation is disabled in the current implementation. +> Agent secrecy and integrity labels remain fixed at their initial session values. 
+> Future versions will support explicit primitives for: +> - Adding secrecy tags (when agent explicitly accepts sensitive data) +> - Removing integrity tags (when agent performs untrusted operations) + +**Phase 4-6 Detail (MCP Unwrap/Rewrap):** + +``` +Backend Response (MCP-wrapped) + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ {"content":[{"type":"text","text":"[{\"id\":1,...},{\"id\":2,...}]"}]} │ +└─────────────────────────────────────────────────────────────────────┘ + │ + │ Phase 4: Gateway unwraps + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ [{"id": 1, "private": false}, {"id": 2, "private": true}] │ +└─────────────────────────────────────────────────────────────────────┘ + │ + │ Phase 5: Guard labels each item + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Items[0]: secrecy=[], integrity=[contributor:owner/repo] │ +│ Items[1]: secrecy=[private:owner/repo], integrity=[project:...] │ +└─────────────────────────────────────────────────────────────────────┘ + │ + │ Phase 6: Filter (agent lacks private:owner/repo clearance) + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Accessible: [{"id": 1, "private": false}] │ +│ Filtered: [{"id": 2}] (reason: secrecy violation) │ +└─────────────────────────────────────────────────────────────────────┘ + │ + │ Rewrap in MCP format + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ {"content":[{"type":"text","text":"[{\"id\":1,\"private\":false}]"}]}│ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +Client Response +``` + +#### 11.7.8 GitHub Guard Implementation Requirements + +A GitHub DIFC guard must: + +1. **Classify all GitHub MCP tools** by operation type using the classification in Section 11.6 + +2. **Map tool names and arguments to resources**: + - Extract `owner`, `repo`, `issue_number`, etc. 
from tool arguments + - Construct resource descriptions (e.g., `"issue:owner/repo#123"`) + +3. **Derive labels using derivation rules** from Section 11.5.5: + - Use `BackendCaller` to fetch metadata when needed + - Apply the label computation logic for each object type + +4. **Handle collections with per-item labels**: + - For `list_*` and `search_*` operations, return `CollectionLabeledData` + - Each item may have different labels (e.g., private vs public repos) + +5. **Support label accumulation**: + - Return accurate labels so the reference monitor can track information flow + - Enables detection of cross-repository information leakage + - Note: the gateway does not currently accumulate labels automatically (agent labels stay fixed for the session); accurate labels are still required for filtering and for the planned explicit label-change primitives + +6. **Work with unwrapped JSON data** (see Section 11.7.5.1): + - The gateway unwraps MCP responses before calling `LabelResponse` + - Guards receive and label clean JSON data, not MCP-wrapped format + - Guards should NOT attempt to parse or produce MCP content wrappers + +--- + +### 11.8 Remote Guard Architecture + +To support guards maintained in separate repositories, the gateway supports a **remote guard protocol**. 
This enables: + +- Guards implemented in any language (not just Go) +- Independent versioning and deployment of guards +- Third-party guard development without modifying the gateway +- Isolation between the gateway and guard logic + +#### 11.8.1 Architectural Options + +| Approach | Pros | Cons | +|----------|------|------| +| **Go Plugin** (`plugin` package) | Native performance | Same Go version required, Linux/macOS only, fragile | +| **gRPC Remote Guard** | Language-agnostic, well-defined protocol | Added latency, requires gRPC infrastructure | +| **HTTP Remote Guard** | Simple, language-agnostic | Added latency, less efficient | +| **MCP-based Guard** | Consistent with gateway architecture, reuses existing infrastructure | Added latency, requires guard to be MCP server | +| **WebAssembly (Wasm)** | Near-native performance, sandboxed, portable | Limited host interop, memory constraints, ecosystem still maturing | +| **Git Submodule** | Simple, compile-time | Requires gateway rebuild | + +**Recommended approach**: MCP-based remote guards for third-party development, or Wasm modules for performance-critical scenarios. 
+ +##### Wasm vs MCP-Based Guards: Detailed Comparison + +| Dimension | MCP-Based Guard | Wasm Module | +|-----------|-----------------|-------------| +| **Performance** | Process isolation + IPC overhead (~1-10ms per call) | In-process, near-native (~μs per call) | +| **Isolation** | OS process boundary | Wasm sandbox (memory isolation) | +| **Language Support** | Any language with MCP SDK | Rust, C/C++, Go (TinyGo), AssemblyScript | +| **Backend Calls** | Native (guard is MCP client or uses callback) | Requires host functions (complex) | +| **Memory** | Separate process memory | Shared linear memory (limited to 4GB) | +| **Debugging** | Standard tools, logs, debuggers | Wasm-specific tooling required | +| **Distribution** | Container images, binaries | `.wasm` files (~100KB-10MB) | +| **Hot Reload** | Restart process | Load new module instantly | +| **Maturity** | MCP is established | Wasm component model still evolving | + +##### Wasm Guard Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Gateway Process │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────────────────────────────────┐ │ +│ │ Gateway │ │ Wasm Runtime (wasmtime) │ │ +│ │ Core │◄──►│ ┌─────────────────────────────────┐ │ │ +│ └─────────────┘ │ │ github-guard.wasm │ │ │ +│ │ │ │ ┌───────────┐ ┌─────────────┐ │ │ │ +│ │ │ │ │ label_ │ │ label_ │ │ │ │ +│ │ │ │ │ resource()│ │ response() │ │ │ │ +│ │ │ │ └───────────┘ └─────────────┘ │ │ │ +│ │ │ └─────────────────────────────────┘ │ │ +│ ▼ │ │ │ │ +│ ┌─────────────┐ │ ▼ (host function call) │ │ +│ │ Backend │◄───│─────────┤ │ │ +│ │ (GitHub) │ │ │ fetch_metadata() │ │ +│ └─────────────┘ └─────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +##### Wasm Host Functions for Backend Access + +The key challenge with Wasm is that modules cannot make network calls directly. 
The gateway must expose **host functions** that the Wasm module can call: + +```rust +// Guard Wasm module (Rust) +#[link(wasm_import_module = "gateway")] +extern "C" { + fn fetch_metadata(tool_ptr: *const u8, tool_len: u32, + args_ptr: *const u8, args_len: u32, + result_ptr: *mut u8, result_cap: u32) -> i32; +} + +#[no_mangle] +pub extern "C" fn label_resource(tool_ptr: *const u8, tool_len: u32, + args_ptr: *const u8, args_len: u32, + out_ptr: *mut u8, out_cap: u32) -> i32 { + // Parse inputs + let tool_name = unsafe { std::str::from_utf8_unchecked(...) }; + let args: Value = serde_json::from_slice(...); + + // Call host function to fetch metadata + let mut metadata_buf = [0u8; 4096]; + let metadata_len = unsafe { + fetch_metadata( + b"get_me\0".as_ptr(), 6, + b"{}\0".as_ptr(), 2, + metadata_buf.as_mut_ptr(), 4096 + ) + }; + + // Compute labels based on metadata + let labels = compute_labels(tool_name, &args, &metadata); + + // Write result to output buffer + let result_json = serde_json::to_vec(&labels).unwrap(); + // ... 
copy to out_ptr +} +``` + +```go +// Gateway host function implementation (Go with wasmtime) +func (r *WasmRuntime) registerHostFunctions(instance *wasmtime.Instance) { + // Register fetch_metadata host function + fetchMetadata := wasmtime.WrapFunc(r.store, func( + toolPtr, toolLen, argsPtr, argsLen int32, + resultPtr, resultCap int32, + ) int32 { + // Read tool name and args from Wasm memory + toolName := r.readString(instance, toolPtr, toolLen) + argsJSON := r.readBytes(instance, argsPtr, argsLen) + + // Call backend through gateway's existing connection + result, err := r.backend.CallTool(r.ctx, toolName, argsJSON) + if err != nil { + return -1 + } + + // Write result back to Wasm memory + return r.writeBytes(instance, resultPtr, resultCap, result) + }) + + instance.GetExport("fetch_metadata").Func().Set(fetchMetadata) +} +``` + +##### Wasm Component Model (Future) + +The **Wasm Component Model** (WASI Preview 2) will simplify this with proper interface types: + +```wit +// guard.wit - WebAssembly Interface Types definition +interface guard { + record labeled-resource { + description: string, + secrecy: list<string>, + integrity: list<string>, + } + + enum operation-type { + read, + write, + read-write, + } + + // Host-provided function for backend calls + fetch-metadata: func(tool: string, args: string) -> result<string, string> + + // Guard-exported functions + label-resource: func(tool: string, args: string) -> result<tuple<labeled-resource, operation-type>, string> + label-response: func(tool: string, result: string) -> result<string, string> +} +``` + +##### When to Use Each Approach + +| Scenario | Recommended | Rationale | +|----------|-------------|-----------| +| Third-party guard development | MCP-based | Easier to develop, any language, standard tooling | +| High-throughput gateways (>1000 req/s) | Wasm | Eliminates IPC overhead | +| Complex label derivation with many backend calls | MCP-based | Simpler async I/O handling | +| Security-critical deployments | Wasm | Sandboxed execution, no process escape | +| Rapid iteration / debugging | 
MCP-based | Standard debugging tools | +| Edge/embedded deployment | Wasm | Single binary, smaller footprint | + +##### Hybrid Approach + +A gateway could support both types simultaneously: + +```toml +# MCP-based guard (development, third-party) +[guards.github-dev] +type = "mcp" +command = "docker" +args = ["run", "--rm", "-i", "ghcr.io/myorg/github-guard:dev"] + +# Wasm guard (production, performance) +[guards.github-prod] +type = "wasm" +module = "/opt/guards/github-guard.wasm" +``` + +This allows: +- Developing guards with MCP for simplicity and rapid iteration +- Compiling to Wasm for production performance +- Gradual migration as Wasm tooling matures + +#### 11.8.2 MCP-Based Remote Guard Protocol + +A remote guard is itself an MCP server that exposes two tools corresponding to the Guard interface methods: + +**Tool: `guard/label_resource`** + +Labels a resource before the operation executes. + +```json +{ + "name": "guard/label_resource", + "arguments": { + "tool_name": "issue_read", + "tool_args": { "owner": "github", "repo": "github-mcp-server", "issue_number": 42 }, + "backend_id": "github", + "agent_id": "demo-agent" + } +} +``` + +**Response:** + +```json +{ + "resource": { + "description": "issue:github/github-mcp-server#42", + "secrecy": ["repo:github/github-mcp-server"], + "integrity": ["contributor:github/github-mcp-server"] + }, + "operation": "read", + "metadata_requests": [] +} +``` + +**Tool: `guard/label_response`** + +Labels response data for fine-grained filtering. + +**Important:** The `result` field contains **unwrapped JSON data**, not MCP-wrapped format. The gateway extracts the actual response data from the MCP content wrapper before calling this tool (see Section 11.7.5.1). + +```json +{ + "name": "guard/label_response", + "arguments": { + "tool_name": "list_issues", + "tool_args": { "owner": "github", "repo": "github-mcp-server" }, + "result": [ { "number": 1, "title": "..." }, { "number": 2, "title": "..." 
} ], + "backend_id": "github" + } +} +``` + +Note: The `result` above is the unwrapped array, not the MCP format: +```json +// Backend returns MCP-wrapped: +{"content":[{"type":"text","text":"[{\"number\":1,...},{\"number\":2,...}]"}]} + +// Gateway unwraps before calling guard/label_response: +[{"number": 1, "title": "..."}, {"number": 2, "title": "..."}] +``` + +**Response:** + +```json +{ + "type": "collection", + "items": [ + { "index": 0, "secrecy": [], "integrity": ["contributor:github/github-mcp-server"] }, + { "index": 1, "secrecy": ["repo:github/github-mcp-server"], "integrity": ["contributor:github/github-mcp-server", "project:github/github-mcp-server"] } + ] +} +``` + +**Tool: `guard/fetch_metadata`** (optional) + +Allows the gateway to fetch metadata on behalf of the guard when the guard cannot directly call the backend. + +```json +{ + "name": "guard/fetch_metadata", + "arguments": { + "backend_id": "github", + "tool_name": "get_me", + "tool_args": {} + } +} +``` + +#### 11.8.3 Gateway Configuration for Remote Guards + +Remote guards are configured in the gateway configuration file: + +**TOML Configuration:** + +```toml +[guards.github] +type = "remote" +command = "docker" +args = ["run", "--rm", "-i", "ghcr.io/myorg/github-difc-guard:latest"] + +# Or connect to an already-running guard server +[guards.github] +type = "remote" +url = "http://localhost:8081/mcp" +``` + +**JSON Configuration:** + +```json +{ + "guards": { + "github": { + "type": "remote", + "container": "ghcr.io/myorg/github-difc-guard:latest" + } + } +} +``` + +#### 11.8.4 Guard-Backend Binding + +Guards are bound to backends by server ID. The gateway routes guard calls based on the backend being accessed: + +```toml +[servers.github] +command = "docker" +args = ["run", "--rm", "-i", "ghcr.io/github/github-mcp-server"] +guard = "github" # References [guards.github] +``` + +If no guard is specified, the gateway uses the built-in `noop` guard (allows all operations). 
+ +#### 11.8.5 Metadata Fetch Protocol + +When a guard needs to call the backend to gather labeling information (e.g., checking if a user is a project, determining repository visibility), several approaches are available: + +##### Option A: Direct Backend Access + +The guard has its own connection to the backend and makes calls directly. + +**Architecture:** +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Gateway │ ───► │ Guard │ ───► │ Backend │ +│ (client) │ │ (server) │ │ (GitHub MCP)│ +└─────────────┘ └─────────────┘ └─────────────┘ + │ ▲ + └─────────────────────┘ + Guard connects directly +``` + +**Pros:** +- Guard has full control over backend calls +- No round-trip latency to gateway +- Guard can cache backend connections + +**Cons:** +- Guard needs its own credentials (e.g., `GITHUB_TOKEN`) +- Guard must manage MCP client lifecycle +- Duplicates gateway's backend connection logic + +**Implementation:** The guard embeds an MCP client and launches/connects to the backend: + +```go +// In guard's initialization +client, err := mcp.NewStdioClient("docker", []string{ + "run", "--rm", "-i", + "-e", "GITHUB_PERSONAL_ACCESS_TOKEN", + "ghcr.io/github/github-mcp-server", +}) + +// In label_resource handler +result, err := client.CallTool(ctx, "get_me", map[string]interface{}{}) +``` + +##### Option B: Gateway-Proxied Metadata Requests + +The guard requests metadata from the gateway, which fetches it from the backend and re-invokes the guard. This uses a two-phase call pattern. + +**Architecture:** +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Gateway │ ───► │ Guard │ │ Backend │ +│ (client) │ │ (server) │ │ (GitHub MCP)│ +└─────────────┘ └─────────────┘ └─────────────┘ + │ │ ▲ + │ 1. label_resource(...) │ + │◄──────────────────┘ │ + │ return: need_metadata │ + │ │ + │ 2. gateway fetches metadata ──────────┘ + │ │ + │ 3. 
label_resource(..., metadata) │ + │──────────────────►│ │ + │ return: labels │ │ + │◄──────────────────┘ │ +``` + +**Pros:** +- Guard doesn't need backend credentials +- Gateway controls all backend access (single point of policy) +- Guard remains stateless + +**Cons:** +- Additional round-trip for metadata fetching +- More complex protocol + +**Protocol:** + +Phase 1 — Guard signals it needs metadata: + +```json +// Request to guard +{ + "name": "guard/label_resource", + "arguments": { + "tool_name": "issue_read", + "tool_args": { "owner": "github", "repo": "github-mcp-server", "issue_number": 42 } + } +} + +// Response from guard (needs metadata) +{ + "status": "need_metadata", + "requests": [ + { "id": "user", "tool": "get_me", "args": {} }, + { "id": "repo", "tool": "search_repositories", "args": { "query": "repo:github/github-mcp-server" } } + ] +} +``` + +Phase 2 — Gateway fetches and re-invokes: + +```json +// Request to guard (with metadata) +{ + "name": "guard/label_resource", + "arguments": { + "tool_name": "issue_read", + "tool_args": { "owner": "github", "repo": "github-mcp-server", "issue_number": 42 }, + "metadata": { + "user": { "login": "octocat", "type": "User" }, + "repo": { "items": [{ "private": false, "permissions": { "admin": true } }] } + } + } +} + +// Response from guard (with labels) +{ + "status": "complete", + "resource": { + "description": "issue:github/github-mcp-server#42", + "secrecy": [], + "integrity": ["contributor:github/github-mcp-server", "project:github/github-mcp-server"] + }, + "operation": "read" +} +``` + +##### Option C: MCP Sampling-Style Callback + +MCP defines a `sampling` capability where servers can request LLM completions from clients. A similar pattern could allow guards to request backend calls from the gateway. 
+ +**Architecture:** +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Gateway │ ◄──► │ Guard │ │ Backend │ +│ (client) │ │ (server) │ │ (GitHub MCP)│ +└─────────────┘ └─────────────┘ └─────────────┘ + │ │ ▲ + │ 1. label_resource(...) │ + │──────────────────►│ │ + │ │ │ + │ 2. guard sends request to gateway │ + │◄──────────────────┤ │ + │ { "method": "backend/call", ... } │ + │ │ + │ 3. gateway calls backend ─────────────┘ + │ │ + │ 4. gateway returns result to guard │ + │──────────────────►│ │ + │ │ + │ 5. guard returns labels │ + │◄──────────────────┘ │ +``` + +**Pros:** +- Single round-trip from gateway's perspective +- Guard can make multiple backend calls within one request +- Cleaner than two-phase protocol + +**Cons:** +- Requires extending MCP with custom request type +- More complex bidirectional communication + +**Protocol:** + +The gateway advertises a `backend/call` capability when connecting to the guard: + +```json +{ + "capabilities": { + "experimental": { + "backendCall": { "backends": ["github"] } + } + } +} +``` + +During `label_resource` processing, the guard sends a request to the gateway: + +```json +// Guard sends to gateway (server → client request) +{ + "jsonrpc": "2.0", + "id": "meta-1", + "method": "backend/call", + "params": { + "backend": "github", + "tool": "get_me", + "args": {} + } +} + +// Gateway responds +{ + "jsonrpc": "2.0", + "id": "meta-1", + "result": { "login": "octocat", "type": "User" } +} +``` + +##### Option D: Direct GitHub API Access + +The guard bypasses MCP entirely and calls the GitHub REST or GraphQL API directly. 
+ +**Pros:** +- Full API access without MCP limitations +- Simpler if guard only needs specific endpoints + +**Cons:** +- Breaks MCP abstraction +- Duplicates API client logic +- May have different rate limits / auth + +**Recommendation:** + +| Scenario | Recommended Approach | +|----------|---------------------| +| Guard maintained alongside gateway | Option A (direct access) | +| Third-party guard, simple needs | Option B (gateway-proxied) | +| Third-party guard, complex needs | Option C (callback pattern) | +| Guard needs non-MCP data | Option D (direct API) | + +For a **GitHub guard in a separate repository**, **Option B (gateway-proxied)** is recommended because: +- Guard doesn't need its own GitHub credentials +- Gateway maintains control over all backend access +- Simpler guard implementation +- Metadata requests are auditable by the gateway + +#### 11.8.6 Credential and Trust Model + +This section clarifies the credential requirements for each component in the remote guard architecture. 
+ +##### Credential Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Credential Flow │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ +│ │ Agent │────────►│ Gateway │────────►│ Backend (GitHub MCP) │ │ +│ └─────────┘ └─────────────┘ └─────────────────────────┘ │ +│ │ │ ▲ │ +│ │ Agent ID │ GITHUB_TOKEN │ │ +│ │ (session-based) │ (for backend calls) │ │ +│ │ │ │ │ +│ │ ┌─────▼─────┐ │ │ +│ │ │ Guard │ │ │ +│ │ │ (no creds)│─────────────────────┘ │ +│ │ └───────────┘ metadata via gateway │ +│ │ │ +└───────┼─────────────────────────────────────────────────────────────────────┘ + │ + │ Trust boundary: Agent has empty integrity (∅) + │ Guard is trusted (but credential-less) + │ Gateway holds all backend credentials +``` + +##### Component Credential Requirements + +| Component | Credentials Required | Trust Level | Notes | +|-----------|---------------------|-------------|-------| +| **Agent** | None (identified by session) | Empty integrity (∅) | DIFC labels enforce restrictions | +| **Gateway** | Backend credentials (e.g., `GITHUB_TOKEN`) | Trusted | Single credential holder | +| **Guard (MCP-based)** | None | Trusted | Relies on gateway for backend access | +| **Guard (Wasm)** | None | Sandboxed | Host functions provide backend access | +| **Backend** | Receives gateway's credentials | N/A | Standard MCP server | + +##### Why Guards Don't Need Backend Credentials + +With the gateway-proxied approach (Option B), guards operate as **pure labeling functions**: + +1. **Input**: Tool name, arguments, and (optionally) metadata from previous backend calls +2. **Processing**: Apply label derivation rules +3. **Output**: Labels and operation classification + +The guard never directly communicates with the backend. 
All backend interactions flow through the gateway: + +``` +Agent Request + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Gateway │ +│ 1. Receive tool call from agent │ +│ 2. Call guard.label_resource(tool, args) │ +│ └── Guard returns: "need_metadata" + requests │ +│ 3. Gateway calls backend with GITHUB_TOKEN │ +│ └── Backend returns: metadata │ +│ 4. Call guard.label_resource(tool, args, metadata) │ +│ └── Guard returns: labels, operation type │ +│ 5. Evaluate DIFC policy │ +│ 6. If allowed, call backend with GITHUB_TOKEN │ +│ 7. Call guard.label_response(tool, result) │ +│ 8. Filter response based on labels │ +│ 9. Return filtered result to agent │ +└─────────────────────────────────────────────────────────────┘ +``` + +##### Guard Configuration (No Credentials) + +A third-party GitHub guard is configured **without** any GitHub credentials: + +```toml +# Gateway configuration +[servers.github] +command = "docker" +args = ["run", "--rm", "-i", "-e", "GITHUB_PERSONAL_ACCESS_TOKEN", "ghcr.io/github/github-mcp-server"] +env = { "GITHUB_PERSONAL_ACCESS_TOKEN" = "${GITHUB_TOKEN}" } +guard = "github" + +[guards.github] +type = "mcp" +command = "docker" +args = ["run", "--rm", "-i", "ghcr.io/myorg/github-difc-guard:latest"] +# Note: NO credentials passed to guard +# Guard relies entirely on gateway-proxied metadata +``` + +##### Security Benefits + +**Principle of Least Privilege:** +- Guard only receives the minimum data needed for labeling +- Guard cannot exfiltrate credentials (it has none) +- Guard cannot make unauthorized backend calls + +**Auditability:** +- All backend calls flow through the gateway +- Gateway can log every metadata request from the guard +- No hidden communication channels between guard and backend + +**Isolation:** +- Compromised guard cannot access backend directly +- Guard runs in separate process/container +- Gateway controls what metadata the guard sees + +##### When Guards Need Their Own Credentials + +In some 
scenarios, guards may require their own credentials: + +| Scenario | Why | Credential Type | +|----------|-----|-----------------| +| Option A (direct access) | Guard connects directly to backend | Backend credentials | +| Option D (direct API) | Guard calls REST/GraphQL API | API tokens | +| Guard-specific services | Guard needs external data sources | Service-specific credentials | + +Example with direct access: + +```toml +[guards.github] +type = "mcp" +command = "docker" +args = ["run", "--rm", "-i", "-e", "GITHUB_TOKEN", "ghcr.io/myorg/github-difc-guard:latest"] +env = { "GITHUB_TOKEN" = "${GUARD_GITHUB_TOKEN}" } # Separate token for guard +``` + +**Recommendation**: Use gateway-proxied access (Option B) for third-party guards to avoid credential proliferation. + +##### Metadata Scoping + +When the guard requests metadata, the gateway should scope the requests appropriately: + +```json +// Guard requests +{ + "requests": [ + { "id": "user", "tool": "get_me", "args": {} }, + { "id": "repo", "tool": "search_repositories", "args": { "query": "repo:owner/name" } } + ] +} +``` + +The gateway may: +1. **Validate requests**: Ensure guard only requests read operations +2. **Cache responses**: Avoid redundant backend calls for the same metadata +3. **Redact sensitive fields**: Remove tokens, secrets, or PII from metadata before passing to guard +4. 
**Rate limit**: Prevent guards from overwhelming the backend with metadata requests + +```go +// Gateway metadata validation +func (g *Gateway) validateMetadataRequest(req MetadataRequest) error { + // Only allow read operations for metadata + if !isReadOnlyTool(req.Tool) { + return fmt.Errorf("guard cannot request write operation: %s", req.Tool) + } + + // Limit number of metadata requests per label_resource call + if g.metadataRequestCount > MaxMetadataRequests { + return fmt.Errorf("too many metadata requests") + } + + return nil +} +``` + +##### Distinguishing Agent Calls from Guard Metadata Calls + +A critical implementation detail: the gateway must distinguish between two types of backend calls: + +| Call Type | Origin | Subject to DIFC | Purpose | +|-----------|--------|-----------------|---------| +| **Agent call** | Agent request | Yes | Perform actual operation | +| **Metadata call** | Guard request | No | Gather data for labeling | + +**Why metadata calls bypass DIFC:** + +Metadata calls exist to *compute* labels—they cannot themselves be subject to label checks because no labels exist yet. This creates a necessary exception to DIFC enforcement: + +``` +Agent Request: issue_read(owner, repo, issue_number) + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Gateway │ +│ │ +│ 1. guard.label_resource("issue_read", args) │ +│ └── Guard: "I need metadata to label this" │ +│ │ +│ 2. Metadata call: get_me() ◄── BYPASSES DIFC (privileged) │ +│ └── Returns: { login: "octocat", ... } │ +│ │ +│ 3. guard.label_resource("issue_read", args, metadata) │ +│ └── Guard: { secrecy: [...], integrity: [...] } │ +│ │ +│ 4. DIFC check: Can agent read this resource? │ +│ └── If denied: return error │ +│ │ +│ 5. Agent call: issue_read() ◄── SUBJECT TO DIFC │ +│ └── Returns: { issue data } │ +│ │ +│ 6. 
guard.label_response(result) + filtering │ +│ └── Returns: filtered result to agent │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +**Implementation: Call Context Tagging** + +The gateway must tag each backend call with its context: + +```go +type CallContext int + +const ( + CallContextAgent CallContext = iota // Agent-initiated, DIFC enforced + CallContextMetadata // Guard-initiated, DIFC bypassed +) + +// In gateway's backend call handler +func (g *Gateway) callBackend(ctx context.Context, tool string, args interface{}) (interface{}, error) { + callCtx := GetCallContext(ctx) + + switch callCtx { + case CallContextAgent: + // Full DIFC enforcement + resource, op, err := g.guard.LabelResource(ctx, tool, args, ...) + if err != nil { + return nil, err + } + + result := g.evaluator.Evaluate(agentLabels, resource, op) + if !result.IsAllowed() { + return nil, fmt.Errorf("DIFC violation: %s", result.Reason) + } + + // Execute call + return g.backend.CallTool(ctx, tool, args) + + case CallContextMetadata: + // Privileged call - no DIFC checks + // But validate it's read-only + if !isReadOnlyTool(tool) { + return nil, fmt.Errorf("metadata calls must be read-only") + } + + return g.backend.CallTool(ctx, tool, args) + } +} +``` + +**Security Constraints on Metadata Calls:** + +Although metadata calls bypass DIFC, they are still constrained: + +| Constraint | Rationale | +|------------|-----------| +| **Read-only** | Guards cannot modify backend state | +| **Rate-limited** | Prevent denial of service | +| **Logged** | Audit trail for privileged calls | +| **Scoped to request** | Cannot cache across unrelated requests | +| **No credential exposure** | Responses sanitized before reaching guard | + +```go +// Metadata call security wrapper +func (g *Gateway) executeMetadataCall(ctx context.Context, req MetadataRequest) (interface{}, error) { + // Constraint 1: Read-only + if !isReadOnlyTool(req.Tool) { + log.Warn("[DIFC] Guard requested 
write operation: %s", req.Tool) + return nil, ErrMetadataWriteNotAllowed + } + + // Constraint 2: Rate limit + if !g.metadataRateLimiter.Allow() { + return nil, ErrMetadataRateLimitExceeded + } + + // Constraint 3: Log privileged call + log.Info("[DIFC] Privileged metadata call: tool=%s, guard=%s", req.Tool, g.currentGuard) + + // Execute call (bypasses DIFC) + result, err := g.backend.CallTool( + WithCallContext(ctx, CallContextMetadata), + req.Tool, + req.Args, + ) + if err != nil { + return nil, err + } + + // Constraint 5: Sanitize response + sanitized := g.sanitizeMetadataResponse(result) + + return sanitized, nil +} + +// Sanitize sensitive fields from metadata +func (g *Gateway) sanitizeMetadataResponse(result interface{}) interface{} { + // Remove fields that could leak credentials or secrets + // This is defense-in-depth; guard shouldn't need these anyway + return redactFields(result, []string{ + "token", + "secret", + "password", + "private_key", + "access_token", + }) +} +``` + +**Trust Assumption:** + +This design assumes the guard is **trusted but credential-less**: +- Trusted: Gateway executes metadata calls the guard requests +- Credential-less: Guard cannot independently access the backend + +If a guard is compromised, the worst it can do is: +1. Request excessive metadata (mitigated by rate limiting) +2. Return incorrect labels (leads to policy violations, not data exfiltration) +3. Observe metadata responses (mitigated by sanitization) + +A compromised guard **cannot**: +1. Make write calls to the backend +2. Access credentials directly +3. Bypass the gateway's final DIFC enforcement + +#### 11.8.7 Remote Guard Lifecycle + +1. **Startup**: Gateway launches remote guard process (or connects to URL) +2. **Initialization**: Gateway calls `initialize` on guard MCP server +3. **Tool discovery**: Gateway verifies guard exposes required tools +4. **Request handling**: Gateway invokes guard tools for each backend request +5. 
**Shutdown**: Gateway terminates guard process on exit + +#### 11.8.8 Example: Third-Party GitHub Guard Repository + +A separate repository for a GitHub DIFC guard might have this structure: + +``` +github-difc-guard/ +├── README.md +├── Dockerfile +├── go.mod +├── cmd/ +│ └── guard/ +│ └── main.go # MCP server entrypoint +├── internal/ +│ ├── labeler/ +│ │ ├── repository.go # Repository label derivation +│ │ ├── issue.go # Issue label derivation +│ │ ├── pullrequest.go # PR label derivation +│ │ └── ... +│ └── tools/ +│ ├── label_resource.go +│ └── label_response.go +└── pkg/ + └── github/ + └── client.go # GitHub API client for metadata +``` + +The guard would be published as a container image and configured in the gateway: + +```toml +[guards.github] +type = "remote" +container = "ghcr.io/myorg/github-difc-guard:v1.2.0" +env = { "GITHUB_TOKEN" = "${GITHUB_TOKEN}" } +``` + +--- + + +## 12. Summary + +This proposal defines a DIFC labeling scheme for GitHub data in which labels are *derived, monotonic, and auditably reconstructible*. Integrity reflects current endorsement, secrecy constrains release, AI agents attenuate trust, and an external mediator enforces DIFC policies over GitHub interactions. The result is a practical and principled foundation for secure automation over collaborative repositories. diff --git a/examples/guards/EXTERNAL_GUARD_QUICKSTART.md b/examples/guards/EXTERNAL_GUARD_QUICKSTART.md new file mode 100644 index 00000000..9ac0ac38 --- /dev/null +++ b/examples/guards/EXTERNAL_GUARD_QUICKSTART.md @@ -0,0 +1,447 @@ +# External WASM Guard Quick Start Guide + +This guide explains how to create, build, and host WASM guards in a separate repository from the MCP Gateway. + +## Overview + +WASM guards can be developed and maintained in separate repositories, then loaded by the gateway at runtime. 
This allows: +- Independent versioning and development +- Team-specific guard implementations +- Secure distribution via GitHub Releases or Packages + +## GitHub Storage Options for WASM Modules + +GitHub provides several secure ways to host WASM modules: + +### 1. GitHub Releases (Recommended) +**Best for**: Versioned guard releases +- Attach `.wasm` files as release assets +- Access via stable URLs: `https://github.com/owner/repo/releases/download/v1.0.0/guard.wasm` +- Supports checksums for verification +- Public or private repositories + +### 2. GitHub Packages (Container Registry) +**Best for**: OCI-compatible workflows +- Package WASM as OCI artifacts +- Access via `ghcr.io/owner/guard:tag` +- Requires OCI tooling to extract WASM +- More complex but consistent with container workflows + +### 3. Git LFS (Large File Storage) +**Best for**: Development/testing +- Store WASM in repository with Git LFS +- Clone repository to access guards +- Less suitable for production distribution + +**Recommendation**: Use **GitHub Releases** for production guard distribution. It's simple, secure, and provides stable URLs. 
+ +## Quick Start: Creating a Separate Guard Repository + +### Step 1: Fork or Create Guard Repository + +```bash +# Option A: Fork the sample guard +gh repo fork githubnext/gh-aw-mcpg --clone +cd gh-aw-mcpg/examples/guards/sample-guard + +# Option B: Create from scratch +mkdir my-difc-guard && cd my-difc-guard +git init +``` + +### Step 2: Set Up Guard Project + +If starting from scratch, create the minimal structure: + +```bash +# Create guard source +cat > main.go << 'EOF' +package main + +import ( + "encoding/json" + "fmt" + "unsafe" +) + +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 + +//export label_resource +func labelResource(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + // Read input + input := readBytes(inputPtr, inputLen) + var req map[string]interface{} + json.Unmarshal(input, &req) + + // Extract owner/repo for repo-scoped tags + toolArgs, _ := req["tool_args"].(map[string]interface{}) + owner, _ := toolArgs["owner"].(string) + repo, _ := toolArgs["repo"].(string) + + // Create response with empty labels (public, no endorsement) + // Per DIFC spec: empty secrecy = public, empty integrity = no endorsement + output := map[string]interface{}{ + "resource": map[string]interface{}{ + "description": fmt.Sprintf("resource:%s", req["tool_name"]), + "secrecy": []string{}, // empty = public + "integrity": []string{}, // empty = no endorsement + }, + "operation": "read", + } + + // Example: add repo-scoped contributor tag for write operations + if req["tool_name"] == "create_issue" && owner != "" && repo != "" { + output["resource"].(map[string]interface{})["integrity"] = []string{ + "contributor:" + owner + "/" + repo, + } + output["operation"] = "write" + } + + // Write output + outputJSON, _ := json.Marshal(output) + copy(readBytes(outputPtr, uint32(len(outputJSON))), outputJSON) + return int32(len(outputJSON)) +} + +//export label_response +func 
labelResponse(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + return 0 // No fine-grained labeling +} + +func readBytes(ptr, length uint32) []byte { + return unsafe.Slice((*byte)(unsafe.Pointer(uintptr(ptr))), length) +} + +func main() {} +EOF + +# Create Makefile +cat > Makefile << 'EOF' +.PHONY: build clean + +GO123 := $(HOME)/go/bin/go1.23.4 + +build: + @echo "Building WASM guard with TinyGo + Go 1.23.4..." + @if [ -x "$(GO123)" ]; then \ + export GOROOT=$$($(GO123) env GOROOT) && \ + tinygo build -o guard.wasm -target=wasi main.go && \ + echo "✓ Built guard.wasm"; \ + else \ + echo "Error: Go 1.23.4 required."; \ + echo "Install: go install golang.org/dl/go1.23.4@latest && ~/go/bin/go1.23.4 download"; \ + exit 1; \ + fi + +clean: + rm -f guard.wasm +EOF + +# Create README +cat > README.md << 'EOF' +# My DIFC Guard + +Custom DIFC guard for MCP Gateway. + +## Build + +Requires: +- Go 1.23.4: `go install golang.org/dl/go1.23.4@latest && ~/go/bin/go1.23.4 download` +- TinyGo 0.34+: https://tinygo.org/getting-started/install/ + +Build: `make build` +EOF +``` + +### Step 3: Build Guard + +```bash +# Install Go 1.23.4 (if not already installed) +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download + +# Verify Go 1.23.4 is installed +~/go/bin/go1.23.4 version # Should show go1.23.4 + +# Install TinyGo (if not already installed) +# macOS: brew tap tinygo-org/tools && brew install tinygo +# Linux: See https://tinygo.org/getting-started/install/ + +# Build the guard +make build +# Creates: guard.wasm +``` + +### Step 4: Verify Guard + +```bash +# Check the WASM file +file guard.wasm +# Should show: guard.wasm: WebAssembly (wasm) binary module version 0x1 (MVP) + +# Check size (should be reasonable, typically < 5MB) +ls -lh guard.wasm +``` + +### Step 5: Create GitHub Repository and Release + +```bash +# Initialize git (if not already done) +git init +git add . 
+git commit -m "Initial guard implementation" + +# Create GitHub repository +gh repo create my-org/my-difc-guard --private --source=. --push + +# Create a release with the WASM file +git tag v1.0.0 +git push origin v1.0.0 +gh release create v1.0.0 guard.wasm \ + --title "v1.0.0" \ + --notes "Initial release of DIFC guard" +``` + +### Step 6: Configure Gateway to Use External Guard + +Update your gateway configuration to reference the guard: + +**Option A: Local file** (for development): +```toml +[servers.github] +container = "ghcr.io/github/github-mcp-server" +guard = "myguard" + +[guards.myguard] +type = "wasm" +path = "/path/to/local/guard.wasm" +``` + +**Option B: GitHub Release URL** (for production): +```toml +[servers.github] +container = "ghcr.io/github/github-mcp-server" +guard = "myguard" + +[guards.myguard] +type = "wasm" +url = "https://github.com/my-org/my-difc-guard/releases/download/v1.0.0/guard.wasm" +sha256 = "abc123..." # Required for URL-based loading +cache_dir = "/var/cache/mcp-guards" # Optional, defaults to system temp +``` + +**JSON Configuration** (for stdin): +```json +{ + "guards": { + "myguard": { + "type": "wasm", + "url": "https://github.com/my-org/my-difc-guard/releases/download/v1.0.0/guard.wasm", + "sha256": "abc123...", + "cacheDir": "/var/cache/mcp-guards" + } + } +} +``` + +**Private Repository Access**: Set the `GITHUB_TOKEN` environment variable to download guards from private GitHub repositories. + +## Security Best Practices + +### 1. 
Verify WASM Integrity + +Always verify downloaded WASM modules: + +```bash +# Generate checksum when building +sha256sum guard.wasm > guard.wasm.sha256 + +# Include checksum in release notes +gh release create v1.0.0 guard.wasm guard.wasm.sha256 \ + --title "v1.0.0" \ + --notes "SHA256: $(cat guard.wasm.sha256)" + +# Verify before loading (in deployment scripts) +echo "expected_sha256 guard.wasm" | sha256sum -c - +``` + +## Host Functions Available to Guards + +WASM guards can import host functions from the `env` module to interact with the gateway: + +### call_backend + +Allows guards to make read-only calls to backend MCP servers for gathering metadata: + +```go +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 +``` + +### host_log + +Allows guards to send log messages back to the gateway host for debugging and monitoring: + +```go +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) + +// Log levels: 0=debug, 1=info, 2=warn, 3=error +``` + +**Using the guardsdk for logging** (recommended): + +```go +import sdk "github.com/githubnext/gh-aw-mcpg/examples/guards/guardsdk" + +func labelResource(req *sdk.LabelResourceRequest) (*sdk.LabelResourceResponse, error) { + // Log at different levels + sdk.LogDebug("Processing tool: " + req.ToolName) + sdk.LogInfo("Starting resource labeling") + sdk.LogWarn("Fallback to default labels") + sdk.LogError("Critical error occurred") + + // Formatted logging + sdk.Logf(sdk.LogLevelInfo, "Processing %s with %d args", req.ToolName, len(req.ToolArgs)) + + // ... 
rest of labeling logic +} +``` + +**Without guardsdk** (direct host function use): + +```go +import "unsafe" + +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) + +const ( + LogLevelDebug = 0 + LogLevelInfo = 1 + LogLevelWarn = 2 + LogLevelError = 3 +) + +func logInfo(msg string) { + b := []byte(msg) + hostLog(LogLevelInfo, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} +``` + +Log messages from guards appear in gateway debug output (with `DEBUG=guard:*` environment variable) and are prefixed with the guard name for easy identification. + +### 2. Use Private Repositories + +For sensitive guard logic: +```bash +# Create private repository +gh repo create my-org/my-difc-guard --private --source=. --push + +# Private releases require authentication +# Set GITHUB_TOKEN in gateway environment +export GITHUB_TOKEN="ghp_..." +``` + +### 3. Sign Releases + +Use GPG to sign releases: +```bash +# Sign the WASM file +gpg --detach-sign --armor guard.wasm + +# Include signature in release +gh release create v1.0.0 guard.wasm guard.wasm.asc \ + --title "v1.0.0 (signed)" \ + --notes "GPG signed release" +``` + +### 4. Audit Guard Code + +Before using external guards: +- Review source code +- Verify build reproducibility +- Test in isolated environment +- Monitor guard behavior + +## Development Workflow + +### Iterative Development + +```bash +# 1. Make changes to guard logic +vi main.go + +# 2. Build and test locally +make build +# Test with local gateway configuration + +# 3. Commit and create new release +git add main.go +git commit -m "Update guard logic" +git push +git tag v1.0.1 +git push origin v1.0.1 +gh release create v1.0.1 guard.wasm --title "v1.0.1" + +# 4. 
Update gateway configuration to new version +# Change url to: .../releases/download/v1.0.1/guard.wasm +``` + +### Testing Guards + +```bash +# Test guard locally before releasing +cd /path/to/gateway +cat > test-config.toml << EOF +[servers.testserver] +container = "test-mcp-server" +guard = "testguard" + +[guards.testguard] +type = "wasm" +path = "/path/to/your/guard.wasm" +EOF + +# Run gateway with test config +./awmg --config test-config.toml +``` + +## Example: Complete Guard Repository + +See the sample guard in the main repository: +```bash +# View the complete example +git clone https://github.com/githubnext/gh-aw-mcpg +cd gh-aw-mcpg/examples/guards/sample-guard +cat main.go # Review guard implementation +cat Makefile # Review build process +make build # Build the guard +``` + +## Troubleshooting + +### Build fails with "requires go version 1.19 through 1.23" +**Solution**: Install Go 1.23.4 specifically for guard compilation: +```bash +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download +``` + +### TinyGo not found +**Solution**: Install TinyGo from https://tinygo.org/getting-started/install/ + +### Guard doesn't export functions +**Problem**: Compiled with standard Go instead of TinyGo +**Solution**: Ensure TinyGo is in PATH and Makefile uses it + +### "failed to read WASM file" +**Solution**: Check file path in configuration is absolute or relative to gateway working directory + +## Resources + +- TinyGo documentation: https://tinygo.org/docs/ +- WASI specification: https://wasi.dev/ +- WebAssembly documentation: https://webassembly.org/ +- GitHub Releases API: https://docs.github.com/en/rest/releases diff --git a/examples/guards/echo-guard/README.md b/examples/guards/echo-guard/README.md new file mode 100644 index 00000000..db25357c --- /dev/null +++ b/examples/guards/echo-guard/README.md @@ -0,0 +1,80 @@ +# Echo Guard + +A simple debugging guard that prints all request data to stdout. 
+ +## Purpose + +Use this guard to understand what data is passed to guards during labeling decisions. It logs: + +- **Tool Name** - The MCP tool being called +- **Tool Args** - All arguments passed to the tool (JSON formatted) +- **Capabilities** - Agent capabilities if present +- **Tool Result** - The result from the backend (in `label_response`) + +## Building + +```bash +# Install Go 1.23.4 (if not already installed) +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download + +# Build +export GOROOT=$(~/go/bin/go1.23.4 env GOROOT) +tinygo build -o guard.wasm -target=wasi main.go +``` + +## Configuration + +```toml +[servers.github] +container = "ghcr.io/github/github-mcp-server" +guard = "echo" + +[guards.echo] +type = "wasm" +path = "./examples/guards/echo-guard/guard.wasm" +``` + +## Example Output + +When a `get_issue` tool is called: + +``` +=== label_resource called === +Tool Name: get_issue +Tool Args: + { + "owner": "octocat", + "repo": "hello-world", + "issue_number": 42 + } +============================= +``` + +When the response is received: + +``` +=== label_response called === +Tool Name: get_issue +Tool Result: + { + "number": 42, + "title": "Found a bug", + "user": { + "login": "octocat" + }, + "labels": [ + {"name": "bug"} + ] + } +============================= +``` + +## Behavior + +The echo guard always returns: +- **Resource**: Public, untrusted, with description `echo:<tool_name>` +- **Operation**: Read +- **Response labeling**: None (passes through unchanged) + +This makes it safe to use for debugging without affecting access control. 
diff --git a/examples/guards/echo-guard/codex.config.toml b/examples/guards/echo-guard/codex.config.toml new file mode 100644 index 00000000..705cca89 --- /dev/null +++ b/examples/guards/echo-guard/codex.config.toml @@ -0,0 +1,23 @@ +# Codex config for Echo Guard Demo +# This connects Codex to the gateway running the echo guard + +model = "gpt-5.1-codex-max" +sandbox_mode = "workspace-write" +model_reasoning_effort = "high" + +[mcp_servers.github] +url = "http://127.0.0.1:8000/mcp/github" +transport = "streamablehttp" +tool_timeout_sec = 120000 +startup_timeout_ms = 180000 +bearer_token_env_var = "AGENT_ID" + +[mcp_servers.fetch] +url = "http://127.0.0.1:8000/mcp/fetch" +transport = "streamablehttp" +tool_timeout_sec = 120000 +startup_timeout_ms = 180000 +bearer_token_env_var = "AGENT_ID" + +[projects."/workspace/"] +trust_level="trusted" diff --git a/examples/guards/echo-guard/demo-config.toml b/examples/guards/echo-guard/demo-config.toml new file mode 100644 index 00000000..33103d99 --- /dev/null +++ b/examples/guards/echo-guard/demo-config.toml @@ -0,0 +1,33 @@ +# Echo Guard Demo Configuration +# This config runs GitHub MCP server with the echo guard to see all tool calls + +[gateway] +port = 8000 +api_key = "demo-key-12345" + +# Define the echo guard +[guards.echo] +type = "wasm" +path = "./examples/guards/echo-guard/guard.wasm" + +# GitHub MCP server with echo guard attached +[servers.github] +command = "docker" +args = [ + "run", "--rm", "-i", + "-e", "GITHUB_PERSONAL_ACCESS_TOKEN", + "-e", "NO_COLOR=1", "-e", "TERM=dumb", + "ghcr.io/github/github-mcp-server:latest" +] +guard = "echo" + +# Also add fetch for testing without GitHub token +[servers.fetch] +command = "docker" +args = [ + "run", "--rm", "-i", + "-e", "NO_COLOR=1", "-e", "TERM=dumb", + "-e", "PYTHONUNBUFFERED=1", + "mcp/fetch" +] +guard = "echo" diff --git a/examples/guards/echo-guard/go.mod b/examples/guards/echo-guard/go.mod new file mode 100644 index 00000000..c90d960c --- /dev/null +++ 
b/examples/guards/echo-guard/go.mod @@ -0,0 +1,7 @@ +module echo-guard + +go 1.25.0 + +require github.com/githubnext/gh-aw-mcpg v0.0.0 + +replace github.com/githubnext/gh-aw-mcpg => ../../.. diff --git a/examples/guards/echo-guard/go.sum b/examples/guards/echo-guard/go.sum new file mode 100644 index 00000000..e69de29b diff --git a/examples/guards/echo-guard/main.go b/examples/guards/echo-guard/main.go new file mode 100644 index 00000000..8e8363e5 --- /dev/null +++ b/examples/guards/echo-guard/main.go @@ -0,0 +1,71 @@ +// Echo Guard - A simple guard that logs all inputs for debugging +// +// This guard prints all request data to help understand what information +// is available to guards during labeling decisions. +// +// Build with: +// +// export GOROOT=$(~/go/bin/go1.23.4 env GOROOT) +// tinygo build -o guard.wasm -target=wasi main.go +package main + +import ( + "encoding/json" + "fmt" + + sdk "github.com/github/gh-aw-mcpg/examples/guards/guardsdk" +) + +func init() { + sdk.RegisterLabelResource(labelResource) + sdk.RegisterLabelResponse(labelResponse) +} + +func labelResource(req *sdk.LabelResourceRequest) (*sdk.LabelResourceResponse, error) { + // Log to gateway host using the new logging API + sdk.LogInfo(fmt.Sprintf("label_resource called for tool: %s", req.ToolName)) + + // Print the request for debugging (goes to WASM stdout) + fmt.Println("=== label_resource called ===") + fmt.Printf("Tool Name: %s\n", req.ToolName) + fmt.Println("Tool Args:") + prettyPrint(req.ToolArgs) + if req.Capabilities != nil { + fmt.Println("Capabilities:") + prettyPrint(req.Capabilities) + } + fmt.Println("=============================") + + // Return a simple public resource label + return &sdk.LabelResourceResponse{ + Resource: sdk.NewPublicResource(fmt.Sprintf("echo:%s", req.ToolName)), + Operation: sdk.OperationRead, + }, nil +} + +func labelResponse(req *sdk.LabelResponseRequest) (*sdk.LabelResponseResponse, error) { + // Print the response for debugging + fmt.Println("=== 
label_response called ===") + fmt.Printf("Tool Name: %s\n", req.ToolName) + fmt.Println("Tool Result:") + prettyPrint(req.ToolResult) + if req.Capabilities != nil { + fmt.Println("Capabilities:") + prettyPrint(req.Capabilities) + } + fmt.Println("=============================") + + // No fine-grained labeling + return nil, nil +} + +func prettyPrint(v interface{}) { + data, err := json.MarshalIndent(v, " ", " ") + if err != nil { + fmt.Printf(" (error marshaling: %v)\n", err) + return + } + fmt.Printf(" %s\n", string(data)) +} + +func main() {} diff --git a/examples/guards/guardsdk/README.md b/examples/guards/guardsdk/README.md new file mode 100644 index 00000000..05fba20d --- /dev/null +++ b/examples/guards/guardsdk/README.md @@ -0,0 +1,478 @@ +# Guard SDK + +A support library for building WASM guards for MCP Gateway. + +## Overview + +The `guardsdk` package simplifies guard development by handling: + +- **Memory management** - WASM host/guest communication +- **JSON marshaling** - Request/response serialization +- **Backend calls** - Simplified interface to call backend MCP tools +- **Standard types** - Common request/response structures +- **Helper functions** - Argument extraction and label constructors + +## Installation + +The Guard SDK is currently available on the `lpcox/github-difc` branch. 
+ +**In your guard's go.mod:** + +``` +module my-guard + +go 1.23.4 + +require github.com/githubnext/gh-aw-mcpg v0.0.0 + +replace github.com/githubnext/gh-aw-mcpg => github.com/githubnext/gh-aw-mcpg lpcox/github-difc +``` + +**Import in your code:** + +```go +import sdk "github.com/githubnext/gh-aw-mcpg/examples/guards/guardsdk" +``` + +## Quick Start + +```go +package main + +import ( + "fmt" + sdk "github.com/githubnext/gh-aw-mcpg/examples/guards/guardsdk" +) + +func init() { + sdk.RegisterLabelResource(labelResource) + sdk.RegisterLabelResponse(labelResponse) +} + +func labelResource(req *sdk.LabelResourceRequest) (*sdk.LabelResourceResponse, error) { + return &sdk.LabelResourceResponse{ + Resource: sdk.NewPublicResource(fmt.Sprintf("resource:%s", req.ToolName)), + Operation: sdk.OperationRead, + }, nil +} + +func labelResponse(req *sdk.LabelResponseRequest) (*sdk.LabelResponseResponse, error) { + return nil, nil // No fine-grained labeling +} + +func main() {} +``` + +## Building + +Requires Go 1.23.4 and TinyGo 0.34+: + +```bash +# Install Go 1.23.4 +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download + +# Build with TinyGo +export GOROOT=$(~/go/bin/go1.23.4 env GOROOT) +tinygo build -o guard.wasm -target=wasi main.go +``` + +## Host Functions + +WASM guards run in a sandboxed wazero runtime inside the gateway process. They cannot make direct network calls or access the filesystem. Instead, the gateway provides **host functions** that guards can import to interact with the outside world. + +The Guard SDK wraps these host functions for convenient use. If you're not using the SDK, you can import them directly. + +### call_backend + +Allows guards to make **read-only** calls to backend MCP servers for gathering metadata needed for labeling decisions. 
+ +**SDK Usage (recommended):** + +```go +import sdk "github.com/githubnext/gh-aw-mcpg/examples/guards/guardsdk" + +// Generic call - returns interface{} +result, err := sdk.CallBackend("get_issue", map[string]interface{}{ + "owner": "octocat", + "repo": "hello-world", + "issue_number": 42, +}) + +// Typed call - unmarshals to specific type +type Issue struct { + Number int `json:"number"` + Title string `json:"title"` +} +issue, err := sdk.CallBackendTyped[Issue]("get_issue", args) +``` + +**Direct import (without SDK):** + +```go +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 +``` + +**Parameters:** +- `toolNamePtr`, `toolNameLen`: Pointer and length of the tool name string +- `argsPtr`, `argsLen`: Pointer and length of JSON-encoded arguments +- `resultPtr`, `resultSize`: Pointer and size of buffer for result + +**Returns:** Result length on success, or `-1` on error (the bit pattern `0xFFFFFFFF`, i.e. max uint32, when the declared `int32` return value is reinterpreted as unsigned). + +**Limitations:** +- Read-only: Guards can query backend state but cannot modify it +- 1MB result buffer limit +- Calls are synchronous and block the guard execution + +### host_log + +Allows guards to send log messages back to the gateway for debugging and monitoring. 
+ +**SDK Usage (recommended):** + +```go +import sdk "github.com/githubnext/gh-aw-mcpg/examples/guards/guardsdk" + +// Log at different levels +sdk.LogDebug("Processing tool: " + toolName) +sdk.LogInfo("Starting resource labeling") +sdk.LogWarn("Fallback to default labels") +sdk.LogError("Critical error occurred") + +// Formatted logging +sdk.Logf(sdk.LogLevelInfo, "Processing %s with %d args", toolName, len(args)) +``` + +**Log Levels:** + +| Constant | Value | Description | +|----------|-------|-------------| +| `sdk.LogLevelDebug` | 0 | Debug messages (verbose) | +| `sdk.LogLevelInfo` | 1 | Informational messages | +| `sdk.LogLevelWarn` | 2 | Warning messages | +| `sdk.LogLevelError` | 3 | Error messages | + +**Direct import (without SDK):** + +```go +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) +``` + +**Parameters:** +- `level`: Log level (0=debug, 1=info, 2=warn, 3=error) +- `msgPtr`, `msgLen`: Pointer and length of the message string + +**Viewing Guard Logs:** + +Guard log messages appear in gateway debug output. Enable with: + +```bash +# Enable all guard logs +DEBUG=guard:* ./awmg --config config.toml + +# Enable specific guard logs +DEBUG=guard:myguard ./awmg --config config.toml +``` + +Log messages are prefixed with the guard name for easy identification: +``` +[guard:myguard] INFO: Processing create_issue for owner/repo +``` + +## API Reference + +### Types + +#### LabelResourceRequest + +Input for labeling a resource before access. 
+ +```go +type LabelResourceRequest struct { + ToolName string // Name of the tool being called + ToolArgs map[string]interface{} // Arguments passed to the tool + Capabilities interface{} // Agent capabilities (optional) +} +``` + +**Helper methods:** + +```go +// Extract typed values from ToolArgs +req.GetString("owner") // (string, bool) +req.GetInt("issue_number") // (int, bool) +req.GetFloat("amount") // (float64, bool) +req.GetBool("draft") // (bool, bool) +req.GetStringSlice("labels") // ([]string, bool) +req.GetOwnerRepo() // (owner, repo string, ok bool) +``` + +#### LabelResourceResponse + +Output from labeling a resource. + +```go +type LabelResourceResponse struct { + Resource ResourceLabels // Security labels for the resource + Operation Operation // "read", "write", or "read-write" +} +``` + +#### ResourceLabels + +Security classification for a resource. Per DIFC conventions: +- Empty secrecy `[]` means public (no sensitivity restrictions) +- Empty integrity `[]` means no endorsement +- Tags must be repo-scoped: `contributor:owner/repo`, `maintainer:owner/repo`, `private:owner/repo` + +```go +type ResourceLabels struct { + Description string // Human-readable description + Secrecy []string // Secrecy tags (e.g., [], ["private:owner/repo"], ["secret"]) + Integrity []string // Integrity tags (e.g., [], ["contributor:owner/repo"]) +} +``` + +#### LabelResponseResponse (Path-Based Labeling) + +Output from labeling a response. The **path-based format is preferred** as it doesn't copy data. 
+ +```go +type LabelResponseResponse struct { + // Path-based format (preferred): paths and labels without data copying + LabeledPaths []PathLabel `json:"labeled_paths,omitempty"` + DefaultLabels *ResourceLabels `json:"default_labels,omitempty"` + ItemsPath string `json:"items_path,omitempty"` + + // Legacy format: items with copied data (deprecated) + Items []LabeledItem `json:"items,omitempty"` +} + +type PathLabel struct { + Path string `json:"path"` // JSON Pointer (RFC 6901), e.g., "/items/0" + Labels ResourceLabels `json:"labels"` // DIFC labels for this element +} +``` + +**JSON Schema for Path-Based Response:** + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "items_path": { + "type": "string", + "description": "JSON Pointer to the array containing items (e.g., '/items', '' for root)" + }, + "labeled_paths": { + "type": "array", + "items": { + "type": "object", + "required": ["path", "labels"], + "properties": { + "path": { + "type": "string", + "description": "JSON Pointer (RFC 6901) to the element" + }, + "labels": { + "type": "object", + "required": ["secrecy", "integrity"], + "properties": { + "description": { "type": "string" }, + "secrecy": { "type": "array", "items": { "type": "string" } }, + "integrity": { "type": "array", "items": { "type": "string" } } + } + } + } + } + }, + "default_labels": { + "type": "object", + "properties": { + "description": { "type": "string" }, + "secrecy": { "type": "array", "items": { "type": "string" } }, + "integrity": { "type": "array", "items": { "type": "string" } } + } + } + } +} +``` + +**Example - Path-Based Response:** + +```go +func labelResponse(req *sdk.LabelResponseRequest) (*sdk.LabelResponseResponse, error) { + // Check if response has an "items" array + resultMap, ok := req.ToolResult.(map[string]interface{}) + if !ok { + return nil, nil // No labeling for non-object responses + } + + items, ok := resultMap["items"].([]interface{}) + if !ok || 
len(items) == 0 { + return nil, nil + } + + // Label each item using paths (no data copying) + labels := make([]sdk.PathLabel, len(items)) + for i, item := range items { + // Determine labels based on item content + repoID, isPrivate := getRepoInfo(item) + + labels[i] = sdk.PathLabel{ + Path: fmt.Sprintf("/items/%d", i), + Labels: sdk.NewRepoResource( + fmt.Sprintf("Item %d", i), + repoID, + isPrivate, + []string{}, // empty = no endorsement + ), + } + } + + return sdk.NewPathLabelResponseResponse("/items", labels...). + WithDefaultLabels(sdk.NewPublicResource("default")), nil +} +``` + +### Label Constructors + +```go +// Create a public resource with no endorsement (empty labels) +sdk.NewPublicResource("issue:owner/repo#123") +// → Secrecy: [], Integrity: [] + +// Create a repo-scoped private resource with expanded integrity +sdk.NewPrivateResource("issue:owner/repo#123", "owner/repo", sdk.ContributorIntegrity("owner/repo")) +// → Secrecy: ["private:owner/repo"], Integrity: ["contributor:owner/repo"] + +// Create a resource with custom secrecy and integrity +sdk.NewResource("issue:owner/repo#123", + []string{"private:owner/repo", "secret"}, + sdk.MaintainerIntegrity("owner/repo")) +// → Secrecy: ["private:owner/repo", "secret"], Integrity: ["contributor:owner/repo", "maintainer:owner/repo"] + +// Create a repo-scoped resource (public or private based on visibility) +sdk.NewRepoResource("issue:owner/repo#123", "owner/repo", isPrivate, sdk.ContributorIntegrity("owner/repo")) +``` + +### Integrity Hierarchy Helpers + +GitHub integrity tags are hierarchical. 
Guards must expand them to include all implied levels: + +```go +// Contributor level (just contributor) +sdk.ContributorIntegrity("owner/repo") +// → ["contributor:owner/repo"] + +// Maintainer level (implies contributor) +sdk.MaintainerIntegrity("owner/repo") +// → ["contributor:owner/repo", "maintainer:owner/repo"] + +// Project level (implies maintainer and contributor) +sdk.ProjectIntegrity("owner/repo") +// → ["contributor:owner/repo", "maintainer:owner/repo", "project:owner/repo"] +``` + +### Operations + +```go +sdk.OperationRead // "read" - Read-only access +sdk.OperationWrite // "write" - Write/modify access +sdk.OperationReadWrite // "read-write" - Both read and write +``` + +### Backend Calls + +Guards can call backend MCP tools to gather metadata for labeling decisions: + +```go +// Generic call - returns interface{} +result, err := sdk.CallBackend("get_issue", map[string]interface{}{ + "owner": "octocat", + "repo": "hello-world", + "issue_number": 42, +}) + +// Typed call - unmarshals to specific type +type Issue struct { + Number int `json:"number"` + Title string `json:"title"` + User struct { + Login string `json:"login"` + } `json:"user"` +} + +issue, err := sdk.CallBackendTyped[Issue]("get_issue", args) +``` + +## Example: GitHub Guard + +See [example/main.go](example/main.go) for a complete example that: + +1. Labels write operations (create_issue, merge_pull_request) +2. Checks repository visibility for read operations +3. Inspects issue details for fine-grained labeling +4. 
Detects sensitive issue labels (security, confidential) + +## Common Patterns + +### Checking Repository Visibility + +```go +func checkRepoPrivate(owner, repo string) bool { + result, err := sdk.CallBackend("search_repositories", map[string]interface{}{ + "query": fmt.Sprintf("repo:%s/%s", owner, repo), + }) + if err != nil { + return false + } + + if data, ok := result.(map[string]interface{}); ok { + if items, ok := data["items"].([]interface{}); ok && len(items) > 0 { + if first, ok := items[0].(map[string]interface{}); ok { + if private, ok := first["private"].(bool); ok { + return private + } + } + } + } + return false +} +``` + +### Write vs Read Operations + +```go +func labelResource(req *sdk.LabelResourceRequest) (*sdk.LabelResourceResponse, error) { + resp := &sdk.LabelResourceResponse{ + Resource: sdk.NewPublicResource(req.ToolName), + Operation: sdk.OperationRead, + } + + // Categorize by tool name patterns + switch { + case strings.HasPrefix(req.ToolName, "create_"), + strings.HasPrefix(req.ToolName, "update_"), + strings.HasPrefix(req.ToolName, "delete_"): + resp.Operation = sdk.OperationWrite + resp.Resource.Integrity = []string{"contributor"} + + case strings.HasPrefix(req.ToolName, "merge_"): + resp.Operation = sdk.OperationReadWrite + resp.Resource.Integrity = []string{"maintainer"} + } + + return resp, nil +} +``` + +## Limitations + +- **TinyGo required** - Standard Go doesn't support WASM function exports +- **Read-only backend calls** - Guards can only read from backends, not write +- **1MB response limit** - Backend call results are limited to 1MB diff --git a/examples/guards/guardsdk/example/go.mod b/examples/guards/guardsdk/example/go.mod new file mode 100644 index 00000000..2584c163 --- /dev/null +++ b/examples/guards/guardsdk/example/go.mod @@ -0,0 +1,7 @@ +module example-guard + +go 1.23.4 + +require github.com/githubnext/gh-aw-mcpg v0.0.0 + +replace github.com/githubnext/gh-aw-mcpg => ../../../.. 
diff --git a/examples/guards/guardsdk/example/go.sum b/examples/guards/guardsdk/example/go.sum new file mode 100644 index 00000000..6ec57ffd --- /dev/null +++ b/examples/guards/guardsdk/example/go.sum @@ -0,0 +1,118 @@ +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= +github.com/clipperhouse/uax29/v2 v2.2.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= +github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/itchyny/go-yaml v0.0.0-20251001235044-fca9a0999f15/go.mod h1:Tmbz8uw5I/I6NvVpEGuhzlElCGS5hPoXJkt7l+ul6LE= +github.com/itchyny/gojq v0.12.18/go.mod h1:4hPoZ/3lN9fDL1D+aK7DY1f39XZpY9+1Xpjz8atrEkg= +github.com/itchyny/timefmt-go v0.1.7/go.mod h1:5E46Q+zj7vbTgWY8o5YkMeYb4I6GeWLFnetPy5oBrAI= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.19/go.mod 
h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/modelcontextprotocol/go-sdk v1.1.0/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY= +github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tetratelabs/wazero v1.11.0/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod 
h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/telemetry 
v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457/go.mod h1:pRgIJT+bRLFKnoM1ldnzKoxTIn14Yxz928LQRYYgIN0= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/examples/guards/guardsdk/example/main.go b/examples/guards/guardsdk/example/main.go new file mode 100644 index 00000000..ad77ac3d --- /dev/null +++ b/examples/guards/guardsdk/example/main.go @@ -0,0 +1,149 @@ +// Example guard using the guardsdk package +// +// This demonstrates how to build a DIFC guard using the SDK, +// which handles all the low-level WASM memory management and JSON marshaling. 
+// +// Build with: +// +// export GOROOT=$(~/go/bin/go1.23.4 env GOROOT) +// tinygo build -o guard.wasm -target=wasi main.go +package main + +import ( + "fmt" + + sdk "github.com/github/gh-aw-mcpg/examples/guards/guardsdk" +) + +func init() { + sdk.RegisterLabelResource(labelResource) + sdk.RegisterLabelResponse(labelResponse) +} + +func labelResource(req *sdk.LabelResourceRequest) (*sdk.LabelResourceResponse, error) { + // Log the incoming request using host logging + sdk.LogDebug(fmt.Sprintf("labelResource called for tool: %s", req.ToolName)) + + // Extract owner/repo for repo-scoped tags + owner, repo, hasRepo := req.GetOwnerRepo() + repoID := "" + if hasRepo { + repoID = owner + "/" + repo + } + + // Default response - empty labels (public, no endorsement) + resp := &sdk.LabelResourceResponse{ + Resource: sdk.NewPublicResource(fmt.Sprintf("resource:%s", req.ToolName)), + Operation: sdk.OperationRead, + } + + switch req.ToolName { + // Write operations - contributor level + case "create_issue", "update_issue", "create_pull_request": + resp.Operation = sdk.OperationWrite + if repoID != "" { + resp.Resource.Integrity = sdk.ContributorIntegrity(repoID) + } + + // Read-write operations - maintainer level (expands to contributor + maintainer) + case "merge_pull_request": + resp.Operation = sdk.OperationReadWrite + if repoID != "" { + resp.Resource.Integrity = sdk.MaintainerIntegrity(repoID) + } + + // Read operations with repository visibility check + case "list_issues", "list_pull_requests": + labelByRepoVisibility(req, resp) + + case "get_issue": + labelByRepoVisibility(req, resp) + labelByIssueDetails(req, resp) + } + + return resp, nil +} + +func labelResponse(req *sdk.LabelResponseRequest) (*sdk.LabelResponseResponse, error) { + // No fine-grained response labeling in this example + return nil, nil +} + +// labelByRepoVisibility checks if the repository is private +func labelByRepoVisibility(req *sdk.LabelResourceRequest, resp *sdk.LabelResourceResponse) { + 
owner, repo, ok := req.GetOwnerRepo() + if !ok { + return + } + repoID := owner + "/" + repo + + // Call backend to check repository visibility + result, err := sdk.CallBackend("search_repositories", map[string]interface{}{ + "query": fmt.Sprintf("repo:%s", repoID), + }) + if err != nil { + return + } + + // Check if private - use repo-scoped tag + if repoData, ok := result.(map[string]interface{}); ok { + if items, ok := repoData["items"].([]interface{}); ok && len(items) > 0 { + if firstItem, ok := items[0].(map[string]interface{}); ok { + if private, ok := firstItem["private"].(bool); ok && private { + resp.Resource.Secrecy = []string{"private:" + repoID} + } + } + } + } +} + +// labelByIssueDetails adds labels based on issue-specific information +func labelByIssueDetails(req *sdk.LabelResourceRequest, resp *sdk.LabelResourceResponse) { + owner, repo, ok := req.GetOwnerRepo() + if !ok { + return + } + + issueNum, ok := req.GetInt("issue_number") + if !ok { + return + } + + // Get issue details from backend + result, err := sdk.CallBackend("get_issue", map[string]interface{}{ + "owner": owner, + "repo": repo, + "issue_number": issueNum, + }) + if err != nil { + return + } + + issueData, ok := result.(map[string]interface{}) + if !ok { + return + } + + // Update description with author + if user, ok := issueData["user"].(map[string]interface{}); ok { + if login, ok := user["login"].(string); ok { + resp.Resource.Description = fmt.Sprintf("issue:%s/%s#%d by %s", owner, repo, issueNum, login) + } + } + + // Check for sensitive labels - use "secret" for highest secrecy + if labels, ok := issueData["labels"].([]interface{}); ok { + for _, label := range labels { + if labelData, ok := label.(map[string]interface{}); ok { + if name, ok := labelData["name"].(string); ok { + if name == "security" || name == "confidential" { + resp.Resource.Secrecy = append(resp.Resource.Secrecy, "secret") + break + } + } + } + } + } +} + +func main() {} diff --git 
a/examples/guards/guardsdk/guardsdk.go b/examples/guards/guardsdk/guardsdk.go new file mode 100644 index 00000000..642f0720 --- /dev/null +++ b/examples/guards/guardsdk/guardsdk.go @@ -0,0 +1,534 @@ +//go:build tinygo.wasm || wasm + +// Package guardsdk provides utilities for building WASM guards for MCP Gateway. +// +// This SDK simplifies guard development by handling: +// - Memory management for WASM host/guest communication +// - JSON marshaling/unmarshaling +// - Backend tool calls via host functions +// - Standard request/response types +// +// Example usage: +// +// package main +// +// import "github.com/github/gh-aw-mcpg/examples/guards/guardsdk" +// +// func init() { +// guardsdk.RegisterLabelResource(myLabelResource) +// guardsdk.RegisterLabelResponse(myLabelResponse) +// } +// +// func myLabelResource(req *guardsdk.LabelResourceRequest) (*guardsdk.LabelResourceResponse, error) { +// // Your labeling logic here +// return &guardsdk.LabelResourceResponse{ +// Resource: guardsdk.NewPublicResource("my-resource"), +// Operation: guardsdk.OperationRead, +// }, nil +// } +// +// func myLabelResponse(req *guardsdk.LabelResponseRequest) (*guardsdk.LabelResponseResponse, error) { +// return nil, nil // No fine-grained labeling +// } +// +// func main() {} +package guardsdk + +import ( + "encoding/json" + "fmt" + "unsafe" +) + +// Operation types for resource access +type Operation string + +const ( + OperationRead Operation = "read" + OperationWrite Operation = "write" + OperationReadWrite Operation = "read-write" +) + +// LabelResourceRequest contains the input for labeling a resource +type LabelResourceRequest struct { + ToolName string `json:"tool_name"` + ToolArgs map[string]interface{} `json:"tool_args"` + Capabilities interface{} `json:"capabilities,omitempty"` +} + +// LabelResourceResponse contains the output from labeling a resource +type LabelResourceResponse struct { + Resource ResourceLabels `json:"resource"` + Operation Operation `json:"operation"` 
+} + +// LabelResponseRequest contains the input for labeling a response +type LabelResponseRequest struct { + ToolName string `json:"tool_name"` + ToolResult interface{} `json:"tool_result"` + Capabilities interface{} `json:"capabilities,omitempty"` +} + +// LabelResponseResponse contains the output from labeling a response. +// Use either the legacy Items format OR the new PathLabels format. +// +// Legacy format (requires copying data): +// +// response := &LabelResponseResponse{ +// Items: []LabeledItem{ +// {Data: originalItem, Labels: labels}, +// }, +// } +// +// Path-based format (preferred, no data copying): +// +// response := NewPathLabelResponseResponse("/items", +// PathLabel{Path: "/items/0", Labels: NewPublicResource("Item 0")}, +// PathLabel{Path: "/items/1", Labels: NewPrivateResource("Item 1", "verified")}, +// ) +type LabelResponseResponse struct { + // Legacy format: items with copied data + Items []LabeledItem `json:"items,omitempty"` + + // Path-based format (preferred): paths and labels without data copying + LabeledPaths []PathLabel `json:"labeled_paths,omitempty"` + DefaultLabels *ResourceLabels `json:"default_labels,omitempty"` + ItemsPath string `json:"items_path,omitempty"` +} + +// PathLabel associates a JSON Pointer path with labels. +// Paths use RFC 6901 JSON Pointer syntax: "/items/0", "/results/5", etc. +type PathLabel struct { + Path string `json:"path"` + Labels ResourceLabels `json:"labels"` +} + +// NewPathLabelResponseResponse creates a path-based label response. +// This is the preferred format as it doesn't require copying response data. 
+// +// Example: +// +// response := NewPathLabelResponseResponse("/items", +// PathLabel{Path: "/items/0", Labels: NewPublicResource("Issue #1")}, +// PathLabel{Path: "/items/1", Labels: NewPrivateResource("Issue #2", "verified")}, +// ) +func NewPathLabelResponseResponse(itemsPath string, labels ...PathLabel) *LabelResponseResponse { + return &LabelResponseResponse{ + ItemsPath: itemsPath, + LabeledPaths: labels, + } +} + +// WithDefaultLabels adds default labels for items not explicitly labeled. +func (r *LabelResponseResponse) WithDefaultLabels(labels ResourceLabels) *LabelResponseResponse { + r.DefaultLabels = &labels + return r +} + +// ResourceLabels contains security labels for a resource +type ResourceLabels struct { + Description string `json:"description"` + Secrecy []string `json:"secrecy"` + Integrity []string `json:"integrity"` +} + +// LabeledItem represents a single item with its labels (legacy format) +type LabeledItem struct { + Data interface{} `json:"data"` + Labels ResourceLabels `json:"labels"` +} + +// --- Helper constructors for common label patterns --- +// +// Label conventions (consistent with docs/github-difc.md): +// - Empty secrecy [] means public (no sensitivity restrictions) +// - Empty integrity [] means no endorsement +// - Integrity tags must be repo-scoped: contributor:, maintainer:, project: +// - Secrecy tags must be repo-scoped: private: +// - Guards must expand hierarchical integrity tags (maintainer implies contributor, etc.) + +// NewPublicResource creates a ResourceLabels for a public resource with no endorsement. +// Use empty slices for both secrecy and integrity per DIFC conventions. +func NewPublicResource(description string) ResourceLabels { + return ResourceLabels{ + Description: description, + Secrecy: []string{}, + Integrity: []string{}, + } +} + +// NewPrivateResource creates a ResourceLabels for a private repo resource. +// The repo parameter should be in "owner/repo" format. 
+// integrityTags should already be expanded (e.g., use ContributorIntegrity, MaintainerIntegrity, or ProjectIntegrity). +func NewPrivateResource(description string, repo string, integrityTags []string) ResourceLabels { + return ResourceLabels{ + Description: description, + Secrecy: []string{"private:" + repo}, + Integrity: integrityTags, + } +} + +// ContributorIntegrity returns the expanded integrity tags for contributor level. +func ContributorIntegrity(repo string) []string { + return []string{"contributor:" + repo} +} + +// MaintainerIntegrity returns the expanded integrity tags for maintainer level. +// Per DIFC spec, maintainer implies contributor, so both are included. +func MaintainerIntegrity(repo string) []string { + return []string{"contributor:" + repo, "maintainer:" + repo} +} + +// ProjectIntegrity returns the expanded integrity tags for project level. +// Per DIFC spec, project implies maintainer and contributor, so all are included. +func ProjectIntegrity(repo string) []string { + return []string{"contributor:" + repo, "maintainer:" + repo, "project:" + repo} +} + +// NewRepoResource creates a ResourceLabels with repo-scoped secrecy. +// Use isPrivate=true for private repos, false for public repos. +// integrityTags should already be expanded (use ContributorIntegrity, MaintainerIntegrity, or ProjectIntegrity). 
+func NewRepoResource(description string, repo string, isPrivate bool, integrityTags []string) ResourceLabels { + secrecy := []string{} + if isPrivate { + secrecy = []string{"private:" + repo} + } + return ResourceLabels{ + Description: description, + Secrecy: secrecy, + Integrity: integrityTags, + } +} + +// NewResource creates a ResourceLabels with custom secrecy and integrity +func NewResource(description string, secrecy, integrity []string) ResourceLabels { + return ResourceLabels{ + Description: description, + Secrecy: secrecy, + Integrity: integrity, + } +} + +// --- Tool argument helpers --- + +// GetString extracts a string from tool arguments +func (r *LabelResourceRequest) GetString(key string) (string, bool) { + val, ok := r.ToolArgs[key].(string) + return val, ok +} + +// GetInt extracts an integer from tool arguments (JSON numbers are float64) +func (r *LabelResourceRequest) GetInt(key string) (int, bool) { + if val, ok := r.ToolArgs[key].(float64); ok { + return int(val), true + } + return 0, false +} + +// GetFloat extracts a float from tool arguments +func (r *LabelResourceRequest) GetFloat(key string) (float64, bool) { + val, ok := r.ToolArgs[key].(float64) + return val, ok +} + +// GetBool extracts a boolean from tool arguments +func (r *LabelResourceRequest) GetBool(key string) (bool, bool) { + val, ok := r.ToolArgs[key].(bool) + return val, ok +} + +// GetStringSlice extracts a string slice from tool arguments +func (r *LabelResourceRequest) GetStringSlice(key string) ([]string, bool) { + arr, ok := r.ToolArgs[key].([]interface{}) + if !ok { + return nil, false + } + result := make([]string, 0, len(arr)) + for _, v := range arr { + if s, ok := v.(string); ok { + result = append(result, s) + } + } + return result, true +} + +// GetOwnerRepo extracts owner and repo from tool arguments (common pattern) +func (r *LabelResourceRequest) GetOwnerRepo() (owner, repo string, ok bool) { + owner, ownerOk := r.GetString("owner") + repo, repoOk := 
r.GetString("repo") + return owner, repo, ownerOk && repoOk +} + +// --- Backend calling --- + +// callBackend is imported from the host (gateway) environment +// +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 + +// hostLog is imported from the host (gateway) environment +// Log levels: 0=debug, 1=info, 2=warn, 3=error +// +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) + +// --- Logging --- + +// Log levels for host logging +const ( + LogLevelDebug = 0 + LogLevelInfo = 1 + LogLevelWarn = 2 + LogLevelError = 3 +) + +// LogDebug sends a debug level log message to the gateway host +func LogDebug(msg string) { + if len(msg) == 0 { + return + } + b := []byte(msg) + hostLog(LogLevelDebug, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// LogInfo sends an info level log message to the gateway host +func LogInfo(msg string) { + if len(msg) == 0 { + return + } + b := []byte(msg) + hostLog(LogLevelInfo, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// LogWarn sends a warning level log message to the gateway host +func LogWarn(msg string) { + if len(msg) == 0 { + return + } + b := []byte(msg) + hostLog(LogLevelWarn, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// LogError sends an error level log message to the gateway host +func LogError(msg string) { + if len(msg) == 0 { + return + } + b := []byte(msg) + hostLog(LogLevelError, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// Logf sends a formatted log message at the specified level to the gateway host +func Logf(level int, format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) 
+ if len(msg) == 0 { + return + } + b := []byte(msg) + hostLog(uint32(level), uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// CallBackend calls a tool on the backend MCP server +// This is a read-only call for gathering metadata to inform labeling decisions +func CallBackend(toolName string, args interface{}) (interface{}, error) { + // Marshal args to JSON + argsJSON, err := json.Marshal(args) + if err != nil { + return nil, fmt.Errorf("failed to marshal args: %w", err) + } + + // Allocate buffers + toolNameBytes := []byte(toolName) + resultBuf := make([]byte, 1024*1024) // 1MB result buffer + + // Get pointers + var toolNamePtr, argsJSONPtr *byte + if len(toolNameBytes) > 0 { + toolNamePtr = &toolNameBytes[0] + } + if len(argsJSON) > 0 { + argsJSONPtr = &argsJSON[0] + } + + // Call the host function + resultLen := callBackend( + uint32(uintptr(unsafe.Pointer(toolNamePtr))), + uint32(len(toolNameBytes)), + uint32(uintptr(unsafe.Pointer(argsJSONPtr))), + uint32(len(argsJSON)), + uint32(uintptr(unsafe.Pointer(&resultBuf[0]))), + uint32(len(resultBuf)), + ) + + if resultLen < 0 { + return nil, fmt.Errorf("backend call failed with error code: %d", resultLen) + } + + // Parse result + var result interface{} + if err := json.Unmarshal(resultBuf[:resultLen], &result); err != nil { + return nil, fmt.Errorf("failed to unmarshal backend result: %w", err) + } + + return result, nil +} + +// CallBackendTyped calls a backend tool and unmarshals the result into the provided type +func CallBackendTyped[T any](toolName string, args interface{}) (*T, error) { + result, err := CallBackend(toolName, args) + if err != nil { + return nil, err + } + + // Re-marshal and unmarshal to get proper typing + data, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("failed to re-marshal result: %w", err) + } + + var typed T + if err := json.Unmarshal(data, &typed); err != nil { + return nil, fmt.Errorf("failed to unmarshal to type: %w", err) + } + + return 
&typed, nil +} + +// --- Handler registration --- + +// LabelResourceFunc is the function signature for labeling resources +type LabelResourceFunc func(*LabelResourceRequest) (*LabelResourceResponse, error) + +// LabelResponseFunc is the function signature for labeling responses +type LabelResponseFunc func(*LabelResponseRequest) (*LabelResponseResponse, error) + +var ( + labelResourceHandler LabelResourceFunc + labelResponseHandler LabelResponseFunc +) + +// RegisterLabelResource registers the handler for label_resource calls +func RegisterLabelResource(handler LabelResourceFunc) { + labelResourceHandler = handler +} + +// RegisterLabelResponse registers the handler for label_response calls +func RegisterLabelResponse(handler LabelResponseFunc) { + labelResponseHandler = handler +} + +// --- WASM exports (called by the gateway) --- + +// Error codes returned by WASM functions +const ( + // errGeneral indicates a general error (handler not registered, parse error, etc.) + errGeneral = -1 + // errBufferTooSmall indicates the output buffer is too small + // When returning this, the guard should write the required size (as uint32 little-endian) + // to the first 4 bytes of the output buffer so the gateway can retry with a larger buffer. + errBufferTooSmall = -2 +) + +// writeRequiredSize writes the required buffer size to the output buffer (little-endian uint32) +// This is called when the output is too large to fit in the provided buffer. 
+func writeRequiredSize(outputPtr uint32, requiredSize uint32) { + sizeBytes := []byte{ + byte(requiredSize), + byte(requiredSize >> 8), + byte(requiredSize >> 16), + byte(requiredSize >> 24), + } + writeBytes(outputPtr, sizeBytes) +} + +// label_resource is the WASM export called by the gateway +// +//export label_resource +func labelResource(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + if labelResourceHandler == nil { + return errGeneral // No handler registered + } + + // Read input + input := readBytes(inputPtr, inputLen) + var req LabelResourceRequest + if err := json.Unmarshal(input, &req); err != nil { + return errGeneral + } + + // Call handler + resp, err := labelResourceHandler(&req) + if err != nil { + return errGeneral + } + + // Marshal output + outputJSON, err := json.Marshal(resp) + if err != nil { + return errGeneral + } + + // Check if output fits in buffer + if uint32(len(outputJSON)) > outputSize { + // Signal buffer too small and write required size + writeRequiredSize(outputPtr, uint32(len(outputJSON))) + return errBufferTooSmall + } + + writeBytes(outputPtr, outputJSON) + return int32(len(outputJSON)) +} + +// label_response is the WASM export called by the gateway +// +//export label_response +func labelResponse(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + if labelResponseHandler == nil { + return 0 // No handler = no fine-grained labeling + } + + // Read input + input := readBytes(inputPtr, inputLen) + var req LabelResponseRequest + if err := json.Unmarshal(input, &req); err != nil { + return errGeneral + } + + // Call handler + resp, err := labelResponseHandler(&req) + if err != nil { + return errGeneral + } + + // If nil response, no fine-grained labeling + if resp == nil || len(resp.Items) == 0 { + return 0 + } + + // Marshal output + outputJSON, err := json.Marshal(resp) + if err != nil { + return errGeneral + } + + // Check if output fits in buffer + if uint32(len(outputJSON)) > outputSize { + // Signal 
buffer too small and write required size + writeRequiredSize(outputPtr, uint32(len(outputJSON))) + return errBufferTooSmall + } + + writeBytes(outputPtr, outputJSON) + return int32(len(outputJSON)) +} + +// --- Memory helpers --- + +func readBytes(ptr, length uint32) []byte { + return unsafe.Slice((*byte)(unsafe.Pointer(uintptr(ptr))), length) +} + +func writeBytes(ptr uint32, data []byte) { + dest := unsafe.Slice((*byte)(unsafe.Pointer(uintptr(ptr))), len(data)) + copy(dest, data) +} diff --git a/examples/guards/sample-guard-js/guard.js b/examples/guards/sample-guard-js/guard.js new file mode 100644 index 00000000..d06bc649 --- /dev/null +++ b/examples/guards/sample-guard-js/guard.js @@ -0,0 +1,101 @@ +// Sample DIFC Guard implemented in JavaScript +// This demonstrates that JavaScript guards are easier than Go guards: +// - No TinyGo requirement +// - Works with any wazero version +// - Native WASM support +// - Easy to compile and use + +// Host function imports (provided by gateway via wazero) +// Note: These are imported automatically by the WASM runtime +// +// Available host functions: +// - call_backend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize) -> int32 +// - host_log(level, msgPtr, msgLen) -> void +// +// Log levels: 0=debug, 1=info, 2=warn, 3=error + +const LOG_DEBUG = 0; +const LOG_INFO = 1; +const LOG_WARN = 2; +const LOG_ERROR = 3; + +// Helper function to log messages to the gateway host +function logToHost(level, message) { + const msgBytes = new TextEncoder().encode(message); + // Allocate memory for the message (simplified - in real use, use proper WASM memory allocation) + const ptr = allocateMemory(msgBytes.length); + new Uint8Array(memory.buffer, ptr, msgBytes.length).set(msgBytes); + host_log(level, ptr, msgBytes.length); +} + +// Guard function: label_resource +// Called before accessing a resource to determine its DIFC labels +function label_resource(inputPtr, inputLen, outputPtr, outputSize) { + try { + // Read input 
JSON from WASM memory + const inputBytes = new Uint8Array(memory.buffer, inputPtr, inputLen); + const inputStr = new TextDecoder().decode(inputBytes); + const input = JSON.parse(inputStr); + + // Log the incoming request (if host_log is available) + if (typeof host_log !== 'undefined') { + logToHost(LOG_DEBUG, `label_resource called for tool: ${input.tool_name}`); + } + + // Default labels + const output = { + resource: { + description: `resource:${input.tool_name}`, + secrecy: ["public"], + integrity: ["untrusted"] + }, + operation: "read" + }; + + // Label based on tool name + switch (input.tool_name) { + case "create_issue": + case "update_issue": + case "create_pull_request": + output.operation = "write"; + output.resource.integrity = ["contributor"]; + break; + + case "merge_pull_request": + output.operation = "read-write"; + output.resource.integrity = ["maintainer"]; + break; + + case "list_issues": + case "get_issue": + case "list_pull_requests": + output.operation = "read"; + output.resource.secrecy = ["public"]; + break; + } + + // Write output JSON + const outputStr = JSON.stringify(output); + const outputBytes = new TextEncoder().encode(outputStr); + + if (outputBytes.length > outputSize) { + return -1; // Output too large + } + + new Uint8Array(memory.buffer, outputPtr, outputBytes.length).set(outputBytes); + return outputBytes.length; + } catch (e) { + return -1; // Error + } +} + +// Guard function: label_response +// Called after a backend call to label response data +function label_response(inputPtr, inputLen, outputPtr, outputSize) { + try { + // For this sample, we don't do fine-grained labeling + return 0; + } catch (e) { + return -1; + } +} diff --git a/examples/guards/sample-guard/Makefile b/examples/guards/sample-guard/Makefile new file mode 100644 index 00000000..ae38814d --- /dev/null +++ b/examples/guards/sample-guard/Makefile @@ -0,0 +1,27 @@ +.PHONY: build clean + +build: + @echo "Building WASM guard..." 
+ @if command -v tinygo >/dev/null 2>&1; then \ + echo "TinyGo found, attempting build..."; \ + for go_bin in go1.23 go1.23.9 go1.23.10 go1.23.8; do \ + if command -v $$go_bin >/dev/null 2>&1; then \ + echo "Found $$go_bin, using for TinyGo..."; \ + GOROOT=$$($$go_bin env GOROOT) tinygo build -o guard.wasm -target=wasi main.go 2>&1 && \ + echo "✓ Successfully built guard with TinyGo + $$go_bin" && exit 0; \ + fi; \ + done; \ + echo "No Go 1.23 found. Trying TinyGo with system Go..."; \ + tinygo build -o guard.wasm -target=wasi main.go 2>&1 && \ + echo "✓ Successfully built guard with TinyGo" && exit 0; \ + echo "TinyGo build failed (likely Go version incompatibility)"; \ + echo "Install Go 1.23: go install golang.org/dl/go1.23.9@latest && go1.23.9 download"; \ + else \ + echo "TinyGo not found. Install from: https://tinygo.org"; \ + fi; \ + echo "Falling back to standard Go (function exports won't work)..."; \ + GOOS=wasip1 GOARCH=wasm go build -o guard.wasm main.go; \ + echo "⚠ Warning: Guard compiled with standard Go won't export functions properly" + +clean: + rm -f guard.wasm diff --git a/examples/guards/sample-guard/README.md b/examples/guards/sample-guard/README.md new file mode 100644 index 00000000..f30e1af1 --- /dev/null +++ b/examples/guards/sample-guard/README.md @@ -0,0 +1,369 @@ +# Sample DIFC Guard for WASM + +This is a sample DIFC guard written in Go that compiles to WebAssembly (WASM). + +> **Tip**: For simpler guard development, use the [Guard SDK](../guardsdk/README.md) which handles memory management and JSON marshaling for you. See [guardsdk/example](../guardsdk/example/) for a simplified version of this guard. + +## Requirements and Limitations + +### TinyGo + Go 1.23 Requirement + +**TinyGo is required** for proper WASM function exports. Standard Go's `wasip1` target does not support the `//export` directive needed for guard functions. 
+ +**Version Compatibility**: +- **Gateway**: Go 1.25 (current project version) +- **Guards**: Go 1.23 (for TinyGo compatibility) +- **TinyGo**: 0.34+ (supports Go 1.19-1.23) + +**Key insight**: WASM is version-independent! A guard compiled with Go 1.23 works perfectly with a gateway compiled with Go 1.25. The gateway and guard communicate only through: +- JSON data in linear memory +- Function calls via exported symbols + +There is no Go version coupling between the gateway and guards. + +### Setup + +**For Gateway Development** (Go 1.25): +```bash +# Already installed - use for gateway +go version # Should show go1.25 +``` + +**For Guard Development** (Go 1.23 + TinyGo): + +#### macOS + +```bash +# Install Go 1.23.4 alongside your main Go version +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download + +# Verify installation +~/go/bin/go1.23.4 version # Should show go1.23.4 + +# Install TinyGo via Homebrew +brew tap tinygo-org/tools +brew install tinygo + +# Verify TinyGo +tinygo version +``` + +#### Linux (Debian/Ubuntu) + +```bash +# Install Go 1.23.4 alongside your main Go version +go install golang.org/dl/go1.23.4@latest +~/go/bin/go1.23.4 download + +# Install TinyGo +# See: https://tinygo.org/getting-started/install/ +curl -sSfL -O https://github.com/tinygo-org/tinygo/releases/download/v0.34.0/tinygo_0.34.0_amd64.deb +sudo dpkg -i tinygo_0.34.0_amd64.deb +``` + +#### Other Platforms + +See the [TinyGo installation guide](https://tinygo.org/getting-started/install/) for additional platforms. + +### Building + +To compile this guard to WASM using TinyGo with Go 1.23: + +```bash +# Set GOROOT to use Go 1.23.4 +export GOROOT=$(~/go/bin/go1.23.4 env GOROOT) +tinygo build -o guard.wasm -target=wasi main.go +``` + +Or use the Makefile (tries Go 1.23 automatically): +```bash +make build +``` + +## Overview + +WASM guards run **inside the gateway process** in a sandboxed wazero runtime. They cannot make direct network calls or access the filesystem. 
+ +### Guard Execution Model + +``` +┌───────────────────────────────────────────────────────────────┐ +│ Gateway Process │ +│ │ +│ ┌─────────────────────────────────┐ │ +│ │ WasmGuard (Go) │ │ +│ │ ┌───────────────────────────┐ │ │ +│ │ │ guard.wasm │ │ │ +│ │ │ (sandboxed in wazero) │ │ │ +│ │ │ │ │ │ +│ │ │ - label_resource() │ │ │ +│ │ │ - label_response() │ │ │ +│ │ │ - call_backend() ─────────┼──┼───► BackendCaller │ +│ │ └───────────────────────────┘ │ │ │ +│ └─────────────────────────────────┘ │ │ +│ ▼ │ +│ MCP Backend │ +└───────────────────────────────────────────────────────────────┘ +``` + +Guards: +- Run in-process (not separate CLI) +- Execute in sandboxed wazero runtime +- Cannot make direct network/file I/O +- Call backend via controlled host function + +## Interface + +### Exported Functions (from WASM to Gateway) + +#### `label_resource(inputPtr, inputLen, outputPtr, outputSize uint32) int32` +Labels a resource before access. + +**Input** (JSON at inputPtr): +```json +{ + "tool_name": "create_issue", + "tool_args": {"owner": "org", "repo": "repo", "title": "Bug"} +} +``` + +**Output** (JSON at outputPtr): +```json +{ + "resource": { + "description": "resource:create_issue", + "secrecy": ["public"], + "integrity": ["contributor"] + }, + "operation": "write" +} +``` + +**Returns**: Length of output JSON (>0), 0 for empty, or negative for error + +#### `label_response(inputPtr, inputLen, outputPtr, outputSize uint32) int32` +Labels response data for fine-grained filtering. + +**Input** (JSON at inputPtr): +```json +{ + "tool_name": "list_issues", + "tool_result": { "items": [...] } +} +``` + +**Output** (JSON at outputPtr) - **Path-Based Format (Preferred)**: + +The path-based format uses JSON Pointer (RFC 6901) paths to label elements in the response without copying data. This is more efficient for large responses. 
+ +```json +{ + "items_path": "/items", + "labeled_paths": [ + { + "path": "/items/0", + "labels": { + "description": "Issue #1 in public repo", + "secrecy": ["public"], + "integrity": ["untrusted"] + } + }, + { + "path": "/items/1", + "labels": { + "description": "Issue #2 in private repo", + "secrecy": ["repo:corp/internal-tools"], + "integrity": ["github_verified"] + } + } + ], + "default_labels": { + "description": "Unlabeled item", + "secrecy": ["public"], + "integrity": ["untrusted"] + } +} +``` + +**Path-Based Response Schema**: + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `items_path` | string | No | JSON Pointer to the array containing items (e.g., `"/items"`, `""` for root array) | +| `labeled_paths` | array | Yes | Array of path-label pairs | +| `labeled_paths[].path` | string | Yes | JSON Pointer (RFC 6901) to the element (e.g., `"/items/0"`) | +| `labeled_paths[].labels` | object | Yes | DIFC labels for this element | +| `labeled_paths[].labels.description` | string | No | Human-readable description | +| `labeled_paths[].labels.secrecy` | string[] | Yes | Secrecy tags (e.g., `["public"]`, `["repo:owner/name"]`) | +| `labeled_paths[].labels.integrity` | string[] | Yes | Integrity tags (e.g., `["untrusted"]`, `["github_verified"]`) | +| `default_labels` | object | No | Labels for items not explicitly matched by a path | + +**Output** (JSON at outputPtr) - **Legacy Format (Deprecated)**: + +The legacy format copies data for each item. Use path-based format for better performance. + +```json +{ + "items": [ + {"data": {...}, "labels": {"secrecy": ["public"]}} + ] +} +``` + +**Returns**: Length of output JSON, 0 for no labeling, or negative for error + +### Host Functions (Imported from Gateway) + +The gateway provides host functions that WASM guards can import to interact with the outside world. These are the only way for sandboxed guards to communicate with external systems. 
+ +#### `call_backend` + +Makes read-only calls to backend MCP servers for gathering metadata needed for labeling decisions. + +```go +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 +``` + +**Parameters**: +- `toolNamePtr`, `toolNameLen`: Pointer and length of the tool name string in WASM memory +- `argsPtr`, `argsLen`: Pointer and length of JSON-encoded arguments +- `resultPtr`, `resultSize`: Pointer and size of buffer for the result + +**Returns**: +- Non-negative: Length of result JSON written to `resultPtr` +- Negative (`-1`, which is `0xFFFFFFFF` when the same bits are read as a uint32): Error occurred + +**Example Usage**: +```go +func callBackendHelper(toolName string, args interface{}) ([]byte, error) { + argsJSON, _ := json.Marshal(args) + toolNameBytes := []byte(toolName) + resultBuf := make([]byte, 1024*1024) // 1MB buffer + + resultLen := callBackend( + uint32(uintptr(unsafe.Pointer(&toolNameBytes[0]))), + uint32(len(toolNameBytes)), + uint32(uintptr(unsafe.Pointer(&argsJSON[0]))), + uint32(len(argsJSON)), + uint32(uintptr(unsafe.Pointer(&resultBuf[0]))), + uint32(len(resultBuf)), + ) + + if resultLen < 0 { + return nil, fmt.Errorf("backend call failed") + } + return resultBuf[:resultLen], nil +} + +// Usage +repoInfo, err := callBackendHelper("search_repositories", map[string]interface{}{ + "query": "repo:owner/name", +}) +``` + +**Limitations**: +- Read-only: Guards can query but cannot modify backend state +- 1MB result buffer limit +- Synchronous: Blocks guard execution until complete + +#### `host_log` + +Sends log messages from the guard back to the gateway for debugging and monitoring. 
+ +```go +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) +``` + +**Parameters**: +- `level`: Log level (0=debug, 1=info, 2=warn, 3=error) +- `msgPtr`, `msgLen`: Pointer and length of the message string in WASM memory + +**Log Levels**: +| Value | Level | Description | +|-------|-------|-------------| +| 0 | Debug | Verbose debugging information | +| 1 | Info | Informational messages | +| 2 | Warn | Warning messages | +| 3 | Error | Error messages | + +**Example Usage**: +```go +const ( + LogLevelDebug = 0 + LogLevelInfo = 1 + LogLevelWarn = 2 + LogLevelError = 3 +) + +func logInfo(msg string) { + b := []byte(msg) + hostLog(LogLevelInfo, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +func logDebug(msg string) { + b := []byte(msg) + hostLog(LogLevelDebug, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// Usage in label_resource +func labelResource(...) int32 { + logInfo("Processing tool: create_issue") + // ... labeling logic + logDebug("Resource labeled successfully") +} +``` + +**Viewing Guard Logs**: + +Guard logs appear in gateway debug output. Enable with: + +```bash +# Enable all guard logs +DEBUG=guard:* ./awmg --config config.toml + +# Enable logs for specific guard +DEBUG=guard:github ./awmg --config config.toml +``` + +Log messages are prefixed with the guard name: +``` +[guard:github] INFO: Processing tool: create_issue +[guard:github] DEBUG: Resource labeled successfully +``` + +> **Tip**: For simpler logging, use the [Guard SDK](../guardsdk/README.md) which provides `sdk.LogInfo()`, `sdk.LogDebug()`, `sdk.LogWarn()`, and `sdk.LogError()` helper functions. 
+ +## Example Configuration + +```toml +[servers.github] +container = "ghcr.io/github/github-mcp-server" +guard = "github" + +[guards.github] +type = "wasm" +path = "./examples/guards/sample-guard/guard.wasm" +``` + +## Implementation Notes + +- **In-process execution**: Guard runs inside gateway, not as separate process +- **Sandboxed**: wazero runtime prevents direct I/O and network access +- **TinyGo required**: Standard Go doesn't support `//export` for WASM +- **JSON-based**: All data exchange uses JSON (TinyGo-compatible) +- **Simple types**: No complex Go types across WASM boundary +- **Read-only backend**: Guards can only read from backend, not write + +## TinyGo Limitations + +TinyGo has some standard library limitations: +- ✓ encoding/json - Works +- ✓ fmt - Works +- ✓ Basic stdlib - Works +- ✗ Reflection - Limited +- ✗ Some stdlib packages - Not available + +The guard interface is designed to work within these constraints using simple JSON data exchange. diff --git a/examples/guards/sample-guard/go.mod b/examples/guards/sample-guard/go.mod new file mode 100644 index 00000000..406b3e4d --- /dev/null +++ b/examples/guards/sample-guard/go.mod @@ -0,0 +1,3 @@ +module guard + +go 1.23 diff --git a/examples/guards/sample-guard/main.go b/examples/guards/sample-guard/main.go new file mode 100644 index 00000000..14cc93e4 --- /dev/null +++ b/examples/guards/sample-guard/main.go @@ -0,0 +1,484 @@ +package main + +import ( + "encoding/json" + "fmt" + "unsafe" +) + +// This is a sample DIFC guard that runs as a WASM module inside the gateway +// It uses exported functions and host function imports for sandbox security + +// callBackend is imported from the host (gateway) environment +// It allows the guard to make read-only calls to the backend MCP server +// +//go:wasmimport env call_backend +func callBackend(toolNamePtr, toolNameLen, argsPtr, argsLen, resultPtr, resultSize uint32) int32 + +// hostLog is imported from the host (gateway) environment +// It allows the 
guard to send log messages back to the gateway +// Log levels: 0=debug, 1=info, 2=warn, 3=error +// +//go:wasmimport env host_log +func hostLog(level, msgPtr, msgLen uint32) + +// Log level constants +const ( + LogLevelDebug = 0 + LogLevelInfo = 1 + LogLevelWarn = 2 + LogLevelError = 3 +) + +// logDebug sends a debug level log message to the gateway +func logDebug(msg string) { + b := []byte(msg) + hostLog(LogLevelDebug, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// logInfo sends an info level log message to the gateway +func logInfo(msg string) { + b := []byte(msg) + hostLog(LogLevelInfo, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// logWarn sends a warning level log message to the gateway +func logWarn(msg string) { + b := []byte(msg) + hostLog(LogLevelWarn, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// logError sends an error level log message to the gateway +func logError(msg string) { + b := []byte(msg) + hostLog(LogLevelError, uint32(uintptr(unsafe.Pointer(&b[0]))), uint32(len(b))) +} + +// Request structures +type LabelResourceInput struct { + ToolName string `json:"tool_name"` + ToolArgs map[string]interface{} `json:"tool_args"` + Capabilities interface{} `json:"capabilities,omitempty"` +} + +type LabelResponseInput struct { + ToolName string `json:"tool_name"` + ToolResult interface{} `json:"tool_result"` + Capabilities interface{} `json:"capabilities,omitempty"` +} + +// Response structures +type LabelResourceOutput struct { + Resource ResourceLabels `json:"resource"` + Operation string `json:"operation"` +} + +type ResourceLabels struct { + Description string `json:"description"` + Secrecy []string `json:"secrecy"` + Integrity []string `json:"integrity"` +} + +type LabelResponseOutput struct { + Items []LabeledItem `json:"items,omitempty"` +} + +type LabeledItem struct { + Data interface{} `json:"data"` + Labels ResourceLabels `json:"labels"` +} + +// label_resource is called by the gateway to 
label a resource before access +// +//export label_resource +func labelResource(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + // Read input JSON from WASM memory + input := readBytes(inputPtr, inputLen) + var req LabelResourceInput + if err := json.Unmarshal(input, &req); err != nil { + logError("failed to unmarshal label_resource input") + return -1 + } + + logDebug(fmt.Sprintf("labeling resource for tool: %s", req.ToolName)) + + // Extract owner/repo for repo-scoped tags + owner, _ := req.ToolArgs["owner"].(string) + repo, _ := req.ToolArgs["repo"].(string) + repoID := "" + if owner != "" && repo != "" { + repoID = owner + "/" + repo + } + + // Default labels - empty secrecy (public) and empty integrity (no endorsement) + output := LabelResourceOutput{ + Resource: ResourceLabels{ + Description: fmt.Sprintf("resource:%s", req.ToolName), + Secrecy: []string{}, + Integrity: []string{}, + }, + Operation: "read", + } + + // Determine labels based on tool name + switch req.ToolName { + case "create_issue", "update_issue", "create_pull_request": + output.Operation = "write" + // Contributor level: only contributor tag + if repoID != "" { + output.Resource.Integrity = []string{"contributor:" + repoID} + } + + case "merge_pull_request": + output.Operation = "read-write" + // Maintainer level: contributor + maintainer (hierarchical expansion) + if repoID != "" { + output.Resource.Integrity = []string{"contributor:" + repoID, "maintainer:" + repoID} + } + + case "list_issues", "list_pull_requests": + output.Operation = "read" + // Label based on repository visibility + labelByRepoVisibility(&output, req.ToolArgs) + + case "get_issue": + output.Operation = "read" + // Label based on repository visibility first + labelByRepoVisibility(&output, req.ToolArgs) + + // Use tool arguments to get issue-specific information + // ToolArgs contains: owner, repo, issue_number + if owner, ok := req.ToolArgs["owner"].(string); ok { + if repo, ok := 
req.ToolArgs["repo"].(string); ok { + if issueNum, ok := req.ToolArgs["issue_number"].(float64); ok { + // Call backend to get issue details for labeling + issueInfo, err := callBackendHelper("get_issue", map[string]interface{}{ + "owner": owner, + "repo": repo, + "issue_number": int(issueNum), + }) + + if err == nil { + if issueData, ok := issueInfo.(map[string]interface{}); ok { + // Label based on issue author + if user, ok := issueData["user"].(map[string]interface{}); ok { + if login, ok := user["login"].(string); ok { + output.Resource.Description = fmt.Sprintf("issue:%s/%s#%d by %s", owner, repo, int(issueNum), login) + } + } + + // Check for sensitive labels + if labels, ok := issueData["labels"].([]interface{}); ok { + for _, label := range labels { + if labelData, ok := label.(map[string]interface{}); ok { + if name, ok := labelData["name"].(string); ok { + if name == "security" || name == "confidential" { + // Use repo-scoped private tag plus sensitivity indicator + output.Resource.Secrecy = []string{"private:" + owner + "/" + repo, "secret"} + } + } + } + } + } + } + } + } + } + } + } + + // Marshal output + outputJSON, err := json.Marshal(output) + if err != nil { + return -1 + } + + // Check output size + if uint32(len(outputJSON)) > outputSize { + return -1 // Output too large + } + + // Write output to WASM memory + writeBytes(outputPtr, outputJSON) + return int32(len(outputJSON)) +} + +// label_response is called by the gateway to label response data. +// Uses the path-based labeling format which is more efficient as it doesn't +// require copying response data - just returns paths and labels. 
+// +//export label_response +func labelResponse(inputPtr, inputLen, outputPtr, outputSize uint32) int32 { + // Read input JSON from WASM memory + input := readBytes(inputPtr, inputLen) + var req LabelResponseInput + if err := json.Unmarshal(input, &req); err != nil { + return -1 + } + + // Check if this is a collection response that needs fine-grained labeling + response := labelResponseItems(req.ToolName, req.ToolResult) + if response == nil { + // No fine-grained labeling needed + return 0 + } + + // Marshal response + outputJSON, err := json.Marshal(response) + if err != nil { + return -1 + } + + // Check buffer size + if uint32(len(outputJSON)) > outputSize { + // Return -2 to indicate buffer too small, write required size + sizeBytes := []byte{ + byte(len(outputJSON)), + byte(len(outputJSON) >> 8), + byte(len(outputJSON) >> 16), + byte(len(outputJSON) >> 24), + } + writeBytes(outputPtr, sizeBytes) + return -2 + } + + writeBytes(outputPtr, outputJSON) + return int32(len(outputJSON)) +} + +// PathLabelResponse is the path-based labeling format +type PathLabelResponse struct { + LabeledPaths []PathLabel `json:"labeled_paths"` + DefaultLabels *ResourceLabels `json:"default_labels,omitempty"` + ItemsPath string `json:"items_path,omitempty"` +} + +// PathLabel associates a JSON Pointer path with labels +type PathLabel struct { + Path string `json:"path"` + Labels ResourceLabels `json:"labels"` +} + +// labelResponseItems checks if this is a collection and labels each item by path +func labelResponseItems(toolName string, result interface{}) *PathLabelResponse { + // Check common collection patterns + resultMap, ok := result.(map[string]interface{}) + if !ok { + // Not a map - might be direct array or single item + if arr, ok := result.([]interface{}); ok { + return labelArrayItems(toolName, arr, "") + } + return nil + } + + // Check for "items" array (common in GitHub search results) + if items, ok := resultMap["items"].([]interface{}); ok && len(items) > 0 { + 
return labelArrayItems(toolName, items, "/items") + } + + // Check for direct array results (e.g., list_issues) + // No collection found + return nil +} + +// labelArrayItems labels each item in an array using path-based format +func labelArrayItems(toolName string, items []interface{}, itemsPath string) *PathLabelResponse { + if len(items) == 0 { + return nil + } + + labels := make([]PathLabel, 0, len(items)) + + for i, item := range items { + path := fmt.Sprintf("%s/%d", itemsPath, i) + itemLabels := labelSingleItem(toolName, item) + labels = append(labels, PathLabel{ + Path: path, + Labels: itemLabels, + }) + } + + return &PathLabelResponse{ + ItemsPath: itemsPath, + LabeledPaths: labels, + DefaultLabels: &ResourceLabels{ + Description: "Unlabeled item", + Secrecy: []string{}, // empty = public + Integrity: []string{}, // empty = no endorsement + }, + } +} + +// labelSingleItem determines labels for a single item based on its content +func labelSingleItem(toolName string, item interface{}) ResourceLabels { + itemMap, ok := item.(map[string]interface{}) + if !ok { + return ResourceLabels{ + Description: "Unknown item", + Secrecy: []string{}, // empty = public + Integrity: []string{}, // empty = no endorsement + } + } + + // Extract repo info for scoped tags + repoID := "" + if repo, ok := itemMap["repository"].(map[string]interface{}); ok { + if fullName, ok := repo["full_name"].(string); ok { + repoID = fullName + } + } else if fullName, ok := itemMap["full_name"].(string); ok { + repoID = fullName + } + + // Check for repository visibility + // Items with private repos get repo-scoped private tag + if repo, ok := itemMap["repository"].(map[string]interface{}); ok { + if private, ok := repo["private"].(bool); ok && private { + secrecy := []string{} + if repoID != "" { + secrecy = []string{"private:" + repoID} + } + return ResourceLabels{ + Description: describeItem(toolName, itemMap), + Secrecy: secrecy, + Integrity: []string{}, // empty = no endorsement + } + } 
// truncateString shortens s to at most maxLen characters (runes).
//
// A string that already fits is returned unchanged. A longer string is cut
// and suffixed with "..." so that the result is exactly maxLen characters.
// When maxLen is smaller than the ellipsis itself the string is cut hard to
// maxLen characters with no suffix, and a non-positive maxLen yields "".
//
// The cut is always made on a rune boundary, so a multi-byte UTF-8 character
// is never split. For ASCII input with maxLen >= 3 the result is the same as
// slicing by byte.
func truncateString(s string, maxLen int) string {
	const ellipsis = "..."

	if maxLen <= 0 {
		return ""
	}
	// A string of at most maxLen bytes has at most maxLen runes.
	if len(s) <= maxLen {
		return s
	}

	keep, suffix := maxLen-len(ellipsis), ellipsis
	if keep < 0 {
		// No room for the ellipsis: keep as many characters as allowed.
		keep, suffix = maxLen, ""
	}

	// Ranging over a string yields the byte offset of each rune start. Note
	// the offset of the first rune to drop, and truncate only once a rune
	// beyond the limit proves the string is too long.
	cut, seen := len(s), 0
	for offset := range s {
		if seen == keep {
			cut = offset
		}
		if seen == maxLen {
			return s[:cut] + suffix
		}
		seen++
	}
	return s
}
uint32(uintptr(unsafe.Pointer(argsJSONPtr))), + uint32(len(argsJSON)), + uint32(uintptr(unsafe.Pointer(&resultBuf[0]))), + uint32(len(resultBuf)), + ) + + if resultLen < 0 { + return nil, fmt.Errorf("backend call failed with error code: %d", resultLen) + } + + // Parse result + var result interface{} + if err := json.Unmarshal(resultBuf[:resultLen], &result); err != nil { + return nil, fmt.Errorf("failed to unmarshal backend result: %w", err) + } + + return result, nil +} + +func main() { + // Required for WASM compilation, but not called when used as a library +} diff --git a/go.mod b/go.mod index 11b224e2..e2ea795b 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/itchyny/gojq v0.12.18 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/stretchr/testify v1.11.1 + github.com/tetratelabs/wazero v1.11.0 ) require ( diff --git a/go.sum b/go.sum index d4752289..53b4c2b2 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,8 @@ github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tetratelabs/wazero v1.11.0 h1:+gKemEuKCTevU4d7ZTzlsvgd1uaToIDtlQlmNbwqYhA= +github.com/tetratelabs/wazero v1.11.0/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= diff --git a/internal/cmd/flags_difc.go b/internal/cmd/flags_difc.go index 2503a9bc..52f862ff 100644 --- a/internal/cmd/flags_difc.go +++ b/internal/cmd/flags_difc.go @@ -11,17 +11,27 @@ import ( // DIFC flag defaults const ( - defaultEnableDIFC = 
false + defaultEnableDIFC = false + defaultDIFCFilter = false + defaultConfigExtensions = false ) // DIFC flag variables var ( - enableDIFC bool + enableDIFC bool + difcFilter bool + enableConfigExt bool // Enable config extensions (guards, session labels) + sessionSecrecy string // Comma-separated initial secrecy labels + sessionIntegrity string // Comma-separated initial integrity labels ) func init() { RegisterFlag(func(cmd *cobra.Command) { - cmd.Flags().BoolVar(&enableDIFC, "enable-difc", getDefaultEnableDIFC(), "Enable DIFC enforcement and session requirement (requires sys___init call before tool access)") + cmd.Flags().BoolVar(&enableDIFC, "enable-difc", getDefaultEnableDIFC(), "Enable DIFC enforcement (sessions are auto-created from Authorization header)") + cmd.Flags().BoolVar(&difcFilter, "difc-filter", getDefaultDIFCFilter(), "Enable DIFC response filtering based on path labels (requires --enable-difc)") + cmd.Flags().BoolVar(&enableConfigExt, "enable-config-extensions", getDefaultConfigExtensions(), "Enable config extensions (guards, session labels) - required for DIFC features") + cmd.Flags().StringVar(&sessionSecrecy, "session-secrecy", getDefaultSessionSecrecy(), "Comma-separated initial secrecy labels for agent sessions (requires --enable-config-extensions)") + cmd.Flags().StringVar(&sessionIntegrity, "session-integrity", getDefaultSessionIntegrity(), "Comma-separated initial integrity labels for agent sessions (requires --enable-config-extensions)") }) } @@ -36,3 +46,39 @@ func getDefaultEnableDIFC() bool { } return defaultEnableDIFC } + +// getDefaultDIFCFilter returns the default DIFC filter setting, checking MCP_GATEWAY_DIFC_FILTER +// environment variable first, then falling back to the hardcoded default (false) +func getDefaultDIFCFilter() bool { + if envFilter := os.Getenv("MCP_GATEWAY_DIFC_FILTER"); envFilter != "" { + switch strings.ToLower(envFilter) { + case "1", "true", "yes", "on": + return true + } + } + return defaultDIFCFilter +} + 
+// getDefaultConfigExtensions returns the default config extensions setting, +// checking MCP_GATEWAY_CONFIG_EXTENSIONS environment variable first +func getDefaultConfigExtensions() bool { + if envConfigExt := os.Getenv("MCP_GATEWAY_CONFIG_EXTENSIONS"); envConfigExt != "" { + switch strings.ToLower(envConfigExt) { + case "1", "true", "yes", "on": + return true + } + } + return defaultConfigExtensions +} + +// getDefaultSessionSecrecy returns the default session secrecy labels from +// MCP_GATEWAY_SESSION_SECRECY environment variable +func getDefaultSessionSecrecy() string { + return os.Getenv("MCP_GATEWAY_SESSION_SECRECY") +} + +// getDefaultSessionIntegrity returns the default session integrity labels from +// MCP_GATEWAY_SESSION_INTEGRITY environment variable +func getDefaultSessionIntegrity() string { + return os.Getenv("MCP_GATEWAY_SESSION_INTEGRITY") +} diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 95bc2cdd..3d84d095 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -176,6 +176,33 @@ func run(cmd *cobra.Command, args []string) error { log.Println("Environment validation passed") } + // Validate extension flag prerequisites + // Extension features require --enable-config-extensions to be set + hasExtensionFeatures := enableDIFC || difcFilter || sessionSecrecy != "" || sessionIntegrity != "" + if hasExtensionFeatures && !enableConfigExt { + var features []string + if enableDIFC { + features = append(features, "--enable-difc") + } + if difcFilter { + features = append(features, "--difc-filter") + } + if sessionSecrecy != "" { + features = append(features, "--session-secrecy") + } + if sessionIntegrity != "" { + features = append(features, "--session-integrity") + } + return fmt.Errorf("the following flags require --enable-config-extensions (or MCP_GATEWAY_CONFIG_EXTENSIONS=1): %s", strings.Join(features, ", ")) + } + + // Set config extensions flag before loading config + // This determines whether DIFC extensions (guards, session 
// parseSessionLabels splits a comma-separated label list into its entries.
// Surrounding whitespace is trimmed from each entry and empty entries are
// dropped. It returns nil, not an empty slice, when no labels remain.
func parseSessionLabels(input string) []string {
	var labels []string
	for rest, more := input, true; more; {
		var field string
		field, rest, more = strings.Cut(rest, ",")
		if field = strings.TrimSpace(field); field != "" {
			labels = append(labels, field)
		}
	}
	return labels
}
+ os.Unsetenv("MCP_GATEWAY_ENABLE_DIFC") + } + + // Test getDefaultEnableDIFC + got := getDefaultEnableDIFC() + assert.Equal(t, tt.want, got, "getDefaultEnableDIFC() should return expected value") + }) + } +} + +func TestGetDefaultDIFCFilter(t *testing.T) { + tests := []struct { + name string + envValue string + want bool + }{ + { + name: "no environment variable set", + envValue: "", + want: false, + }, + { + name: "environment variable set to 1", + envValue: "1", + want: true, + }, + { + name: "environment variable set to true", + envValue: "true", + want: true, + }, + { + name: "environment variable set to TRUE (uppercase)", + envValue: "TRUE", + want: true, + }, + { + name: "environment variable set to yes", + envValue: "yes", + want: true, + }, + { + name: "environment variable set to on", + envValue: "on", + want: true, + }, + { + name: "environment variable set to 0", + envValue: "0", + want: false, + }, + { + name: "environment variable set to false", + envValue: "false", + want: false, + }, + { + name: "environment variable set to invalid value", + envValue: "invalid", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Save original value and restore after test + originalValue := os.Getenv("MCP_GATEWAY_DIFC_FILTER") + t.Cleanup(func() { + if originalValue != "" { + os.Setenv("MCP_GATEWAY_DIFC_FILTER", originalValue) + } else { + os.Unsetenv("MCP_GATEWAY_DIFC_FILTER") + } + }) + + // Set test environment variable + if tt.envValue != "" { + os.Setenv("MCP_GATEWAY_DIFC_FILTER", tt.envValue) + } else { + os.Unsetenv("MCP_GATEWAY_DIFC_FILTER") + } + + // Test getDefaultDIFCFilter + got := getDefaultDIFCFilter() + assert.Equal(t, tt.want, got, "getDefaultDIFCFilter() should return expected value") + }) + } +} + func TestDefaultConfigFile(t *testing.T) { // Verify that the default config file is empty (no default config loading) assert.Empty(t, defaultConfigFile, "defaultConfigFile should be empty string") @@ -499,3 
+657,176 @@ func TestWriteGatewayConfig(t *testing.T) { assert.Contains(t, output, DefaultListenPort) }) } + +// TestParseSessionLabels tests the parseSessionLabels helper function +func TestParseSessionLabels(t *testing.T) { + tests := []struct { + name string + input string + want []string + }{ + { + name: "empty string", + input: "", + want: nil, + }, + { + name: "single label", + input: "private:github/my-repo", + want: []string{"private:github/my-repo"}, + }, + { + name: "multiple labels", + input: "contributor:github/repo,maintainer:github/repo", + want: []string{"contributor:github/repo", "maintainer:github/repo"}, + }, + { + name: "labels with spaces", + input: " contributor:github/repo , maintainer:github/repo ", + want: []string{"contributor:github/repo", "maintainer:github/repo"}, + }, + { + name: "labels with empty parts", + input: "contributor:github/repo,,maintainer:github/repo", + want: []string{"contributor:github/repo", "maintainer:github/repo"}, + }, + { + name: "only whitespace", + input: " ", + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseSessionLabels(tt.input) + assert.Equal(t, tt.want, got) + }) + } +} + +// TestGetDefaultSessionLabels tests the environment variable defaults for session labels +func TestGetDefaultSessionLabels(t *testing.T) { + t.Run("session secrecy from env", func(t *testing.T) { + // Save original value + original := os.Getenv("MCP_GATEWAY_SESSION_SECRECY") + t.Cleanup(func() { + if original != "" { + os.Setenv("MCP_GATEWAY_SESSION_SECRECY", original) + } else { + os.Unsetenv("MCP_GATEWAY_SESSION_SECRECY") + } + }) + + os.Setenv("MCP_GATEWAY_SESSION_SECRECY", "private:github/test-repo") + got := getDefaultSessionSecrecy() + assert.Equal(t, "private:github/test-repo", got) + }) + + t.Run("session integrity from env", func(t *testing.T) { + // Save original value + original := os.Getenv("MCP_GATEWAY_SESSION_INTEGRITY") + t.Cleanup(func() { + if original != "" { + 
os.Setenv("MCP_GATEWAY_SESSION_INTEGRITY", original) + } else { + os.Unsetenv("MCP_GATEWAY_SESSION_INTEGRITY") + } + }) + + os.Setenv("MCP_GATEWAY_SESSION_INTEGRITY", "contributor:github/test-repo,maintainer:github/test-repo") + got := getDefaultSessionIntegrity() + assert.Equal(t, "contributor:github/test-repo,maintainer:github/test-repo", got) + }) + + t.Run("empty session secrecy when env not set", func(t *testing.T) { + // Save original value + original := os.Getenv("MCP_GATEWAY_SESSION_SECRECY") + t.Cleanup(func() { + if original != "" { + os.Setenv("MCP_GATEWAY_SESSION_SECRECY", original) + } else { + os.Unsetenv("MCP_GATEWAY_SESSION_SECRECY") + } + }) + + os.Unsetenv("MCP_GATEWAY_SESSION_SECRECY") + got := getDefaultSessionSecrecy() + assert.Empty(t, got) + }) +} + +// TestGetDefaultConfigExtensions tests the environment variable default for config extensions +func TestGetDefaultConfigExtensions(t *testing.T) { + tests := []struct { + name string + envValue string + want bool + }{ + { + name: "no environment variable set", + envValue: "", + want: false, + }, + { + name: "environment variable set to 1", + envValue: "1", + want: true, + }, + { + name: "environment variable set to true", + envValue: "true", + want: true, + }, + { + name: "environment variable set to TRUE (uppercase)", + envValue: "TRUE", + want: true, + }, + { + name: "environment variable set to yes", + envValue: "yes", + want: true, + }, + { + name: "environment variable set to on", + envValue: "on", + want: true, + }, + { + name: "environment variable set to 0", + envValue: "0", + want: false, + }, + { + name: "environment variable set to false", + envValue: "false", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Save original value and restore after test + originalValue := os.Getenv("MCP_GATEWAY_CONFIG_EXTENSIONS") + t.Cleanup(func() { + if originalValue != "" { + os.Setenv("MCP_GATEWAY_CONFIG_EXTENSIONS", originalValue) + } else { + 
os.Unsetenv("MCP_GATEWAY_CONFIG_EXTENSIONS") + } + }) + + // Set test environment variable + if tt.envValue != "" { + os.Setenv("MCP_GATEWAY_CONFIG_EXTENSIONS", tt.envValue) + } else { + os.Unsetenv("MCP_GATEWAY_CONFIG_EXTENSIONS") + } + + // Test getDefaultConfigExtensions + got := getDefaultConfigExtensions() + assert.Equal(t, tt.want, got, "getDefaultConfigExtensions() should return expected value") + }) + } +} diff --git a/internal/config/config_core.go b/internal/config/config_core.go index 69386c58..d1d25221 100644 --- a/internal/config/config_core.go +++ b/internal/config/config_core.go @@ -24,12 +24,18 @@ type Config struct { // Servers maps server names to their configurations Servers map[string]*ServerConfig `toml:"servers" json:"servers"` + // Guards holds guard configurations (optional, experimental) + Guards map[string]*GuardConfig `toml:"guards" json:"guards,omitempty"` + // Gateway holds global gateway settings Gateway *GatewayConfig `toml:"gateway" json:"gateway,omitempty"` // EnableDIFC enables Decentralized Information Flow Control EnableDIFC bool `toml:"enable_difc" json:"enable_difc,omitempty"` + // DIFCFilter enables DIFC response filtering (removes content that violates agent labels) + DIFCFilter bool `toml:"difc_filter" json:"difc_filter,omitempty"` + // SequentialLaunch launches servers sequentially instead of in parallel SequentialLaunch bool `toml:"sequential_launch" json:"sequential_launch,omitempty"` } @@ -54,6 +60,9 @@ type GatewayConfig struct { // PayloadDir is the directory for storing large payloads PayloadDir string `toml:"payload_dir" json:"payload_dir,omitempty"` + + // Session holds initial DIFC labels for agent sessions + Session *SessionConfig `toml:"session" json:"session,omitempty"` } // ServerConfig represents an individual MCP server configuration. 
@@ -81,6 +90,9 @@ type ServerConfig struct { // Tools is an optional list of tools to filter/expose Tools []string `toml:"tools" json:"tools,omitempty"` + + // Guard is the guard ID to use for this server (references a guard in the guards section) + Guard string `toml:"guard" json:"guard,omitempty"` } // LoadFromFile loads configuration from a TOML file. diff --git a/internal/config/config_difc.go b/internal/config/config_difc.go new file mode 100644 index 00000000..a5eeecdb --- /dev/null +++ b/internal/config/config_difc.go @@ -0,0 +1,74 @@ +// Package config provides configuration loading and parsing. +// This file defines DIFC (Decentralized Information Flow Control) configuration types. +package config + +func init() { + // Register a stdin converter for session configuration + RegisterStdinConverter(func(cfg *Config, stdinCfg *StdinConfig) { + // Convert session config if present + if stdinCfg.Gateway != nil && stdinCfg.Gateway.Session != nil { + if cfg.Gateway == nil { + cfg.Gateway = &GatewayConfig{} + } + cfg.Gateway.Session = &SessionConfig{ + Secrecy: stdinCfg.Gateway.Session.Secrecy, + Integrity: stdinCfg.Gateway.Session.Integrity, + } + } + }) +} + +// GuardConfig represents a DIFC guard configuration (experimental). +type GuardConfig struct { + // Type is the guard type: "wasm" for WebAssembly guards + Type string `toml:"type" json:"type"` + + // Path is the path to the WASM file (mutually exclusive with URL) + Path string `toml:"path" json:"path,omitempty"` + + // URL is the URL to download WASM file from (mutually exclusive with Path) + URL string `toml:"url" json:"url,omitempty"` + + // SHA256 is the checksum for URL downloads (required when URL is set) + SHA256 string `toml:"sha256" json:"sha256,omitempty"` + + // CacheDir is the directory to cache downloaded WASM files (optional) + CacheDir string `toml:"cache_dir" json:"cacheDir,omitempty"` +} + +// StdinGuardConfig represents a DIFC guard configuration from stdin JSON (experimental). 
+type StdinGuardConfig struct { + // Type is the guard type: "wasm" for WebAssembly guards + Type string `json:"type"` + + // Path is the path to the WASM file (mutually exclusive with URL) + Path string `json:"path,omitempty"` + + // URL is the URL to download WASM file from (mutually exclusive with Path) + URL string `json:"url,omitempty"` + + // SHA256 is the checksum for URL downloads (required when URL is set) + SHA256 string `json:"sha256,omitempty"` + + // CacheDir is the directory to cache downloaded WASM files (optional) + CacheDir string `json:"cacheDir,omitempty"` +} + +// SessionConfig represents initial DIFC labels for agent sessions. +// See github-difc.md section 11.5 for specification. +type SessionConfig struct { + // Secrecy holds initial secrecy clearance tags + Secrecy []string `toml:"secrecy" json:"secrecy,omitempty"` + + // Integrity holds initial integrity clearance tags + Integrity []string `toml:"integrity" json:"integrity,omitempty"` +} + +// StdinSessionConfig represents session configuration from stdin JSON. +type StdinSessionConfig struct { + // Secrecy holds initial secrecy clearance tags + Secrecy []string `json:"secrecy,omitempty"` + + // Integrity holds initial integrity clearance tags + Integrity []string `json:"integrity,omitempty"` +} diff --git a/internal/config/config_stdin.go b/internal/config/config_stdin.go index 00c4042f..305e0a27 100644 --- a/internal/config/config_stdin.go +++ b/internal/config/config_stdin.go @@ -15,6 +15,9 @@ type StdinConfig struct { // MCPServers maps server names to their configurations MCPServers map[string]*StdinServerConfig `json:"mcpServers"` + // Guards holds guard configurations (optional, experimental) + Guards map[string]*StdinGuardConfig `json:"guards,omitempty"` + // Gateway holds global gateway settings Gateway *StdinGatewayConfig `json:"gateway,omitempty"` @@ -25,12 +28,13 @@ type StdinConfig struct { // StdinGatewayConfig represents gateway configuration in stdin JSON format. 
// Uses pointers for optional fields to distinguish between unset and zero values. type StdinGatewayConfig struct { - Port *int `json:"port,omitempty"` - APIKey string `json:"apiKey,omitempty"` - Domain string `json:"domain,omitempty"` - StartupTimeout *int `json:"startupTimeout,omitempty"` - ToolTimeout *int `json:"toolTimeout,omitempty"` - PayloadDir string `json:"payloadDir,omitempty"` + Port *int `json:"port,omitempty"` + APIKey string `json:"apiKey,omitempty"` + Domain string `json:"domain,omitempty"` + StartupTimeout *int `json:"startupTimeout,omitempty"` + ToolTimeout *int `json:"toolTimeout,omitempty"` + PayloadDir string `json:"payloadDir,omitempty"` + Session *StdinSessionConfig `json:"session,omitempty"` } // StdinServerConfig represents a single server configuration in stdin JSON format. @@ -64,6 +68,9 @@ type StdinServerConfig struct { // Tools is an optional list of tools to filter/expose Tools []string `json:"tools,omitempty"` + + // Guard is the guard ID to use for this server (references a guard in the guards section) + Guard string `json:"guard,omitempty"` } // LoadFromStdin loads configuration from stdin JSON. 
diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 194e2f5f..5183befa 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1096,3 +1096,203 @@ args = ["run", "--rm", "-i", "mcp/memory"] _, ok = cfg.Servers["memory"] assert.True(t, ok, "Server 'memory' not found") } + +// TestLoadFromStdin_WithSessionLabels tests session label configuration from stdin JSON +func TestLoadFromStdin_WithSessionLabels(t *testing.T) { + // Enable config extensions for session label support + SetConfigExtensionsEnabled(true) + ResetSchemaCache() + t.Cleanup(func() { + SetConfigExtensionsEnabled(false) + ResetSchemaCache() + }) + + jsonConfig := `{ + "mcpServers": { + "test": { + "type": "stdio", + "container": "test/server:latest" + } + }, + "gateway": { + "port": 3000, + "domain": "localhost", + "apiKey": "test-key", + "session": { + "secrecy": ["private:github/my-repo"], + "integrity": ["contributor:github/my-repo", "maintainer:github/my-repo"] + } + } + }` + + r, w, _ := os.Pipe() + oldStdin := os.Stdin + os.Stdin = r + go func() { + w.Write([]byte(jsonConfig)) + w.Close() + }() + + cfg, err := LoadFromStdin() + os.Stdin = oldStdin + + require.NoError(t, err, "LoadFromStdin() failed") + require.NotNil(t, cfg.Gateway, "Gateway config should not be nil") + require.NotNil(t, cfg.Gateway.Session, "Session config should not be nil") + + assert.Equal(t, []string{"private:github/my-repo"}, cfg.Gateway.Session.Secrecy) + assert.Equal(t, []string{"contributor:github/my-repo", "maintainer:github/my-repo"}, cfg.Gateway.Session.Integrity) +} + +// TestLoadFromStdin_WithEmptySessionLabels tests that empty session labels are handled correctly +func TestLoadFromStdin_WithEmptySessionLabels(t *testing.T) { + // Enable config extensions for session label support + SetConfigExtensionsEnabled(true) + t.Cleanup(func() { SetConfigExtensionsEnabled(false) }) + + jsonConfig := `{ + "mcpServers": { + "test": { + "type": "stdio", + "container": 
"test/server:latest" + } + }, + "gateway": { + "port": 3000, + "domain": "localhost", + "apiKey": "test-key", + "session": { + "secrecy": [], + "integrity": [] + } + } + }` + + r, w, _ := os.Pipe() + oldStdin := os.Stdin + os.Stdin = r + go func() { + w.Write([]byte(jsonConfig)) + w.Close() + }() + + cfg, err := LoadFromStdin() + os.Stdin = oldStdin + + require.NoError(t, err, "LoadFromStdin() failed") + require.NotNil(t, cfg.Gateway, "Gateway config should not be nil") + require.NotNil(t, cfg.Gateway.Session, "Session config should not be nil") + + assert.Empty(t, cfg.Gateway.Session.Secrecy) + assert.Empty(t, cfg.Gateway.Session.Integrity) +} + +// TestLoadFromStdin_NoSessionConfig tests that missing session config is handled correctly +func TestLoadFromStdin_NoSessionConfig(t *testing.T) { + // Enable config extensions for session label support + SetConfigExtensionsEnabled(true) + ResetSchemaCache() + t.Cleanup(func() { + SetConfigExtensionsEnabled(false) + ResetSchemaCache() + }) + + jsonConfig := `{ + "mcpServers": { + "test": { + "type": "stdio", + "container": "test/server:latest" + } + }, + "gateway": { + "port": 3000, + "domain": "localhost", + "apiKey": "test-key" + } + }` + + r, w, _ := os.Pipe() + oldStdin := os.Stdin + os.Stdin = r + go func() { + w.Write([]byte(jsonConfig)) + w.Close() + }() + + cfg, err := LoadFromStdin() + os.Stdin = oldStdin + + require.NoError(t, err, "LoadFromStdin() failed") + require.NotNil(t, cfg.Gateway, "Gateway config should not be nil") + assert.Nil(t, cfg.Gateway.Session, "Session config should be nil when not specified") +} + +// TestLoadFromFile_WithSessionLabels tests session label configuration from TOML file +func TestLoadFromFile_WithSessionLabels(t *testing.T) { + tmpDir := t.TempDir() + tmpFile := filepath.Join(tmpDir, "config.toml") + + tomlContent := ` +[gateway] +port = 3000 + +[gateway.session] +secrecy = ["private:github/my-repo"] +integrity = ["contributor:github/my-repo", "maintainer:github/my-repo"] + 
+[servers.test] +command = "docker" +args = ["run", "--rm", "-i", "test/container:latest"] +` + + err := os.WriteFile(tmpFile, []byte(tomlContent), 0644) + require.NoError(t, err, "Failed to write temp TOML file") + + cfg, err := LoadFromFile(tmpFile) + require.NoError(t, err, "LoadFromFile() failed") + require.NotNil(t, cfg, "LoadFromFile() returned nil config") + require.NotNil(t, cfg.Gateway, "Gateway config should not be nil") + require.NotNil(t, cfg.Gateway.Session, "Session config should not be nil") + + assert.Equal(t, []string{"private:github/my-repo"}, cfg.Gateway.Session.Secrecy) + assert.Equal(t, []string{"contributor:github/my-repo", "maintainer:github/my-repo"}, cfg.Gateway.Session.Integrity) +} + +// TestLoadFromStdin_SessionLabelsWithoutExtensions tests that session labels are rejected when extensions are disabled +func TestLoadFromStdin_SessionLabelsWithoutExtensions(t *testing.T) { + // Ensure config extensions are disabled + SetConfigExtensionsEnabled(false) + ResetSchemaCache() + t.Cleanup(func() { ResetSchemaCache() }) + + jsonConfig := `{ + "mcpServers": { + "test": { + "type": "stdio", + "container": "test/server:latest" + } + }, + "gateway": { + "port": 3000, + "domain": "localhost", + "apiKey": "test-key", + "session": { + "secrecy": ["private:github/my-repo"] + } + } + }` + + r, w, _ := os.Pipe() + oldStdin := os.Stdin + os.Stdin = r + go func() { + w.Write([]byte(jsonConfig)) + w.Close() + }() + + _, err := LoadFromStdin() + os.Stdin = oldStdin + + require.Error(t, err, "LoadFromStdin() should fail with session config when extensions are disabled") + assert.Contains(t, err.Error(), "session", "Error should mention session field") +} diff --git a/internal/config/validation_schema.go b/internal/config/validation_schema.go index 1764abd3..11e3601c 100644 --- a/internal/config/validation_schema.go +++ b/internal/config/validation_schema.go @@ -25,6 +25,10 @@ var ( // gatewayVersion stores the version string to include in error messages 
gatewayVersion = "dev" + // configExtensionsEnabled controls whether DIFC config extensions (guards, session labels) + // are validated. When false, only the upstream MCP Gateway spec is used. + configExtensionsEnabled = false + // logSchema is the debug logger for schema validation logSchema = logger.New("config:validation_schema") @@ -49,6 +53,29 @@ var ( schemaErr error ) +// SetConfigExtensionsEnabled enables or disables config extensions (guards, session labels). +// This must be called before any config loading/validation occurs. +// When disabled, only the upstream MCP Gateway spec is validated. +// Note: Changing this after schema compilation requires calling ResetSchemaCache(). +func SetConfigExtensionsEnabled(enabled bool) { + configExtensionsEnabled = enabled + logSchema.Printf("Config extensions %s", map[bool]string{true: "enabled", false: "disabled"}[enabled]) +} + +// ConfigExtensionsEnabled returns whether config extensions are enabled +func ConfigExtensionsEnabled() bool { + return configExtensionsEnabled +} + +// ResetSchemaCache resets the cached schema, forcing recompilation on next use. +// This is primarily for testing purposes when config extension settings change. 
+func ResetSchemaCache() { + schemaOnce = sync.Once{} + cachedSchema = nil + schemaErr = nil + logSchema.Print("Schema cache reset") +} + // SetGatewayVersion sets the gateway version for error reporting func SetGatewayVersion(version string) { if version != "" { @@ -157,6 +184,22 @@ func fetchAndFixSchema(url string) ([]byte, error) { } } + // Conditionally add DIFC extensions to the schema + // These extensions are only applied when --enable-config-extensions is set + if configExtensionsEnabled { + logSchema.Print("Applying config extensions (guards, session labels)") + + // Add DIFC guard support to the schema + // This extends the upstream schema to support guard configuration for DIFC enforcement + addGuardSchemaSupport(schema) + + // Add session label support to the gateway config + // This extends the upstream schema to support DIFC session initialization (github-difc.md section 11.5) + addSessionSchemaSupport(schema) + } else { + logSchema.Print("Config extensions disabled - using upstream schema only") + } + fixedBytes, err := json.Marshal(schema) if err != nil { return nil, fmt.Errorf("failed to marshal fixed schema: %w", err) @@ -459,3 +502,129 @@ func validateStringPatterns(stdinCfg *StdinConfig) error { return nil } + +// addGuardSchemaSupport extends the upstream schema to support DIFC guard configuration. +// This adds: +// 1. "guard" property to stdioServerConfig and httpServerConfig (optional string reference) +// 2. "guards" property to the root schema (map of guard configurations) +// +// Guard Configuration Schema: +// +// { +// "guards": { +// "my-guard": { +// "type": "wasm", +// "path": "/path/to/guard.wasm", +// "url": "https://example.com/guard.wasm" // alternative to path +// } +// }, +// "mcpServers": { +// "my-server": { +// "guard": "my-guard", +// ... 
+// } +// } +// } +func addGuardSchemaSupport(schema map[string]interface{}) { + // Define the guard reference property (used in server configs) + guardRefProperty := map[string]interface{}{ + "type": "string", + "description": "Reference to a guard defined in the guards section (requires --enable-difc)", + } + + // Add "guard" property to stdioServerConfig and httpServerConfig + if definitions, ok := schema["definitions"].(map[string]interface{}); ok { + // Add to stdioServerConfig + if stdioConfig, ok := definitions["stdioServerConfig"].(map[string]interface{}); ok { + if properties, ok := stdioConfig["properties"].(map[string]interface{}); ok { + properties["guard"] = guardRefProperty + } + } + + // Add to httpServerConfig + if httpConfig, ok := definitions["httpServerConfig"].(map[string]interface{}); ok { + if properties, ok := httpConfig["properties"].(map[string]interface{}); ok { + properties["guard"] = guardRefProperty + } + } + + // Add to customServerConfig + if customConfig, ok := definitions["customServerConfig"].(map[string]interface{}); ok { + if properties, ok := customConfig["properties"].(map[string]interface{}); ok { + properties["guard"] = guardRefProperty + } + } + + // Add guardConfig definition + definitions["guardConfig"] = map[string]interface{}{ + "type": "object", + "description": "WASM guard configuration for DIFC enforcement", + "properties": map[string]interface{}{ + "type": map[string]interface{}{ + "type": "string", + "enum": []string{"wasm"}, + "description": "Guard type (currently only 'wasm' is supported)", + }, + "path": map[string]interface{}{ + "type": "string", + "description": "Local file path to the WASM guard module", + }, + "url": map[string]interface{}{ + "type": "string", + "pattern": "^https?://.+", + "description": "URL to download the WASM guard module from", + }, + }, + "required": []string{"type"}, + } + } + + // Add "guards" property to root schema + if properties, ok := 
schema["properties"].(map[string]interface{}); ok { + properties["guards"] = map[string]interface{}{ + "type": "object", + "description": "DIFC guard definitions (requires --enable-difc flag)", + "additionalProperties": map[string]interface{}{ + "$ref": "#/definitions/guardConfig", + }, + } + } +} + +// addSessionSchemaSupport extends the schema to support session label configuration +// in the gateway config section. See github-difc.md section 11.5 for specification. +func addSessionSchemaSupport(schema map[string]interface{}) { + // Add sessionConfig definition + if definitions, ok := schema["definitions"].(map[string]interface{}); ok { + definitions["sessionConfig"] = map[string]interface{}{ + "type": "object", + "description": "DIFC session label configuration for initializing agent clearances", + "properties": map[string]interface{}{ + "secrecy": map[string]interface{}{ + "type": "array", + "description": "Initial secrecy clearance tags (e.g., private:owner/repo)", + "items": map[string]interface{}{ + "type": "string", + }, + }, + "integrity": map[string]interface{}{ + "type": "array", + "description": "Initial integrity clearance tags (e.g., contributor:owner/repo, maintainer:owner/repo)", + "items": map[string]interface{}{ + "type": "string", + }, + }, + }, + "additionalProperties": false, + } + + // Add "session" property to gatewayConfig + if gatewayConfig, ok := definitions["gatewayConfig"].(map[string]interface{}); ok { + if properties, ok := gatewayConfig["properties"].(map[string]interface{}); ok { + properties["session"] = map[string]interface{}{ + "$ref": "#/definitions/sessionConfig", + } + } + } + } +} diff --git a/internal/difc/agent.go b/internal/difc/agent.go index 7d05ff5d..28250f55 100644 --- a/internal/difc/agent.go +++ b/internal/difc/agent.go @@ -57,23 +57,19 @@ func (a *AgentLabels) DropIntegrityTag(tag Tag) { log.Printf("[DIFC] Agent %s dropped integrity tag: %s", a.AgentID, tag) } -// AccumulateFromRead updates agent labels after reading 
data -// Agent gains secrecy and integrity tags from what they read +// AccumulateFromRead is a no-op in the current implementation. +// Agent labels remain fixed at their initial values set during session creation. +// +// Automatic label accumulation is disabled because: +// 1. Secrecy tainting would cause uncontrolled label growth across operations +// 2. Integrity accumulation is semantically incorrect (integrity = endorsement, not influence) +// +// Future versions will support explicit primitives for: +// - Adding secrecy tags (when agent receives sensitive data it wants to track) +// - Removing integrity tags (when agent performs untrusted operations) func (a *AgentLabels) AccumulateFromRead(resource *LabeledResource) { - a.mu.Lock() - defer a.mu.Unlock() - - // Gain secrecy tags from the data we read - if resource.Secrecy.Label != nil && !resource.Secrecy.Label.IsEmpty() { - a.Secrecy.Label.Union(resource.Secrecy.Label) - log.Printf("[DIFC] Agent %s accumulated secrecy tags from read: %v", a.AgentID, resource.Secrecy.Label.GetTags()) - } - - // Gain integrity tags from the data we read (we're influenced by it) - if resource.Integrity.Label != nil && !resource.Integrity.Label.IsEmpty() { - a.Integrity.Label.Union(resource.Integrity.Label) - log.Printf("[DIFC] Agent %s accumulated integrity tags from read: %v", a.AgentID, resource.Integrity.Label.GetTags()) - } + // No-op: automatic label accumulation is disabled + // Agent labels are immutable after session initialization } // Clone creates a copy of the agent labels diff --git a/internal/difc/difc_test.go b/internal/difc/difc_test.go index 775c3d9c..8ff37695 100644 --- a/internal/difc/difc_test.go +++ b/internal/difc/difc_test.go @@ -404,17 +404,25 @@ func TestAgentRegistry(t *testing.T) { assert.True(t, agent2.Secrecy.Label.Contains("secret"), "Expected agent to retain added tags") }) - t.Run("AccumulateFromRead updates agent labels", func(t *testing.T) { + t.Run("AccumulateFromRead is a no-op (labels 
immutable after init)", func(t *testing.T) { agent := registry.GetOrCreate("agent-3") + // Record initial state + initialSecrecyCount := len(agent.Secrecy.Label.GetTags()) + initialIntegrityCount := len(agent.Integrity.Label.GetTags()) + resource := NewLabeledResource("data-source") resource.Secrecy.Label.Add("confidential") resource.Integrity.Label.Add("verified") + // AccumulateFromRead should be a no-op agent.AccumulateFromRead(resource) - assert.True(t, agent.Secrecy.Label.Contains("confidential"), "Expected agent to gain secrecy tag from read") - assert.True(t, agent.Integrity.Label.Contains("verified"), "Expected agent to gain integrity tag from read") + // Labels should be unchanged + assert.Equal(t, initialSecrecyCount, len(agent.Secrecy.Label.GetTags()), "Secrecy labels should not change") + assert.Equal(t, initialIntegrityCount, len(agent.Integrity.Label.GetTags()), "Integrity labels should not change") + assert.False(t, agent.Secrecy.Label.Contains("confidential"), "Agent should NOT gain secrecy tag from read") + assert.False(t, agent.Integrity.Label.Contains("verified"), "Agent should NOT gain integrity tag from read") }) } diff --git a/internal/difc/evaluator.go b/internal/difc/evaluator.go index b6802773..e03ab3df 100644 --- a/internal/difc/evaluator.go +++ b/internal/difc/evaluator.go @@ -254,6 +254,7 @@ func (e *Evaluator) FilterCollection( Filtered: []LabeledItem{}, TotalCount: len(collection.Items), FilterReason: "DIFC policy", + mcpWrapper: collection.mcpWrapper, // Propagate MCP wrapper for rewrapping } for _, item := range collection.Items { diff --git a/internal/difc/path_labels.go b/internal/difc/path_labels.go new file mode 100644 index 00000000..51c8b014 --- /dev/null +++ b/internal/difc/path_labels.go @@ -0,0 +1,313 @@ +package difc + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" +) + +// PathLabels represents a collection of labeled paths in a JSON response. 
+// Guards return this structure to indicate which elements in the response +// have specific DIFC labels, without copying the data itself. +// +// Example guard response: +// +// { +// "labeled_paths": [ +// { "path": "/items/0", "labels": { "secrecy": ["public"], "integrity": ["untrusted"] } }, +// { "path": "/items/1", "labels": { "secrecy": ["repo_private"], "integrity": ["github_verified"] } } +// ], +// "default_labels": { "secrecy": ["public"], "integrity": ["untrusted"] } +// } +type PathLabels struct { + // LabeledPaths maps JSON Pointer paths (RFC 6901) to their labels + LabeledPaths []PathLabel `json:"labeled_paths"` + + // DefaultLabels are applied to elements not matched by any path + // If nil, unmatched elements inherit the resource-level labels + DefaultLabels *PathLabelEntry `json:"default_labels,omitempty"` + + // ItemsPath specifies where the collection items are located (e.g., "/items", "" for root array) + // This helps the gateway understand the structure for filtering + ItemsPath string `json:"items_path,omitempty"` +} + +// PathLabel associates a JSON Pointer path with DIFC labels +type PathLabel struct { + // Path is a JSON Pointer (RFC 6901) to the element + // Examples: "/items/0", "/results/5", "/data/users/0" + Path string `json:"path"` + + // Labels for this path + Labels PathLabelEntry `json:"labels"` +} + +// PathLabelEntry contains the DIFC labels for a path +type PathLabelEntry struct { + Description string `json:"description,omitempty"` + Secrecy []string `json:"secrecy"` + Integrity []string `json:"integrity"` +} + +// PathLabeledData implements LabeledData for path-based labels. +// It combines the original response data with path labels from the guard. 
+type PathLabeledData struct { + // OriginalData is the unmodified response from the backend + OriginalData interface{} + + // PathLabels contains the guard's labeling decisions + PathLabels *PathLabels + + // resolvedItems caches the resolved items with their labels + resolvedItems []LabeledItem + resolved bool +} + +// NewPathLabeledData creates a new PathLabeledData from the original response and path labels +func NewPathLabeledData(originalData interface{}, pathLabels *PathLabels) (*PathLabeledData, error) { + pld := &PathLabeledData{ + OriginalData: originalData, + PathLabels: pathLabels, + } + + // Resolve items eagerly to catch any path resolution errors + if err := pld.resolve(); err != nil { + return nil, fmt.Errorf("failed to resolve path labels: %w", err) + } + + return pld, nil +} + +// resolve applies path labels to the original data +func (p *PathLabeledData) resolve() error { + if p.resolved { + return nil + } + + // Get the items array from the original data + items, err := p.getItems() + if err != nil { + return err + } + + if items == nil { + // No collection to label, treat as single item + p.resolvedItems = []LabeledItem{{ + Data: p.OriginalData, + Labels: p.pathEntryToResource(p.PathLabels.DefaultLabels), + }} + p.resolved = true + return nil + } + + // Create a map of index -> labels for quick lookup + indexLabels := make(map[int]*PathLabelEntry) + for _, pl := range p.PathLabels.LabeledPaths { + idx, err := p.extractIndexFromPath(pl.Path, p.PathLabels.ItemsPath) + if err != nil { + // Path doesn't match items pattern, skip + continue + } + entry := pl.Labels // Create a copy + indexLabels[idx] = &entry + } + + // Build labeled items + p.resolvedItems = make([]LabeledItem, len(items)) + for i, item := range items { + labels := indexLabels[i] + if labels == nil { + labels = p.PathLabels.DefaultLabels + } + + p.resolvedItems[i] = LabeledItem{ + Data: item, + Labels: p.pathEntryToResource(labels), + } + } + + p.resolved = true + return nil +} + 
+// getItems extracts the items array from the original data based on ItemsPath +func (p *PathLabeledData) getItems() ([]interface{}, error) { + if p.PathLabels.ItemsPath == "" { + // Root-level array + if arr, ok := p.OriginalData.([]interface{}); ok { + return arr, nil + } + // Not an array, return nil (single item) + return nil, nil + } + + // Navigate to the items path + current := p.OriginalData + parts := splitJSONPointer(p.PathLabels.ItemsPath) + + for _, part := range parts { + if part == "" { + continue + } + + switch v := current.(type) { + case map[string]interface{}: + var ok bool + current, ok = v[part] + if !ok { + return nil, fmt.Errorf("path %q not found in response", p.PathLabels.ItemsPath) + } + case []interface{}: + idx, err := strconv.Atoi(part) + if err != nil { + return nil, fmt.Errorf("expected array index at %q, got %q", p.PathLabels.ItemsPath, part) + } + if idx < 0 || idx >= len(v) { + return nil, fmt.Errorf("array index %d out of bounds", idx) + } + current = v[idx] + default: + return nil, fmt.Errorf("cannot navigate path %q: unexpected type at %q", p.PathLabels.ItemsPath, part) + } + } + + if arr, ok := current.([]interface{}); ok { + return arr, nil + } + + return nil, fmt.Errorf("items_path %q does not point to an array", p.PathLabels.ItemsPath) +} + +// extractIndexFromPath extracts the array index from a JSON Pointer path +// For example, "/items/5" with itemsPath "/items" returns 5 +func (p *PathLabeledData) extractIndexFromPath(path, itemsPath string) (int, error) { + // Normalize paths + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + if itemsPath != "" && !strings.HasPrefix(itemsPath, "/") { + itemsPath = "/" + itemsPath + } + + // Check if path starts with itemsPath + var remainder string + if itemsPath == "" { + remainder = path + } else if strings.HasPrefix(path, itemsPath+"/") { + remainder = strings.TrimPrefix(path, itemsPath) + } else if strings.HasPrefix(path, itemsPath) && len(path) > len(itemsPath) { + 
remainder = path[len(itemsPath):] + } else { + return -1, fmt.Errorf("path %q does not match items path %q", path, itemsPath) + } + + // Extract the index (first segment after itemsPath) + parts := splitJSONPointer(remainder) + if len(parts) == 0 { + return -1, fmt.Errorf("no index in path %q", path) + } + + idx, err := strconv.Atoi(parts[0]) + if err != nil { + return -1, fmt.Errorf("expected array index in path %q, got %q", path, parts[0]) + } + + return idx, nil +} + +// pathEntryToResource converts a PathLabelEntry to a LabeledResource +func (p *PathLabeledData) pathEntryToResource(entry *PathLabelEntry) *LabeledResource { + if entry == nil { + // Return empty labels if no entry + return NewLabeledResource("unlabeled") + } + + resource := NewLabeledResource(entry.Description) + + for _, s := range entry.Secrecy { + resource.Secrecy.Label.Add(Tag(s)) + } + + for _, i := range entry.Integrity { + resource.Integrity.Label.Add(Tag(i)) + } + + return resource +} + +// splitJSONPointer splits a JSON Pointer path into segments +// Handles RFC 6901 escaping (~0 = ~, ~1 = /) +func splitJSONPointer(path string) []string { + if path == "" || path == "/" { + return nil + } + + // Remove leading slash + path = strings.TrimPrefix(path, "/") + + parts := strings.Split(path, "/") + result := make([]string, len(parts)) + + for i, part := range parts { + // Unescape JSON Pointer special characters + part = strings.ReplaceAll(part, "~1", "/") + part = strings.ReplaceAll(part, "~0", "~") + result[i] = part + } + + return result +} + +// Overall returns the aggregate labels for all items +func (p *PathLabeledData) Overall() *LabeledResource { + if !p.resolved { + _ = p.resolve() + } + + if len(p.resolvedItems) == 0 { + return NewLabeledResource("empty path-labeled data") + } + + overall := NewLabeledResource("path-labeled collection") + for _, item := range p.resolvedItems { + if item.Labels != nil { + overall.Secrecy.Label.Union(item.Labels.Secrecy.Label) + 
overall.Integrity.Label.Union(item.Labels.Integrity.Label) + } + } + + return overall +} + +// ToResult returns the original data (path labels don't modify the data structure) +func (p *PathLabeledData) ToResult() (interface{}, error) { + return p.OriginalData, nil +} + +// GetItems returns the resolved labeled items for filtering +func (p *PathLabeledData) GetItems() []LabeledItem { + if !p.resolved { + _ = p.resolve() + } + return p.resolvedItems +} + +// ToCollectionLabeledData converts to CollectionLabeledData for compatibility with existing filtering +func (p *PathLabeledData) ToCollectionLabeledData() *CollectionLabeledData { + if !p.resolved { + _ = p.resolve() + } + return &CollectionLabeledData{ + Items: p.resolvedItems, + } +} + +// ParsePathLabels parses a JSON response from a guard into PathLabels +func ParsePathLabels(data []byte) (*PathLabels, error) { + var pl PathLabels + if err := json.Unmarshal(data, &pl); err != nil { + return nil, fmt.Errorf("failed to parse path labels: %w", err) + } + return &pl, nil +} diff --git a/internal/difc/path_labels_test.go b/internal/difc/path_labels_test.go new file mode 100644 index 00000000..b4c1bf9c --- /dev/null +++ b/internal/difc/path_labels_test.go @@ -0,0 +1,469 @@ +package difc + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSplitJSONPointer(t *testing.T) { + tests := []struct { + name string + path string + expected []string + }{ + { + name: "empty path", + path: "", + expected: nil, + }, + { + name: "root path", + path: "/", + expected: nil, + }, + { + name: "simple path", + path: "/items", + expected: []string{"items"}, + }, + { + name: "nested path", + path: "/items/0", + expected: []string{"items", "0"}, + }, + { + name: "deeply nested path", + path: "/results/data/users/5", + expected: []string{"results", "data", "users", "5"}, + }, + { + name: "escaped tilde", + path: "/foo~0bar", + expected: 
[]string{"foo~bar"}, + }, + { + name: "escaped slash", + path: "/foo~1bar", + expected: []string{"foo/bar"}, + }, + { + name: "multiple escapes", + path: "/~0~1test", + expected: []string{"~/test"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := splitJSONPointer(tt.path) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestPathLabeledData_SimpleArray(t *testing.T) { + // Original response is a simple array + originalData := []interface{}{ + map[string]interface{}{"id": 1, "name": "public-item"}, + map[string]interface{}{"id": 2, "name": "private-item"}, + map[string]interface{}{"id": 3, "name": "another-public"}, + } + + pathLabels := &PathLabels{ + ItemsPath: "", // Root-level array + LabeledPaths: []PathLabel{ + { + Path: "/0", + Labels: PathLabelEntry{ + Description: "Public item #1", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + }, + { + Path: "/1", + Labels: PathLabelEntry{ + Description: "Private item #2", + Secrecy: []string{"repo_private"}, + Integrity: []string{"github_verified"}, + }, + }, + { + Path: "/2", + Labels: PathLabelEntry{ + Description: "Public item #3", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + }, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + // Check that items were resolved + items := pld.GetItems() + require.Len(t, items, 3) + + // Check first item labels + assert.Equal(t, 1, items[0].Data.(map[string]interface{})["id"]) + assert.True(t, items[0].Labels.Secrecy.Label.Contains(Tag("public"))) + assert.True(t, items[0].Labels.Integrity.Label.Contains(Tag("untrusted"))) + + // Check second item labels (private) + assert.Equal(t, 2, items[1].Data.(map[string]interface{})["id"]) + assert.True(t, items[1].Labels.Secrecy.Label.Contains(Tag("repo_private"))) + assert.True(t, items[1].Labels.Integrity.Label.Contains(Tag("github_verified"))) + + // Check overall labels (should be union 
of all) + overall := pld.Overall() + assert.True(t, overall.Secrecy.Label.Contains(Tag("public"))) + assert.True(t, overall.Secrecy.Label.Contains(Tag("repo_private"))) + + // Check ToResult returns original data unchanged + result, err := pld.ToResult() + require.NoError(t, err) + assert.Equal(t, originalData, result) +} + +func TestPathLabeledData_NestedItems(t *testing.T) { + // Original response has items in a nested path (simulate JSON unmarshaling) + originalDataJSON := `{ + "total_count": 2, + "items": [ + {"number": 42, "title": "Bug report"}, + {"number": 43, "title": "Feature request"} + ] + }` + var originalData interface{} + require.NoError(t, json.Unmarshal([]byte(originalDataJSON), &originalData)) + + pathLabels := &PathLabels{ + ItemsPath: "/items", + LabeledPaths: []PathLabel{ + { + Path: "/items/0", + Labels: PathLabelEntry{ + Description: "Issue #42", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + }, + { + Path: "/items/1", + Labels: PathLabelEntry{ + Description: "Issue #43", + Secrecy: []string{"repo_private"}, + Integrity: []string{"github_verified"}, + }, + }, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + items := pld.GetItems() + require.Len(t, items, 2) + + // Check labels were correctly applied + assert.Equal(t, float64(42), items[0].Data.(map[string]interface{})["number"]) + assert.True(t, items[0].Labels.Secrecy.Label.Contains(Tag("public"))) + + assert.Equal(t, float64(43), items[1].Data.(map[string]interface{})["number"]) + assert.True(t, items[1].Labels.Secrecy.Label.Contains(Tag("repo_private"))) +} + +func TestPathLabeledData_DefaultLabels(t *testing.T) { + // Some items have explicit labels, others use defaults + originalData := []interface{}{ + map[string]interface{}{"id": 1}, + map[string]interface{}{"id": 2}, + map[string]interface{}{"id": 3}, + } + + pathLabels := &PathLabels{ + ItemsPath: "", + LabeledPaths: []PathLabel{ + { + Path: "/1", // Only 
the second item has explicit labels + Labels: PathLabelEntry{ + Description: "Special item", + Secrecy: []string{"secret"}, + Integrity: []string{"verified"}, + }, + }, + }, + DefaultLabels: &PathLabelEntry{ + Description: "Default item", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + items := pld.GetItems() + require.Len(t, items, 3) + + // Item 0 should have default labels + assert.True(t, items[0].Labels.Secrecy.Label.Contains(Tag("public"))) + + // Item 1 should have explicit labels + assert.True(t, items[1].Labels.Secrecy.Label.Contains(Tag("secret"))) + assert.False(t, items[1].Labels.Secrecy.Label.Contains(Tag("public"))) + + // Item 2 should have default labels + assert.True(t, items[2].Labels.Secrecy.Label.Contains(Tag("public"))) +} + +func TestPathLabeledData_SingleObject(t *testing.T) { + // Response is a single object, not a collection + originalData := map[string]interface{}{ + "number": 42, + "title": "Bug report", + "private": false, + } + + pathLabels := &PathLabels{ + ItemsPath: "", // No items path - single object + LabeledPaths: nil, + DefaultLabels: &PathLabelEntry{ + Description: "Single issue", + Secrecy: []string{"public"}, + Integrity: []string{"github_verified"}, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + // Should have exactly one item (the whole object) + items := pld.GetItems() + require.Len(t, items, 1) + + assert.Equal(t, originalData, items[0].Data) + assert.True(t, items[0].Labels.Secrecy.Label.Contains(Tag("public"))) +} + +func TestPathLabeledData_ToCollectionLabeledData(t *testing.T) { + originalData := []interface{}{ + map[string]interface{}{"id": 1}, + map[string]interface{}{"id": 2}, + } + + pathLabels := &PathLabels{ + ItemsPath: "", + LabeledPaths: []PathLabel{ + {Path: "/0", Labels: PathLabelEntry{Secrecy: []string{"public"}, Integrity: 
[]string{"untrusted"}}}, + {Path: "/1", Labels: PathLabelEntry{Secrecy: []string{"private"}, Integrity: []string{"verified"}}}, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + // Convert to CollectionLabeledData for compatibility + collection := pld.ToCollectionLabeledData() + require.NotNil(t, collection) + require.Len(t, collection.Items, 2) + + assert.True(t, collection.Items[0].Labels.Secrecy.Label.Contains(Tag("public"))) + assert.True(t, collection.Items[1].Labels.Secrecy.Label.Contains(Tag("private"))) +} + +func TestParsePathLabels(t *testing.T) { + jsonData := `{ + "items_path": "/items", + "labeled_paths": [ + { + "path": "/items/0", + "labels": { + "description": "First item", + "secrecy": ["public"], + "integrity": ["untrusted"] + } + }, + { + "path": "/items/1", + "labels": { + "description": "Second item", + "secrecy": ["repo_private"], + "integrity": ["github_verified"] + } + } + ], + "default_labels": { + "description": "Default", + "secrecy": ["public"], + "integrity": ["untrusted"] + } + }` + + pl, err := ParsePathLabels([]byte(jsonData)) + require.NoError(t, err) + + assert.Equal(t, "/items", pl.ItemsPath) + require.Len(t, pl.LabeledPaths, 2) + assert.Equal(t, "/items/0", pl.LabeledPaths[0].Path) + assert.Equal(t, []string{"public"}, pl.LabeledPaths[0].Labels.Secrecy) + + require.NotNil(t, pl.DefaultLabels) + assert.Equal(t, []string{"public"}, pl.DefaultLabels.Secrecy) +} + +func TestPathLabeledData_GitHubSearchIssuesExample(t *testing.T) { + // Realistic GitHub search_issues response + originalDataJSON := `{ + "total_count": 3, + "incomplete_results": false, + "items": [ + { + "number": 1, + "title": "Public bug report", + "repository": {"full_name": "octocat/hello-world", "private": false} + }, + { + "number": 2, + "title": "Private security issue", + "repository": {"full_name": "corp/internal-tools", "private": true} + }, + { + "number": 3, + "title": "Another public issue", + "repository": 
{"full_name": "octocat/hello-world", "private": false} + } + ] + }` + + var originalData interface{} + require.NoError(t, json.Unmarshal([]byte(originalDataJSON), &originalData)) + + // Guard returns path labels based on repo visibility + pathLabels := &PathLabels{ + ItemsPath: "/items", + LabeledPaths: []PathLabel{ + { + Path: "/items/0", + Labels: PathLabelEntry{ + Description: "Issue #1 in octocat/hello-world", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + }, + { + Path: "/items/1", + Labels: PathLabelEntry{ + Description: "Issue #2 in corp/internal-tools", + Secrecy: []string{"repo:corp/internal-tools"}, + Integrity: []string{"github_verified"}, + }, + }, + { + Path: "/items/2", + Labels: PathLabelEntry{ + Description: "Issue #3 in octocat/hello-world", + Secrecy: []string{"public"}, + Integrity: []string{"untrusted"}, + }, + }, + }, + } + + pld, err := NewPathLabeledData(originalData, pathLabels) + require.NoError(t, err) + + items := pld.GetItems() + require.Len(t, items, 3) + + // First item - public + assert.True(t, items[0].Labels.Secrecy.Label.Contains(Tag("public"))) + + // Second item - private repo + assert.True(t, items[1].Labels.Secrecy.Label.Contains(Tag("repo:corp/internal-tools"))) + assert.False(t, items[1].Labels.Secrecy.Label.Contains(Tag("public"))) + + // Third item - public + assert.True(t, items[2].Labels.Secrecy.Label.Contains(Tag("public"))) + + // Overall should contain all tags + overall := pld.Overall() + assert.True(t, overall.Secrecy.Label.Contains(Tag("public"))) + assert.True(t, overall.Secrecy.Label.Contains(Tag("repo:corp/internal-tools"))) + + // Can convert to CollectionLabeledData for filtering + collection := pld.ToCollectionLabeledData() + require.Len(t, collection.Items, 3) +} + +func TestExtractIndexFromPath(t *testing.T) { + pld := &PathLabeledData{} + + tests := []struct { + name string + path string + itemsPath string + wantIdx int + wantErr bool + }{ + { + name: "root array index", + path: 
"/0", + itemsPath: "", + wantIdx: 0, + wantErr: false, + }, + { + name: "nested items array", + path: "/items/5", + itemsPath: "/items", + wantIdx: 5, + wantErr: false, + }, + { + name: "deeply nested", + path: "/results/data/10", + itemsPath: "/results/data", + wantIdx: 10, + wantErr: false, + }, + { + name: "path without leading slash", + path: "items/3", + itemsPath: "items", + wantIdx: 3, + wantErr: false, + }, + { + name: "non-numeric index", + path: "/items/foo", + itemsPath: "/items", + wantIdx: -1, + wantErr: true, + }, + { + name: "mismatched path", + path: "/other/0", + itemsPath: "/items", + wantIdx: -1, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + idx, err := pld.extractIndexFromPath(tt.path, tt.itemsPath) + if tt.wantErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.wantIdx, idx) + } + }) + } +} diff --git a/internal/difc/resource.go b/internal/difc/resource.go index cc84ad19..cf3c3769 100644 --- a/internal/difc/resource.go +++ b/internal/difc/resource.go @@ -1,5 +1,7 @@ package difc +import "encoding/json" + // Resource represents an external system with label requirements (deprecated - use LabeledResource) type Resource struct { Description string @@ -93,6 +95,10 @@ func (s *SimpleLabeledData) ToResult() (interface{}, error) { // CollectionLabeledData represents a collection where each item has its own labels type CollectionLabeledData struct { Items []LabeledItem + + // mcpWrapper stores the original MCP response structure for rewrapping + // When the guard returns labeled items, we need to rewrap them in MCP format + mcpWrapper interface{} } // LabeledItem represents a single item in a collection with its labels @@ -101,6 +107,11 @@ type LabeledItem struct { Labels *LabeledResource } +// SetMCPWrapper stores the original MCP response structure for rewrapping +func (c *CollectionLabeledData) SetMCPWrapper(wrapper interface{}) { + c.mcpWrapper = wrapper +} + func 
// rewrapAsMCP serializes items to JSON and embeds the resulting text in a
// minimal MCP-style response: a "content" list holding one entry of type
// "text". A marshaling failure is returned unchanged together with a nil map.
func rewrapAsMCP(items interface{}) (map[string]interface{}, error) {
	encoded, err := json.Marshal(items)
	if err != nil {
		return nil, err
	}

	// The single content entry carries the JSON as a string, not as nested data.
	textEntry := map[string]interface{}{
		"type": "text",
		"text": string(encoded),
	}
	wrapped := map[string]interface{}{
		"content": []interface{}{textEntry},
	}
	return wrapped, nil
}
"content": []interface{}{ + map[string]interface{}{ + "type": "text", + "text": "[]", + }, + }, + }, nil + } + + // Single item that's already MCP-formatted - return it directly + // This handles responses where unwrapMCPResponse failed (e.g., multi-content with resource types) + if len(result) == 1 { + if itemMap, ok := result[0].(map[string]interface{}); ok { + if _, hasContent := itemMap["content"]; hasContent { + return result[0], nil + } + } + } + return result, nil } diff --git a/internal/difc/resource_test.go b/internal/difc/resource_test.go new file mode 100644 index 00000000..867d57f9 --- /dev/null +++ b/internal/difc/resource_test.go @@ -0,0 +1,161 @@ +package difc + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFilteredCollectionLabeledData_ToResult(t *testing.T) { + t.Run("empty filtered collection returns empty MCP response", func(t *testing.T) { + // This is the key test case: + // - Guard labels a single object with labels that should be filtered + // - Filtering removes the item (e.g., integrity violation) + // - We need to return a proper empty MCP response, not a bare array + filtered := &FilteredCollectionLabeledData{ + Accessible: []LabeledItem{}, + Filtered: []LabeledItem{{Data: "filtered item", Labels: nil}}, + TotalCount: 1, + FilterReason: "DIFC policy - integrity violation", + mcpWrapper: nil, // No wrapper because unwrapMCPResponse failed + } + + result, err := filtered.ToResult() + require.NoError(t, err) + + // Should return empty MCP response, not bare array + resultMap, ok := result.(map[string]interface{}) + require.True(t, ok, "Result should be a map (MCP format), not a bare array") + + content, ok := resultMap["content"].([]interface{}) + require.True(t, ok, "Result should have content array") + assert.Len(t, content, 1, "Content should have 1 item") + + firstContent, ok := content[0].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "text", 
firstContent["type"]) + assert.Equal(t, "[]", firstContent["text"]) + }) + + t.Run("single MCP-formatted item without mcpWrapper", func(t *testing.T) { + // Handles responses where unwrapMCPResponse failed (e.g., multi-content with resource types) + // and the guard returns the full MCP response as item.Data + mcpResponse := map[string]interface{}{ + "content": []interface{}{ + map[string]interface{}{ + "type": "text", + "text": "successfully downloaded file", + }, + map[string]interface{}{ + "type": "resource", + "resource": map[string]interface{}{ + "uri": "repo://owner/repo/file", + "mimeType": "text/plain", + "text": "file contents", + }, + }, + }, + } + + filtered := &FilteredCollectionLabeledData{ + Accessible: []LabeledItem{ + { + Data: mcpResponse, + Labels: NewLabeledResource("test resource"), + }, + }, + Filtered: []LabeledItem{}, + TotalCount: 1, + FilterReason: "test", + mcpWrapper: nil, + } + + result, err := filtered.ToResult() + require.NoError(t, err) + + // Should return the MCP response directly, not wrapped in an array + resultMap, ok := result.(map[string]interface{}) + require.True(t, ok, "Result should be a map, not an array") + + content, ok := resultMap["content"].([]interface{}) + require.True(t, ok, "Result should have content array") + assert.Len(t, content, 2, "Content should have 2 items") + }) + + t.Run("single non-MCP item returns array", func(t *testing.T) { + // Regular case: single item that's not MCP-formatted + filtered := &FilteredCollectionLabeledData{ + Accessible: []LabeledItem{ + { + Data: map[string]interface{}{"name": "test", "value": 123}, + Labels: NewLabeledResource("test"), + }, + }, + Filtered: []LabeledItem{}, + TotalCount: 1, + FilterReason: "test", + mcpWrapper: nil, + } + + result, err := filtered.ToResult() + require.NoError(t, err) + + // Non-MCP item should still return as array (for compatibility) + resultArr, ok := result.([]interface{}) + require.True(t, ok, "Result should be an array for non-MCP data") + 
assert.Len(t, resultArr, 1) + }) + + t.Run("with mcpWrapper uses rewrapAsMCP", func(t *testing.T) { + // When mcpWrapper is set, we use the normal rewrapping logic + filtered := &FilteredCollectionLabeledData{ + Accessible: []LabeledItem{ + {Data: map[string]interface{}{"name": "item1"}, Labels: nil}, + {Data: map[string]interface{}{"name": "item2"}, Labels: nil}, + }, + Filtered: []LabeledItem{}, + TotalCount: 2, + FilterReason: "test", + mcpWrapper: map[string]interface{}{"original": "wrapper"}, + } + + result, err := filtered.ToResult() + require.NoError(t, err) + + // Should be MCP-formatted + resultMap, ok := result.(map[string]interface{}) + require.True(t, ok, "Result should be MCP-formatted") + + content, ok := resultMap["content"].([]interface{}) + require.True(t, ok) + assert.Len(t, content, 1) + }) + + t.Run("empty with mcpWrapper returns empty MCP array", func(t *testing.T) { + // When mcpWrapper is set and all items are filtered + filtered := &FilteredCollectionLabeledData{ + Accessible: []LabeledItem{}, + Filtered: []LabeledItem{{Data: "filtered", Labels: nil}}, + TotalCount: 1, + FilterReason: "DIFC policy", + mcpWrapper: map[string]interface{}{"original": "wrapper"}, + } + + result, err := filtered.ToResult() + require.NoError(t, err) + + // Should be MCP-formatted with empty array in text + resultMap, ok := result.(map[string]interface{}) + require.True(t, ok, "Result should be MCP-formatted") + + content, ok := resultMap["content"].([]interface{}) + require.True(t, ok) + assert.Len(t, content, 1) + + firstContent, ok := content[0].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "text", firstContent["type"]) + assert.Equal(t, "[]", firstContent["text"]) + }) +} diff --git a/internal/guard/context.go b/internal/guard/context.go index a6382dd5..90fe4094 100644 --- a/internal/guard/context.go +++ b/internal/guard/context.go @@ -24,6 +24,7 @@ import ( "context" "github.com/github/gh-aw-mcpg/internal/logger" + 
"github.com/github/gh-aw-mcpg/internal/mcp" ) var log = logger.New("guard:context") @@ -33,20 +34,36 @@ type ContextKey string const ( // AgentIDContextKey stores the agent ID in the request context + // Deprecated: Use session ID from mcp.SessionIDContextKey instead. + // This key is kept for backward compatibility but is no longer set in production. AgentIDContextKey ContextKey = "difc-agent-id" // RequestStateContextKey stores guard-specific request state RequestStateContextKey ContextKey = "difc-request-state" ) -// GetAgentIDFromContext extracts the agent ID from the context -// Returns "default" if not found +// GetAgentIDFromContext extracts the agent/session ID from the context. +// For DIFC purposes, the session ID (from Authorization header) is used as the agent ID. +// This ensures each session has its own DIFC labels. +// +// Lookup order: +// 1. AgentIDContextKey (for explicit agent ID, rarely used) +// 2. mcp.SessionIDContextKey (session ID from Authorization header) +// 3. 
"default" as fallback func GetAgentIDFromContext(ctx context.Context) string { + // First check for explicit agent ID (backward compatibility) if agentID, ok := ctx.Value(AgentIDContextKey).(string); ok && agentID != "" { - log.Printf("Retrieved agent ID from context: %s", agentID) + log.Printf("Retrieved explicit agent ID from context: %s", agentID) return agentID } - log.Print("Agent ID not found in context, returning default") + + // Fall back to session ID (the common case in production) + if sessionID, ok := ctx.Value(mcp.SessionIDContextKey).(string); ok && sessionID != "" { + log.Printf("Using session ID as agent ID: %s", sessionID) + return sessionID + } + + log.Print("No agent/session ID found in context, returning default") return "default" } diff --git a/internal/guard/guard_test.go b/internal/guard/guard_test.go index 521504d1..e9f78247 100644 --- a/internal/guard/guard_test.go +++ b/internal/guard/guard_test.go @@ -10,6 +10,7 @@ import ( "github.com/github/gh-aw-mcpg/internal/auth" "github.com/github/gh-aw-mcpg/internal/difc" + "github.com/github/gh-aw-mcpg/internal/mcp" ) // mockGuard is a simple guard implementation for testing that can be distinguished by ID @@ -542,6 +543,25 @@ func TestContextHelpers(t *testing.T) { assert.Equal(t, "default", agentID, "Should return default for wrong type") }) + t.Run("GetAgentIDFromContext falls back to session ID", func(t *testing.T) { + ctx := context.Background() + // Set session ID (this is what routed.go does in production) + ctx = context.WithValue(ctx, mcp.SessionIDContextKey, "session-123") + + agentID := GetAgentIDFromContext(ctx) + assert.Equal(t, "session-123", agentID) + }) + + t.Run("GetAgentIDFromContext prefers explicit agent ID over session ID", func(t *testing.T) { + ctx := context.Background() + // Set both session ID and explicit agent ID + ctx = context.WithValue(ctx, mcp.SessionIDContextKey, "session-123") + ctx = SetAgentIDInContext(ctx, "explicit-agent") + + agentID := 
GetAgentIDFromContext(ctx) + assert.Equal(t, "explicit-agent", agentID) + }) + t.Run("auth.ExtractAgentID Bearer", func(t *testing.T) { agentID := auth.ExtractAgentID("Bearer test-token-123") assert.Equal(t, "test-token-123", agentID) diff --git a/internal/guard/loader.go b/internal/guard/loader.go new file mode 100644 index 00000000..b0a5d881 --- /dev/null +++ b/internal/guard/loader.go @@ -0,0 +1,267 @@ +package guard + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/github/gh-aw-mcpg/internal/logger" +) + +var logLoader = logger.New("guard:loader") + +// LoaderConfig contains configuration for loading WASM guards +type LoaderConfig struct { + // Path is the local filesystem path to the WASM file (mutually exclusive with URL) + Path string + + // URL is the remote URL to download the WASM file from (mutually exclusive with Path) + URL string + + // SHA256 is the expected SHA256 checksum (required when URL is set) + SHA256 string + + // CacheDir is the directory to cache downloaded WASM files + // If empty, uses system temp directory + CacheDir string + + // HTTPTimeout is the timeout for HTTP requests (default: 60s) + HTTPTimeout time.Duration + + // GitHubToken is an optional GitHub token for private repository access + // Can be set via GITHUB_TOKEN environment variable + GitHubToken string +} + +// LoadResult contains the result of loading a WASM guard +type LoadResult struct { + // WASMBytes contains the loaded WASM binary + WASMBytes []byte + + // Source indicates where the WASM was loaded from + // Either "file", "cache", or "url" + Source string + + // CachedPath is the path where the WASM is cached (only set for URL loads) + CachedPath string +} + +// Load loads a WASM guard from either a local path or a remote URL +func Load(ctx context.Context, cfg LoaderConfig) (*LoadResult, error) { + // Set defaults + if cfg.HTTPTimeout == 0 { + cfg.HTTPTimeout = 60 * 
time.Second + } + + // Check for GitHub token in environment if not provided + if cfg.GitHubToken == "" { + cfg.GitHubToken = os.Getenv("GITHUB_TOKEN") + } + + // Validate configuration + hasPath := cfg.Path != "" + hasURL := cfg.URL != "" + + if !hasPath && !hasURL { + return nil, fmt.Errorf("either path or url is required") + } + if hasPath && hasURL { + return nil, fmt.Errorf("path and url are mutually exclusive") + } + if hasURL && cfg.SHA256 == "" { + return nil, fmt.Errorf("sha256 is required when using url") + } + + // Load from path or URL + if hasPath { + return loadFromPath(cfg.Path) + } + return loadFromURL(ctx, cfg) +} + +// loadFromPath loads a WASM file from the local filesystem +func loadFromPath(path string) (*LoadResult, error) { + logLoader.Printf("Loading WASM from file: %s", path) + + wasmBytes, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read WASM file: %w", err) + } + + logLoader.Printf("Loaded WASM from file: %s (%d bytes)", path, len(wasmBytes)) + return &LoadResult{ + WASMBytes: wasmBytes, + Source: "file", + }, nil +} + +// loadFromURL loads a WASM file from a remote URL, with caching and verification +func loadFromURL(ctx context.Context, cfg LoaderConfig) (*LoadResult, error) { + logLoader.Printf("Loading WASM from URL: %s", cfg.URL) + + // Determine cache directory + cacheDir := cfg.CacheDir + if cacheDir == "" { + cacheDir = filepath.Join(os.TempDir(), "mcp-gateway", "guards") + } + + // Create cache directory if needed + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create cache directory: %w", err) + } + + // Generate cache filename based on SHA256 + cacheFile := filepath.Join(cacheDir, cfg.SHA256+".wasm") + + // Check if cached file exists and is valid + if wasmBytes, err := loadFromCache(cacheFile, cfg.SHA256); err == nil { + logLoader.Printf("Loaded WASM from cache: %s", cacheFile) + return &LoadResult{ + WASMBytes: wasmBytes, + Source: "cache", + 
CachedPath: cacheFile, + }, nil + } + + // Download from URL + wasmBytes, err := downloadWASM(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("failed to download WASM: %w", err) + } + + // Verify checksum + if err := verifyChecksum(wasmBytes, cfg.SHA256); err != nil { + return nil, err + } + + // Cache the downloaded file + if err := os.WriteFile(cacheFile, wasmBytes, 0644); err != nil { + logLoader.Printf("Warning: failed to cache WASM file: %v", err) + // Continue without caching + } else { + logLoader.Printf("Cached WASM file: %s", cacheFile) + } + + logLoader.Printf("Downloaded and verified WASM from URL: %s (%d bytes)", cfg.URL, len(wasmBytes)) + return &LoadResult{ + WASMBytes: wasmBytes, + Source: "url", + CachedPath: cacheFile, + }, nil +} + +// loadFromCache attempts to load a WASM file from cache and verify its checksum +func loadFromCache(cacheFile string, expectedSHA256 string) ([]byte, error) { + wasmBytes, err := os.ReadFile(cacheFile) + if err != nil { + return nil, err + } + + // Verify checksum + if err := verifyChecksum(wasmBytes, expectedSHA256); err != nil { + // Cached file is corrupted, remove it + os.Remove(cacheFile) + return nil, err + } + + return wasmBytes, nil +} + +// downloadWASM downloads a WASM file from a URL +func downloadWASM(ctx context.Context, cfg LoaderConfig) ([]byte, error) { + // Create HTTP client with timeout + client := &http.Client{ + Timeout: cfg.HTTPTimeout, + } + + // Create request + req, err := http.NewRequestWithContext(ctx, http.MethodGet, cfg.URL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + // Add GitHub token for private repositories + if cfg.GitHubToken != "" && isGitHubURL(cfg.URL) { + req.Header.Set("Authorization", "token "+cfg.GitHubToken) + // GitHub API requires Accept header for release assets + if strings.Contains(cfg.URL, "/releases/download/") { + req.Header.Set("Accept", "application/octet-stream") + } + } + + // Set user agent + 
req.Header.Set("User-Agent", "mcp-gateway-guard-loader") + + // Execute request + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to download: %w", err) + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return nil, fmt.Errorf("download failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Read response body with size limit (100MB) + const maxSize = 100 * 1024 * 1024 + limitedReader := io.LimitReader(resp.Body, maxSize) + wasmBytes, err := io.ReadAll(limitedReader) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + return wasmBytes, nil +} + +// verifyChecksum verifies the SHA256 checksum of WASM bytes +func verifyChecksum(wasmBytes []byte, expectedSHA256 string) error { + // Normalize expected checksum (lowercase, no spaces) + expectedSHA256 = strings.ToLower(strings.TrimSpace(expectedSHA256)) + + // Calculate actual checksum + hash := sha256.Sum256(wasmBytes) + actualSHA256 := hex.EncodeToString(hash[:]) + + if actualSHA256 != expectedSHA256 { + return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, actualSHA256) + } + + logLoader.Printf("Checksum verified: %s", actualSHA256) + return nil +} + +// isGitHubURL checks if a URL is a GitHub URL +func isGitHubURL(url string) bool { + return strings.Contains(url, "github.com") || strings.Contains(url, "githubusercontent.com") +} + +// ClearCache removes cached WASM files +func ClearCache(cacheDir string) error { + if cacheDir == "" { + cacheDir = filepath.Join(os.TempDir(), "mcp-gateway", "guards") + } + + files, err := filepath.Glob(filepath.Join(cacheDir, "*.wasm")) + if err != nil { + return fmt.Errorf("failed to list cache files: %w", err) + } + + for _, file := range files { + if err := os.Remove(file); err != nil { + logLoader.Printf("Warning: failed to remove cache file %s: %v", file, err) + } + } + + 
logLoader.Printf("Cleared %d cached WASM files from %s", len(files), cacheDir) + return nil +} diff --git a/internal/guard/loader_test.go b/internal/guard/loader_test.go new file mode 100644 index 00000000..39fbc5dc --- /dev/null +++ b/internal/guard/loader_test.go @@ -0,0 +1,316 @@ +package guard + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Sample WASM bytes (minimal valid WASM module: magic number + version) +var minimalWASM = []byte{0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00} + +func sha256Hex(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +func TestLoad_FromPath(t *testing.T) { + // Create a temporary WASM file + tmpDir := t.TempDir() + wasmPath := filepath.Join(tmpDir, "test.wasm") + err := os.WriteFile(wasmPath, minimalWASM, 0644) + require.NoError(t, err) + + // Load from path + result, err := Load(context.Background(), LoaderConfig{ + Path: wasmPath, + }) + + require.NoError(t, err) + assert.Equal(t, minimalWASM, result.WASMBytes) + assert.Equal(t, "file", result.Source) + assert.Empty(t, result.CachedPath) +} + +func TestLoad_FromPath_NotFound(t *testing.T) { + result, err := Load(context.Background(), LoaderConfig{ + Path: "/nonexistent/path/guard.wasm", + }) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "failed to read WASM file") +} + +func TestLoad_Validation_NoPathOrURL(t *testing.T) { + result, err := Load(context.Background(), LoaderConfig{}) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "either path or url is required") +} + +func TestLoad_Validation_BothPathAndURL(t *testing.T) { + result, err := Load(context.Background(), LoaderConfig{ + Path: "/some/path", + URL: "https://example.com/guard.wasm", + SHA256: sha256Hex(minimalWASM), 
+ }) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "path and url are mutually exclusive") +} + +func TestLoad_Validation_URLWithoutSHA256(t *testing.T) { + result, err := Load(context.Background(), LoaderConfig{ + URL: "https://example.com/guard.wasm", + }) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "sha256 is required when using url") +} + +func TestLoad_FromURL(t *testing.T) { + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/wasm") + w.Write(minimalWASM) + })) + defer server.Close() + + expectedSHA256 := sha256Hex(minimalWASM) + cacheDir := t.TempDir() + + // Load from URL + result, err := Load(context.Background(), LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: expectedSHA256, + CacheDir: cacheDir, + }) + + require.NoError(t, err) + assert.Equal(t, minimalWASM, result.WASMBytes) + assert.Equal(t, "url", result.Source) + assert.NotEmpty(t, result.CachedPath) + + // Verify cache file was created + _, err = os.Stat(result.CachedPath) + assert.NoError(t, err) +} + +func TestLoad_FromCache(t *testing.T) { + expectedSHA256 := sha256Hex(minimalWASM) + cacheDir := t.TempDir() + + // Pre-populate cache + cacheFile := filepath.Join(cacheDir, expectedSHA256+".wasm") + err := os.WriteFile(cacheFile, minimalWASM, 0644) + require.NoError(t, err) + + // Create server that should NOT be called + serverCalled := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + serverCalled = true + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + // Load - should use cache + result, err := Load(context.Background(), LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: expectedSHA256, + CacheDir: cacheDir, + }) + + require.NoError(t, err) + assert.Equal(t, minimalWASM, result.WASMBytes) + assert.Equal(t, 
"cache", result.Source) + assert.False(t, serverCalled, "server should not be called when cache is valid") +} + +func TestLoad_FromURL_ChecksumMismatch(t *testing.T) { + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/wasm") + w.Write(minimalWASM) + })) + defer server.Close() + + // Load with wrong checksum + result, err := Load(context.Background(), LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: "0000000000000000000000000000000000000000000000000000000000000000", + CacheDir: t.TempDir(), + }) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "checksum mismatch") +} + +func TestLoad_FromURL_ServerError(t *testing.T) { + // Create test server that returns error + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + w.Write([]byte("not found")) + })) + defer server.Close() + + result, err := Load(context.Background(), LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: sha256Hex(minimalWASM), + CacheDir: t.TempDir(), + }) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "404") +} + +func TestLoad_FromURL_Timeout(t *testing.T) { + // Create test server that sleeps + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(200 * time.Millisecond) + w.Write(minimalWASM) + })) + defer server.Close() + + // Use short timeout + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + result, err := Load(ctx, LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: sha256Hex(minimalWASM), + CacheDir: t.TempDir(), + HTTPTimeout: 50 * time.Millisecond, + }) + + assert.Error(t, err) + assert.Nil(t, result) +} + +func TestLoad_FromURL_WithGitHubToken(t *testing.T) { + var receivedAuth string + + // Create test 
server that checks for auth header + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + w.Write(minimalWASM) + })) + defer server.Close() + + // Note: The token is only sent for github.com URLs, so this test won't include it + // But we can still test the flow + result, err := Load(context.Background(), LoaderConfig{ + URL: server.URL + "/guard.wasm", + SHA256: sha256Hex(minimalWASM), + CacheDir: t.TempDir(), + GitHubToken: "test-token", + }) + + require.NoError(t, err) + assert.Equal(t, minimalWASM, result.WASMBytes) + // Token not sent because not a github.com URL + assert.Empty(t, receivedAuth) +} + +func TestVerifyChecksum(t *testing.T) { + tests := []struct { + name string + data []byte + checksum string + expectError bool + }{ + { + name: "valid checksum", + data: minimalWASM, + checksum: sha256Hex(minimalWASM), + expectError: false, + }, + { + name: "valid checksum uppercase", + data: minimalWASM, + checksum: strings.ToUpper(sha256Hex(minimalWASM)), + expectError: false, + }, + { + name: "valid checksum with spaces", + data: minimalWASM, + checksum: " " + sha256Hex(minimalWASM) + " ", + expectError: false, + }, + { + name: "invalid checksum", + data: minimalWASM, + checksum: "0000000000000000000000000000000000000000000000000000000000000000", + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := verifyChecksum(tt.data, tt.checksum) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestIsGitHubURL(t *testing.T) { + tests := []struct { + url string + expected bool + }{ + {"https://github.com/owner/repo/releases/download/v1.0.0/guard.wasm", true}, + {"https://api.github.com/repos/owner/repo/releases/assets/123", true}, + {"https://raw.githubusercontent.com/owner/repo/main/guard.wasm", true}, + {"https://example.com/guard.wasm", false}, + 
{"https://gitlab.com/owner/repo/guard.wasm", false}, + } + + for _, tt := range tests { + t.Run(tt.url, func(t *testing.T) { + result := isGitHubURL(tt.url) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestClearCache(t *testing.T) { + cacheDir := t.TempDir() + + // Create some cache files + for i := 0; i < 3; i++ { + cacheFile := filepath.Join(cacheDir, "test"+string(rune('0'+i))+".wasm") + err := os.WriteFile(cacheFile, minimalWASM, 0644) + require.NoError(t, err) + } + + // Verify files exist + files, _ := filepath.Glob(filepath.Join(cacheDir, "*.wasm")) + assert.Len(t, files, 3) + + // Clear cache + err := ClearCache(cacheDir) + require.NoError(t, err) + + // Verify files are deleted + files, _ = filepath.Glob(filepath.Join(cacheDir, "*.wasm")) + assert.Len(t, files, 0) +} diff --git a/internal/guard/wasm.go b/internal/guard/wasm.go new file mode 100644 index 00000000..30f55705 --- /dev/null +++ b/internal/guard/wasm.go @@ -0,0 +1,711 @@ +package guard + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "sync" + + "github.com/github/gh-aw-mcpg/internal/difc" + "github.com/github/gh-aw-mcpg/internal/logger" + "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1" +) + +var logWasm = logger.New("guard:wasm") + +// WasmGuardOptions configures optional settings for WASM guard creation +type WasmGuardOptions struct { + // Stdout is the writer for WASM stdout output. Defaults to os.Stdout if nil. + Stdout io.Writer + // Stderr is the writer for WASM stderr output. Defaults to os.Stderr if nil. 
+ Stderr io.Writer +} + +// WasmGuard implements Guard interface by executing a WASM module in-process +// The WASM module runs sandboxed within the gateway using wazero runtime +// Guards cannot make direct network calls - they receive a BackendCaller interface via host functions +// +// Thread Safety: WASM modules are single-threaded, so all calls to a guard instance +// are serialized using a mutex. Concurrent requests will queue and execute one at a time. +type WasmGuard struct { + name string + runtime wazero.Runtime + module api.Module + + // Backend caller provided to the guard via host functions + backend BackendCaller + ctx context.Context + + // mu serializes all calls to the WASM module + // WASM modules are single-threaded and cannot handle concurrent calls + mu sync.Mutex +} + +// NewWasmGuard creates a new WASM guard from a WASM binary file +func NewWasmGuard(ctx context.Context, name string, wasmPath string, backend BackendCaller) (*WasmGuard, error) { + logWasm.Printf("Creating WASM guard: name=%s, path=%s", name, wasmPath) + + // Read WASM binary + wasmBytes, err := os.ReadFile(wasmPath) + if err != nil { + return nil, fmt.Errorf("failed to read WASM file: %w", err) + } + + return NewWasmGuardFromBytes(ctx, name, wasmBytes, backend) +} + +// NewWasmGuardFromBytes creates a new WASM guard from WASM binary bytes +// This is useful when loading guards from URLs or other sources +func NewWasmGuardFromBytes(ctx context.Context, name string, wasmBytes []byte, backend BackendCaller) (*WasmGuard, error) { + return NewWasmGuardWithOptions(ctx, name, wasmBytes, backend, nil) +} + +// NewWasmGuardWithOptions creates a new WASM guard from WASM binary bytes with custom options +// Options can be nil to use defaults (stdout/stderr go to os.Stdout/os.Stderr) +func NewWasmGuardWithOptions(ctx context.Context, name string, wasmBytes []byte, backend BackendCaller, opts *WasmGuardOptions) (*WasmGuard, error) { + logWasm.Printf("Creating WASM guard from bytes: 
name=%s, size=%d", name, len(wasmBytes)) + + // Create WASM runtime + runtime := wazero.NewRuntime(ctx) + + // Instantiate WASI + if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil { + runtime.Close(ctx) + return nil, fmt.Errorf("failed to instantiate WASI: %w", err) + } + + guard := &WasmGuard{ + name: name, + runtime: runtime, + backend: backend, + ctx: ctx, + } + + // Create host functions for the guard to call + if err := guard.instantiateHostFunctions(ctx); err != nil { + runtime.Close(ctx) + return nil, fmt.Errorf("failed to instantiate host functions: %w", err) + } + + // Configure module options with stdout/stderr + moduleConfig := wazero.NewModuleConfig().WithName("guard").WithStartFunctions() + if opts != nil { + if opts.Stdout != nil { + moduleConfig = moduleConfig.WithStdout(opts.Stdout) + } + if opts.Stderr != nil { + moduleConfig = moduleConfig.WithStderr(opts.Stderr) + } + } + + // Compile and instantiate the WASM module + module, err := runtime.InstantiateWithConfig(ctx, wasmBytes, moduleConfig) + if err != nil { + runtime.Close(ctx) + return nil, fmt.Errorf("failed to instantiate WASM module: %w", err) + } + + guard.module = module + + // Verify required functions are exported + labelResourceFn := module.ExportedFunction("label_resource") + labelResponseFn := module.ExportedFunction("label_response") + + if labelResourceFn == nil || labelResponseFn == nil { + runtime.Close(ctx) + + // Check if this was compiled with standard Go (only _start is exported) + if module.ExportedFunction("_start") != nil && labelResourceFn == nil { + return nil, fmt.Errorf("WASM module does not export guard functions. " + + "This usually means the guard was compiled with standard Go instead of TinyGo. " + + "TinyGo is required for proper function exports. " + + "Note: TinyGo 0.34 supports Go 1.19-1.23 (not yet compatible with Go 1.25). 
" + + "See examples/guards/sample-guard/README.md for details") + } + + return nil, fmt.Errorf("WASM module must export label_resource and label_response functions") + } + + logWasm.Printf("WASM guard created successfully: name=%s", name) + return guard, nil +} + +// instantiateHostFunctions creates the host functions that the WASM module can call +func (g *WasmGuard) instantiateHostFunctions(ctx context.Context) error { + // Create a host module with functions the guard can call + _, err := g.runtime.NewHostModuleBuilder("env"). + // call_backend: allows guards to call MCP tools on the backend + NewFunctionBuilder(). + WithGoModuleFunction(api.GoModuleFunc(g.hostCallBackend), []api.ValueType{ + api.ValueTypeI32, // ptr to tool name + api.ValueTypeI32, // tool name length + api.ValueTypeI32, // ptr to args JSON + api.ValueTypeI32, // args length + api.ValueTypeI32, // ptr to result buffer + api.ValueTypeI32, // result buffer size + }, []api.ValueType{api.ValueTypeI32}). // returns result length or negative error + Export("call_backend"). + // host_log: allows guards to send log messages to the gateway + NewFunctionBuilder(). + WithGoModuleFunction(api.GoModuleFunc(g.hostLog), []api.ValueType{ + api.ValueTypeI32, // log level (0=debug, 1=info, 2=warn, 3=error) + api.ValueTypeI32, // ptr to message + api.ValueTypeI32, // message length + }, []api.ValueType{}). + Export("host_log"). 
+ Instantiate(ctx) + + return err +} + +// hostCallBackend is called by the WASM module to make backend MCP calls +func (g *WasmGuard) hostCallBackend(ctx context.Context, m api.Module, stack []uint64) { + toolNamePtr := uint32(stack[0]) + toolNameLen := uint32(stack[1]) + argsPtr := uint32(stack[2]) + argsLen := uint32(stack[3]) + resultPtr := uint32(stack[4]) + resultSize := uint32(stack[5]) + + // Helper to set error return value + setError := func() { + stack[0] = uint64(^uint32(0)) // Max uint32 represents error + } + + // Read tool name from WASM memory + toolNameBytes, ok := m.Memory().Read(toolNamePtr, toolNameLen) + if !ok { + setError() + return + } + toolName := string(toolNameBytes) + + // Read args JSON from WASM memory + argsBytes, ok := m.Memory().Read(argsPtr, argsLen) + if !ok { + setError() + return + } + + // Parse args + var args interface{} + if len(argsBytes) > 0 { + if err := json.Unmarshal(argsBytes, &args); err != nil { + logWasm.Printf("Failed to unmarshal backend call args: %v", err) + setError() + return + } + } + + logWasm.Printf("WASM guard calling backend: tool=%s", toolName) + + // Call backend + result, err := g.backend.CallTool(ctx, toolName, args) + if err != nil { + logWasm.Printf("Backend call failed: %v", err) + setError() + return + } + + // Marshal result to JSON + resultJSON, err := json.Marshal(result) + if err != nil { + logWasm.Printf("Failed to marshal backend result: %v", err) + setError() + return + } + + // Check if result fits in buffer + if uint32(len(resultJSON)) > resultSize { + logWasm.Printf("Result too large: %d > %d", len(resultJSON), resultSize) + setError() + return + } + + // Write result to WASM memory + if !m.Memory().Write(resultPtr, resultJSON) { + logWasm.Printf("Failed to write result to WASM memory") + setError() + return + } + + // Return result length + stack[0] = uint64(uint32(len(resultJSON))) +} + +// Log level constants for hostLog +const ( + logLevelDebug = 0 + logLevelInfo = 1 + logLevelWarn 
= 2 + logLevelError = 3 +) + +// hostLog is called by the WASM module to send log messages to the gateway +func (g *WasmGuard) hostLog(ctx context.Context, m api.Module, stack []uint64) { + level := uint32(stack[0]) + msgPtr := uint32(stack[1]) + msgLen := uint32(stack[2]) + + // Read message from WASM memory + msgBytes, ok := m.Memory().Read(msgPtr, msgLen) + if !ok { + logWasm.Printf("hostLog: failed to read message from WASM memory") + return + } + msg := string(msgBytes) + + // Log at the appropriate level + prefix := fmt.Sprintf("[guard:%s] ", g.name) + switch level { + case logLevelDebug: + logWasm.Printf("%sDEBUG: %s", prefix, msg) + case logLevelInfo: + logWasm.Printf("%sINFO: %s", prefix, msg) + case logLevelWarn: + logWasm.Printf("%sWARN: %s", prefix, msg) + case logLevelError: + logWasm.Printf("%sERROR: %s", prefix, msg) + default: + logWasm.Printf("%s%s", prefix, msg) + } +} + +// Name returns the identifier for this guard +func (g *WasmGuard) Name() string { + return g.name +} + +// LabelResource calls the WASM module's label_resource function +func (g *WasmGuard) LabelResource(ctx context.Context, toolName string, args interface{}, backend BackendCaller, caps *difc.Capabilities) (*difc.LabeledResource, difc.OperationType, error) { + logWasm.Printf("LabelResource called: toolName=%s", toolName) + + // Serialize access to the WASM module + g.mu.Lock() + defer g.mu.Unlock() + + // Update backend caller for this request + g.backend = backend + + // Prepare input + input := map[string]interface{}{ + "tool_name": toolName, + "tool_args": args, + } + if caps != nil { + input["capabilities"] = caps + } + + inputJSON, err := json.Marshal(input) + if err != nil { + return nil, difc.OperationWrite, fmt.Errorf("failed to marshal input: %w", err) + } + + // Call WASM function + resultJSON, err := g.callWasmFunction("label_resource", inputJSON) + if err != nil { + return nil, difc.OperationWrite, err + } + + // Parse result + var response map[string]interface{} + 
if err := json.Unmarshal(resultJSON, &response); err != nil { + return nil, difc.OperationWrite, fmt.Errorf("failed to unmarshal WASM response: %w", err) + } + + return parseResourceResponse(response) +} + +// LabelResponse calls the WASM module's label_response function +func (g *WasmGuard) LabelResponse(ctx context.Context, toolName string, result interface{}, backend BackendCaller, caps *difc.Capabilities) (difc.LabeledData, error) { + logWasm.Printf("LabelResponse called: toolName=%s", toolName) + + // Serialize access to the WASM module + g.mu.Lock() + defer g.mu.Unlock() + + // Update backend caller for this request + g.backend = backend + + // Extract the actual response from MCP wrapper if present + // MCP responses are wrapped as: {"content":[{"type":"text","text":"{...actual JSON...}"}]} + unwrappedResult, wasMCPWrapped := unwrapMCPResponse(result) + + // Prepare input with unwrapped result + input := map[string]interface{}{ + "tool_name": toolName, + "tool_result": unwrappedResult, // Pass unwrapped data to guard + } + if caps != nil { + input["capabilities"] = caps + } + + inputJSON, err := json.Marshal(input) + if err != nil { + return nil, fmt.Errorf("failed to marshal input: %w", err) + } + + // Call WASM function + resultJSON, err := g.callWasmFunction("label_response", inputJSON) + if err != nil { + return nil, err + } + + // If empty result, return nil (no fine-grained labeling) + if len(resultJSON) == 0 { + return nil, nil + } + + // Parse result - check for new path-based format first + var responseMap map[string]interface{} + if err := json.Unmarshal(resultJSON, &responseMap); err != nil { + return nil, fmt.Errorf("failed to unmarshal WASM response: %w", err) + } + + // Check for path-based labeling format (preferred, more efficient) + if _, hasLabeledPaths := responseMap["labeled_paths"]; hasLabeledPaths { + labeledData, err := parsePathLabeledResponse(resultJSON, unwrappedResult) + if err != nil { + return nil, err + } + // Store MCP wrapper 
for rewrapping if needed + if wasMCPWrapped { + if collection, ok := labeledData.(*difc.CollectionLabeledData); ok { + collection.SetMCPWrapper(result) + } + } + return labeledData, nil + } + + // Legacy format: check if it's a collection with "items" + if items, ok := responseMap["items"].([]interface{}); ok && len(items) > 0 { + collection, err := parseCollectionLabeledData(items) + if err != nil { + return nil, err + } + // Store MCP wrapper for rewrapping if needed + if wasMCPWrapped { + collection.SetMCPWrapper(result) + } + return collection, nil + } + + // No fine-grained labeling + return nil, nil +} + +// unwrapMCPResponse extracts the actual JSON from MCP content wrapper +// MCP responses are wrapped as: {"content":[{"type":"text","text":"{...actual JSON...}"}]} +// Returns (unwrapped data, true) if MCP wrapped, or (original, false) if not +func unwrapMCPResponse(result interface{}) (interface{}, bool) { + resultMap, ok := result.(map[string]interface{}) + if !ok { + return result, false + } + + content, ok := resultMap["content"].([]interface{}) + if !ok || len(content) == 0 { + return result, false + } + + firstContent, ok := content[0].(map[string]interface{}) + if !ok { + return result, false + } + + // Check if this is a text content type + contentType, _ := firstContent["type"].(string) + if contentType != "text" { + return result, false + } + + textStr, ok := firstContent["text"].(string) + if !ok { + return result, false + } + + // Parse the JSON string inside text + var parsed interface{} + if err := json.Unmarshal([]byte(textStr), &parsed); err != nil { + // Not valid JSON inside text, return original + return result, false + } + + logWasm.Printf("Unwrapped MCP response for guard processing") + return parsed, true +} + +// parsePathLabeledResponse parses the new path-based labeling format +// This is more efficient as guards don't need to copy data, just return paths and labels +func parsePathLabeledResponse(responseJSON []byte, originalData 
interface{}) (difc.LabeledData, error) { + pathLabels, err := difc.ParsePathLabels(responseJSON) + if err != nil { + return nil, fmt.Errorf("failed to parse path labels: %w", err) + } + + pld, err := difc.NewPathLabeledData(originalData, pathLabels) + if err != nil { + return nil, fmt.Errorf("failed to apply path labels: %w", err) + } + + // Convert to CollectionLabeledData for compatibility with existing filtering + return pld.ToCollectionLabeledData(), nil +} + +// callWasmFunction calls an exported function in the WASM module +func (g *WasmGuard) callWasmFunction(funcName string, inputJSON []byte) ([]byte, error) { + fn := g.module.ExportedFunction(funcName) + if fn == nil { + return nil, fmt.Errorf("function %s not exported from WASM module", funcName) + } + + mem := g.module.Memory() + if mem == nil { + return nil, fmt.Errorf("WASM module has no memory") + } + + // Start with 4MB output buffer, can grow up to 16MB if needed + initialOutputSize := uint32(4 * 1024 * 1024) // 4MB initial + maxOutputSize := uint32(16 * 1024 * 1024) // 16MB maximum + maxInputSize := uint32(8 * 1024 * 1024) // 8MB max input + + if uint32(len(inputJSON)) > maxInputSize { + return nil, fmt.Errorf("input too large: %d bytes (max %d)", len(inputJSON), maxInputSize) + } + + // Try with initial buffer size, retry with larger buffer if needed + outputSize := initialOutputSize + const maxRetries = 3 + + for attempt := 0; attempt < maxRetries; attempt++ { + result, requiredSize, err := g.tryCallWasmFunction(fn, mem, inputJSON, outputSize) + if err != nil { + return nil, err + } + + // If we got a result, return it + if result != nil { + return result, nil + } + + // Buffer was too small, check if we can grow + if requiredSize == 0 { + // Guard didn't tell us the required size, double the buffer + requiredSize = outputSize * 2 + } + + if requiredSize > maxOutputSize { + return nil, fmt.Errorf("guard requires buffer of %d bytes which exceeds maximum of %d bytes", requiredSize, maxOutputSize) + 
} + + logWasm.Printf("Buffer too small (%d bytes), retrying with %d bytes", outputSize, requiredSize) + outputSize = requiredSize + } + + return nil, fmt.Errorf("failed after %d attempts, buffer size %d still insufficient", maxRetries, outputSize) +} + +// tryCallWasmFunction attempts to call the WASM function with the given buffer size +// Returns (result, 0, nil) on success +// Returns (nil, requiredSize, nil) if buffer was too small +// Returns (nil, 0, error) on actual error +func (g *WasmGuard) tryCallWasmFunction(fn api.Function, mem api.Memory, inputJSON []byte, outputSize uint32) ([]byte, uint32, error) { + // Ensure memory is large enough for our buffers + // Layout: [...guard memory...][input buffer][output buffer] + inputSize := uint32(len(inputJSON)) + requiredMemory := inputSize + outputSize + uint32(64*1024) // Extra 64KB for safety margin + + memSize := mem.Size() + if memSize < requiredMemory { + pages := (requiredMemory - memSize + 65535) / 65536 // Round up to pages + _, success := mem.Grow(pages) + if !success { + return nil, 0, fmt.Errorf("failed to grow WASM memory from %d to %d bytes", memSize, requiredMemory) + } + memSize = mem.Size() + } + + // Place buffers at end of memory + outputPtr := memSize - outputSize + inputPtr := outputPtr - inputSize + + // Write input to WASM memory + if !mem.Write(inputPtr, inputJSON) { + return nil, 0, fmt.Errorf("failed to write input to WASM memory") + } + + // Call the WASM function + results, err := fn.Call(g.ctx, + uint64(inputPtr), + uint64(inputSize), + uint64(outputPtr), + uint64(outputSize)) + if err != nil { + return nil, 0, fmt.Errorf("WASM function call failed: %w", err) + } + + // Check result + resultLen := int32(results[0]) + + // Error code -2 means "buffer too small" + // The guard can optionally return the required size in the output buffer as a uint32 + if resultLen == -2 { + // Try to read the required size from the output buffer (first 4 bytes as uint32) + if sizeBytes, ok := 
mem.Read(outputPtr, 4); ok && len(sizeBytes) == 4 { + requiredSize := uint32(sizeBytes[0]) | uint32(sizeBytes[1])<<8 | uint32(sizeBytes[2])<<16 | uint32(sizeBytes[3])<<24 + if requiredSize > 0 { + return nil, requiredSize, nil + } + } + // Guard didn't specify size, return 0 to trigger doubling + return nil, 0, nil + } + + // Other negative values are errors + if resultLen < 0 { + return nil, 0, fmt.Errorf("WASM function returned error code: %d", resultLen) + } + + if resultLen == 0 { + return []byte{}, 0, nil + } + + // Read output from WASM memory + outputJSON, ok := mem.Read(outputPtr, uint32(resultLen)) + if !ok { + return nil, 0, fmt.Errorf("failed to read output from WASM memory (len=%d)", resultLen) + } + + return outputJSON, 0, nil +} + +// parseResourceResponse converts guard response to LabeledResource +func parseResourceResponse(response map[string]interface{}) (*difc.LabeledResource, difc.OperationType, error) { + resourceData, ok := response["resource"].(map[string]interface{}) + if !ok { + return nil, difc.OperationWrite, fmt.Errorf("invalid resource format in guard response") + } + + resource := &difc.LabeledResource{} + + if desc, ok := resourceData["description"].(string); ok { + resource.Description = desc + } + + // Parse secrecy tags + if secrecy, ok := resourceData["secrecy"].([]interface{}); ok { + tags := make([]difc.Tag, 0, len(secrecy)) + for _, t := range secrecy { + if tagStr, ok := t.(string); ok { + tags = append(tags, difc.Tag(tagStr)) + } + } + resource.Secrecy = *difc.NewSecrecyLabelWithTags(tags) + } else { + resource.Secrecy = *difc.NewSecrecyLabel() + } + + // Parse integrity tags + if integrity, ok := resourceData["integrity"].([]interface{}); ok { + tags := make([]difc.Tag, 0, len(integrity)) + for _, t := range integrity { + if tagStr, ok := t.(string); ok { + tags = append(tags, difc.Tag(tagStr)) + } + } + resource.Integrity = *difc.NewIntegrityLabelWithTags(tags) + } else { + resource.Integrity = *difc.NewIntegrityLabel() + } 
+ + // Parse operation type + operation := difc.OperationWrite // default to most restrictive + if opStr, ok := response["operation"].(string); ok { + switch opStr { + case "read": + operation = difc.OperationRead + case "write": + operation = difc.OperationWrite + case "read-write": + operation = difc.OperationReadWrite + } + } + + return resource, operation, nil +} + +// parseCollectionLabeledData converts an array of items to CollectionLabeledData +func parseCollectionLabeledData(items []interface{}) (*difc.CollectionLabeledData, error) { + collection := &difc.CollectionLabeledData{ + Items: make([]difc.LabeledItem, 0, len(items)), + } + + for _, item := range items { + itemMap, ok := item.(map[string]interface{}) + if !ok { + continue + } + + labeledItem := difc.LabeledItem{ + Data: itemMap["data"], + } + + // Parse labels + if labelsData, ok := itemMap["labels"].(map[string]interface{}); ok { + labels := &difc.LabeledResource{} + + if desc, ok := labelsData["description"].(string); ok { + labels.Description = desc + } + + // Parse secrecy tags + if secrecy, ok := labelsData["secrecy"].([]interface{}); ok { + tags := make([]difc.Tag, 0, len(secrecy)) + for _, t := range secrecy { + if tagStr, ok := t.(string); ok { + tags = append(tags, difc.Tag(tagStr)) + } + } + labels.Secrecy = *difc.NewSecrecyLabelWithTags(tags) + } else { + labels.Secrecy = *difc.NewSecrecyLabel() + } + + // Parse integrity tags + if integrity, ok := labelsData["integrity"].([]interface{}); ok { + tags := make([]difc.Tag, 0, len(integrity)) + for _, t := range integrity { + if tagStr, ok := t.(string); ok { + tags = append(tags, difc.Tag(tagStr)) + } + } + labels.Integrity = *difc.NewIntegrityLabelWithTags(tags) + } else { + labels.Integrity = *difc.NewIntegrityLabel() + } + + labeledItem.Labels = labels + } + + collection.Items = append(collection.Items, labeledItem) + } + + return collection, nil +} + +// Close releases WASM runtime resources +func (g *WasmGuard) Close(ctx 
context.Context) error { + if g.module != nil { + if err := g.module.Close(ctx); err != nil { + logWasm.Printf("Error closing module: %v", err) + } + } + if g.runtime != nil { + return g.runtime.Close(ctx) + } + return nil +} diff --git a/internal/mcp/connection.go b/internal/mcp/connection.go index 9189e9b3..2229a896 100644 --- a/internal/mcp/connection.go +++ b/internal/mcp/connection.go @@ -190,6 +190,12 @@ func NewConnection(ctx context.Context, command string, args []string, env map[s } } + // Capture stderr to help diagnose container failures + // The SDK's CommandTransport only uses stdin/stdout for MCP protocol, + // so we can capture stderr separately for debugging + var stderrBuf bytes.Buffer + cmd.Stderr = &stderrBuf + logger.LogInfo("backend", "Starting MCP backend server, command=%s, args=%v", command, sanitize.SanitizeArgs(expandedArgs)) log.Printf("Starting MCP server command: %s %v", command, sanitize.SanitizeArgs(expandedArgs)) transport := &sdk.CommandTransport{Command: cmd} @@ -208,6 +214,16 @@ func NewConnection(ctx context.Context, command string, args []string, env map[s log.Printf(" Args: %v", sanitize.SanitizeArgs(expandedArgs)) log.Printf(" Error: %v", err) + // Log captured stderr output from the container/process + stderrOutput := strings.TrimSpace(stderrBuf.String()) + if stderrOutput != "" { + logger.LogErrorMd("backend", "MCP backend stderr output:\n%s", stderrOutput) + log.Printf(" 📋 Container/Process stderr output:") + for _, line := range strings.Split(stderrOutput, "\n") { + log.Printf(" %s", line) + } + } + // Check if it's a command not found error if strings.Contains(err.Error(), "executable file not found") || strings.Contains(err.Error(), "no such file or directory") { diff --git a/internal/server/unified.go b/internal/server/unified.go index 05595db5..7df712dd 100644 --- a/internal/server/unified.go +++ b/internal/server/unified.go @@ -93,6 +93,7 @@ type UnifiedServer struct { capabilities *difc.Capabilities evaluator 
*difc.Evaluator enableDIFC bool // When true, DIFC enforcement and session requirement are enabled + difcFilter bool // When true, filters response data based on DIFC labels // Shutdown state tracking isShutdown bool @@ -105,7 +106,7 @@ type UnifiedServer struct { // NewUnified creates a new unified MCP server func NewUnified(ctx context.Context, cfg *config.Config) (*UnifiedServer, error) { - logUnified.Printf("Creating new unified server: enableDIFC=%v, sequentialLaunch=%v, servers=%d", cfg.EnableDIFC, cfg.SequentialLaunch, len(cfg.Servers)) + logUnified.Printf("Creating new unified server: enableDIFC=%v, difcFilter=%v, sequentialLaunch=%v, servers=%d, guards=%d", cfg.EnableDIFC, cfg.DIFCFilter, cfg.SequentialLaunch, len(cfg.Servers), len(cfg.Guards)) l := launcher.New(ctx, cfg) // Get payload directory from config, with fallback to default @@ -114,6 +115,20 @@ func NewUnified(ctx context.Context, cfg *config.Config) (*UnifiedServer, error) payloadDir = cfg.Gateway.PayloadDir } + // Build default session labels from config (per github-difc.md section 11.5) + var defaultSecrecy, defaultIntegrity []difc.Tag + if cfg.Gateway != nil && cfg.Gateway.Session != nil { + for _, s := range cfg.Gateway.Session.Secrecy { + defaultSecrecy = append(defaultSecrecy, difc.Tag(s)) + } + for _, i := range cfg.Gateway.Session.Integrity { + defaultIntegrity = append(defaultIntegrity, difc.Tag(i)) + } + if len(defaultSecrecy) > 0 || len(defaultIntegrity) > 0 { + logUnified.Printf("Session defaults: secrecy=%v, integrity=%v", defaultSecrecy, defaultIntegrity) + } + } + us := &UnifiedServer{ launcher: l, sysServer: sys.NewSysServer(l.ServerIDs()), @@ -123,12 +138,13 @@ func NewUnified(ctx context.Context, cfg *config.Config) (*UnifiedServer, error) sequentialLaunch: cfg.SequentialLaunch, payloadDir: payloadDir, - // Initialize DIFC components + // Initialize DIFC components with default session labels guardRegistry: guard.NewRegistry(), - agentRegistry: difc.NewAgentRegistry(), + 
agentRegistry: difc.NewAgentRegistryWithDefaults(defaultSecrecy, defaultIntegrity), capabilities: difc.NewCapabilities(), evaluator: difc.NewEvaluator(), enableDIFC: cfg.EnableDIFC, + difcFilter: cfg.DIFCFilter, } // Create MCP server @@ -141,7 +157,7 @@ func NewUnified(ctx context.Context, cfg *config.Config) (*UnifiedServer, error) // Register guards for all backends for _, serverID := range l.ServerIDs() { - us.registerGuard(serverID) + us.registerGuard(serverID, cfg) } // Register aggregated tools from all backends @@ -448,13 +464,13 @@ func (us *UnifiedServer) registerSysTools() error { us.toolsMu.Lock() us.tools["sys___init"] = &ToolInfo{ Name: "sys___init", - Description: "Initialize the MCPG system and get available MCP servers", + Description: "[DEPRECATED] Initialize the MCPG system. This tool is no longer required - sessions are automatically created from the Authorization header. Kept for backward compatibility only.", InputSchema: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "token": map[string]interface{}{ "type": "string", - "description": "Authentication token for session initialization (can be empty for first call)", + "description": "Authentication token for session initialization (ignored - session ID is extracted from Authorization header)", }, }, }, @@ -466,13 +482,13 @@ func (us *UnifiedServer) registerSysTools() error { // Register with SDK sdk.AddTool(us.server, &sdk.Tool{ Name: "sys___init", - Description: "Initialize the MCPG system and get available MCP servers", + Description: "[DEPRECATED] Initialize the MCPG system. This tool is no longer required - sessions are automatically created from the Authorization header. 
Kept for backward compatibility only.", InputSchema: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "token": map[string]interface{}{ "type": "string", - "description": "Authentication token for session initialization (can be empty for first call)", + "description": "Authentication token for session initialization (ignored - session ID is extracted from Authorization header)", }, }, }, @@ -533,13 +549,76 @@ func (us *UnifiedServer) registerSysTools() error { } // registerGuard registers a guard for a specific backend server -func (us *UnifiedServer) registerGuard(serverID string) { - // For now, use noop guards for all servers - // In the future, this will load guards based on configuration - // or use guard.CreateGuard() with a guard name from config - g := guard.NewNoopGuard() - us.guardRegistry.Register(serverID, g) - log.Printf("[DIFC] Registered guard '%s' for server '%s'", g.Name(), serverID) +func (us *UnifiedServer) registerGuard(serverID string, cfg *config.Config) { + // Check if server specifies a guard binding + serverCfg, ok := cfg.Servers[serverID] + if !ok || serverCfg.Guard == "" { + // No guard binding, use noop guard + g := guard.NewNoopGuard() + us.guardRegistry.Register(serverID, g) + log.Printf("[DIFC] Registered noop guard for server '%s'", serverID) + return + } + + guardID := serverCfg.Guard + + // Check if we have a guard configuration + guardCfg, ok := cfg.Guards[guardID] + if !ok { + // Guard not configured, use noop guard as fallback + log.Printf("[DIFC] Warning: guard '%s' specified for server '%s' but not configured, using noop guard", guardID, serverID) + g := guard.NewNoopGuard() + us.guardRegistry.Register(serverID, g) + return + } + + // Create appropriate guard based on type + switch guardCfg.Type { + case "wasm": + // Create backend caller for this guard + backendCaller := &guardBackendCaller{ + server: us, + serverID: serverID, + ctx: us.ctx, + } + + // Load WASM guard from path or URL + loadCfg 
:= guard.LoaderConfig{ + Path: guardCfg.Path, + URL: guardCfg.URL, + SHA256: guardCfg.SHA256, + CacheDir: guardCfg.CacheDir, + } + + loadResult, err := guard.Load(us.ctx, loadCfg) + if err != nil { + log.Printf("[DIFC] Failed to load WASM guard '%s': %v, using noop guard", guardID, err) + g := guard.NewNoopGuard() + us.guardRegistry.Register(serverID, g) + return + } + + // Create WASM guard from loaded bytes + wasmGuard, err := guard.NewWasmGuardFromBytes(us.ctx, guardID, loadResult.WASMBytes, backendCaller) + if err != nil { + log.Printf("[DIFC] Failed to create WASM guard '%s': %v, using noop guard", guardID, err) + g := guard.NewNoopGuard() + us.guardRegistry.Register(serverID, g) + return + } + + us.guardRegistry.Register(serverID, wasmGuard) + if guardCfg.URL != "" { + log.Printf("[DIFC] Registered WASM guard '%s' for server '%s' (url: %s, source: %s)", guardID, serverID, guardCfg.URL, loadResult.Source) + } else { + log.Printf("[DIFC] Registered WASM guard '%s' for server '%s' (path: %s)", guardID, serverID, guardCfg.Path) + } + + default: + log.Printf("[DIFC] Warning: unsupported guard type '%s' for guard '%s', using noop guard", guardCfg.Type, guardID) + g := guard.NewNoopGuard() + us.guardRegistry.Register(serverID, g) + } } // guardBackendCaller implements guard.BackendCaller for guards to query backend metadata @@ -664,25 +743,35 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName resource.Description, operation, resource.Secrecy.Label.GetTags(), resource.Integrity.Label.GetTags()) // **Phase 2: Reference Monitor performs coarse-grained access check** - isWrite := (operation == difc.OperationWrite || operation == difc.OperationReadWrite) + // For read operations with filtering enabled, we skip the coarse-grained block + // and let the request proceed. Fine-grained filtering at Phase 5 will filter + // individual items from the response based on their labels. 
+ isReadOperation := (operation == difc.OperationRead) result := us.evaluator.Evaluate(agentLabels.Secrecy, agentLabels.Integrity, resource, operation) if !result.IsAllowed() { - // Access denied - log and return detailed error - log.Printf("[DIFC] Access DENIED for agent %s to %s: %s", agentID, resource.Description, result.Reason) - detailedErr := difc.FormatViolationError(result, agentLabels.Secrecy, agentLabels.Integrity, resource) - return &sdk.CallToolResult{ - Content: []sdk.Content{ - &sdk.TextContent{ - Text: detailedErr.Error(), + if isReadOperation && us.difcFilter { + // Read operation with filtering enabled - skip coarse-grained block + // The guard will label response items and Phase 5 will filter them + log.Printf("[DIFC] Coarse-grained check failed for read, but filtering enabled - proceeding to backend") + log.Printf("[DIFC] Response items will be filtered at Phase 5 based on per-item labels") + } else { + // Write operation OR filtering disabled - block the request + log.Printf("[DIFC] Access DENIED for agent %s to %s: %s", agentID, resource.Description, result.Reason) + detailedErr := difc.FormatViolationError(result, agentLabels.Secrecy, agentLabels.Integrity, resource) + return &sdk.CallToolResult{ + Content: []sdk.Content{ + &sdk.TextContent{ + Text: detailedErr.Error(), + }, }, - }, - IsError: true, - }, nil, detailedErr + IsError: true, + }, nil, detailedErr + } + } else { + log.Printf("[DIFC] Access ALLOWED for agent %s to %s", agentID, resource.Description) } - log.Printf("[DIFC] Access ALLOWED for agent %s to %s", agentID, resource.Description) - // **Phase 3: Execute the backend call** // Get or launch backend connection (use session-aware connection for stateful backends) sessionID := us.getSessionID(ctx) @@ -745,23 +834,12 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName } } - // **Phase 6: Accumulate labels from this operation (for reads)** - if !isWrite { - overall := labeledData.Overall() - 
agentLabels.AccumulateFromRead(overall) - log.Printf("[DIFC] Agent %s accumulated labels | Secrecy: %v | Integrity: %v", - agentID, agentLabels.GetSecrecyTags(), agentLabels.GetIntegrityTags()) - } + // Note: Automatic label accumulation is disabled. + // Agent labels remain fixed at their initial session values. + // Future versions will support explicit primitives for label changes. } else { // No fine-grained labeling - use original backend result finalResult = backendResult - - // **Phase 6: Accumulate labels from resource (for reads)** - if !isWrite { - agentLabels.AccumulateFromRead(resource) - log.Printf("[DIFC] Agent %s accumulated labels | Secrecy: %v | Integrity: %v", - agentID, agentLabels.GetSecrecyTags(), agentLabels.GetIntegrityTags()) - } } // Convert finalResult to SDK CallToolResult format @@ -832,48 +910,36 @@ func (us *UnifiedServer) ensureSessionDirectory(sessionID string) error { } // requireSession checks that a session has been initialized for this request -// When DIFC is disabled (default), automatically creates a session if one doesn't exist +// Sessions are auto-created from the Authorization header for all modes. +// When DIFC is enabled, the session ID is used to track labels per-agent. 
func (us *UnifiedServer) requireSession(ctx context.Context) error { sessionID := us.getSessionID(ctx) log.Printf("Checking session for ID: %s", sessionID) - // If DIFC is disabled (default), use double-checked locking to auto-create session - if !us.enableDIFC { - us.sessionMu.RLock() - session := us.sessions[sessionID] - us.sessionMu.RUnlock() - - if session == nil { - // Need to create session - acquire write lock - us.sessionMu.Lock() - // Double-check after acquiring write lock to avoid race condition - if us.sessions[sessionID] == nil { - log.Printf("DIFC disabled: auto-creating session for ID: %s", sessionID) - us.sessions[sessionID] = NewSession(sessionID, "") - log.Printf("Session auto-created for ID: %s", sessionID) - - // Ensure session directory exists in payload mount point - // This is done after releasing the lock to avoid holding it during I/O - us.sessionMu.Unlock() - if err := us.ensureSessionDirectory(sessionID); err != nil { - logger.LogWarn("client", "Failed to create session directory for session=%s: %v", sessionID, err) - // Don't fail - payloads will attempt to create the directory when needed - } - return nil - } - us.sessionMu.Unlock() - } - return nil - } - - // DIFC is enabled - require explicit session initialization + // Use double-checked locking to auto-create session if needed us.sessionMu.RLock() session := us.sessions[sessionID] us.sessionMu.RUnlock() if session == nil { - log.Printf("Session not found for ID: %s. 
Available sessions: %v", sessionID, us.getSessionKeys()) - return fmt.Errorf("sys___init must be called before any other tool calls") + // Need to create session - acquire write lock + us.sessionMu.Lock() + // Double-check after acquiring write lock to avoid race condition + if us.sessions[sessionID] == nil { + log.Printf("Auto-creating session for ID: %s (DIFC enabled: %v)", sessionID, us.enableDIFC) + us.sessions[sessionID] = NewSession(sessionID, "") + log.Printf("Session auto-created for ID: %s", sessionID) + + // Ensure session directory exists in payload mount point + // This is done after releasing the lock to avoid holding it during I/O + us.sessionMu.Unlock() + if err := us.ensureSessionDirectory(sessionID); err != nil { + logger.LogWarn("client", "Failed to create session directory for session=%s: %v", sessionID, err) + // Don't fail - payloads will attempt to create the directory when needed + } + return nil + } + us.sessionMu.Unlock() } log.Printf("Session validated for ID: %s", sessionID) diff --git a/internal/server/unified_test.go b/internal/server/unified_test.go index 2e27d318..62d6efca 100644 --- a/internal/server/unified_test.go +++ b/internal/server/unified_test.go @@ -182,10 +182,17 @@ func TestRequireSession(t *testing.T) { err = us.requireSession(ctxWithSession) assert.NoError(t, err, "requireSession() failed for valid session") - // Test with invalid session (DIFC enabled) - ctxWithInvalidSession := context.WithValue(ctx, SessionIDContextKey, "invalid-session") - err = us.requireSession(ctxWithInvalidSession) - require.Error(t, err, "requireSession() should fail for invalid session when DIFC is enabled") + // Test with new session (DIFC enabled) - should auto-create session + ctxWithNewSession := context.WithValue(ctx, SessionIDContextKey, "new-session") + err = us.requireSession(ctxWithNewSession) + require.NoError(t, err, "requireSession() should auto-create session even when DIFC is enabled") + + // Verify session was created + 
us.sessionMu.RLock() + newSession, exists := us.sessions["new-session"] + us.sessionMu.RUnlock() + require.True(t, exists, "Session should have been auto-created") + require.NotNil(t, newSession, "Session should not be nil") } func TestRequireSession_DifcDisabled(t *testing.T) { @@ -432,8 +439,8 @@ func TestRequireSession_EdgeCases(t *testing.T) { enableDIFC: true, sessionID: "nonexistent", preCreate: false, - wantErr: true, - description: "should deny access to nonexistent session when DIFC enabled", + wantErr: false, + description: "should auto-create session even when DIFC enabled", }, { name: "DIFC disabled without session", @@ -565,3 +572,102 @@ func TestUnifiedServer_EnsureSessionDirectory(t *testing.T) { _, err = os.Stat(nestedPath) require.NoError(t, err, "Nested session directory should exist") } + +// TestCoarseGrainedCheckWithFiltering tests that read operations bypass the coarse-grained +// DIFC block when filtering is enabled, allowing fine-grained per-item filtering instead. 
+// This captures the behavior where: +// - For read operations with difcFilter=true: bypass coarse-grained block, proceed to backend +// - For write operations: always block if coarse-grained check fails (even with filtering enabled) +// - For read operations with difcFilter=false: block if coarse-grained check fails +func TestCoarseGrainedCheckWithFiltering(t *testing.T) { + tests := []struct { + name string + enableDIFC bool + difcFilter bool + isReadOperation bool + coarseGrainedCheckFail bool + expectBlockAtPhase2 bool + description string + }{ + { + name: "Read operation with filtering enabled, coarse check fails", + enableDIFC: true, + difcFilter: true, + isReadOperation: true, + coarseGrainedCheckFail: true, + expectBlockAtPhase2: false, // Should NOT block - let filtering handle it + description: "Read ops with filtering bypass coarse block for fine-grained filtering", + }, + { + name: "Read operation with filtering disabled, coarse check fails", + enableDIFC: true, + difcFilter: false, + isReadOperation: true, + coarseGrainedCheckFail: true, + expectBlockAtPhase2: true, // Should block - no filtering to fall back on + description: "Read ops without filtering must block when coarse check fails", + }, + { + name: "Write operation with filtering enabled, coarse check fails", + enableDIFC: true, + difcFilter: true, + isReadOperation: false, + coarseGrainedCheckFail: true, + expectBlockAtPhase2: true, // Should block - writes can't be filtered + description: "Write ops always block on coarse check failure (can't filter writes)", + }, + { + name: "Write operation with filtering disabled, coarse check fails", + enableDIFC: true, + difcFilter: false, + isReadOperation: false, + coarseGrainedCheckFail: true, + expectBlockAtPhase2: true, // Should block + description: "Write ops block when coarse check fails", + }, + { + name: "Read operation with filtering enabled, coarse check passes", + enableDIFC: true, + difcFilter: true, + isReadOperation: true, + 
#!/bin/bash
# Echo Guard Demo Script
# Demonstrates the echo guard's output when processing MCP tool calls.
#
# Run "$0 help" for the list of commands. Every command except "help" first
# rebuilds examples/guards/echo-guard/guard.wasm with TinyGo.

set -e

# Colors for output. The values are literal backslash sequences; they are only
# turned into terminal escapes by `echo -e` / `printf '%b'`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# Get script directory and project root
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
ECHO_GUARD_DIR="$PROJECT_ROOT/examples/guards/echo-guard"
WASM_FILE="$ECHO_GUARD_DIR/guard.wasm"
DEMO_CONFIG="$ECHO_GUARD_DIR/demo-config.toml"
CODEX_CONFIG="$ECHO_GUARD_DIR/codex.config.toml"
GATEWAY_BINARY="$PROJECT_ROOT/awmg"
# PID of a background gateway for cleanup() to stop. Nothing in this script
# assigns it: every mode runs the gateway in the foreground or inside tmux.
GATEWAY_PID=""

# print_header TITLE - print a boxed section header.
print_header() {
    echo ""
    echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
    echo -e "${BOLD}${BLUE} $1${NC}"
    echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
    echo ""
}

# print_step MESSAGE - announce the step that is about to run.
print_step() {
    echo -e "${CYAN}▶ $1${NC}"
}

# print_success MESSAGE - report a completed step.
print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

# print_warning MESSAGE - report a non-fatal problem.
print_warning() {
    echo -e "${YELLOW}⚠ $1${NC}"
}

# print_error MESSAGE - report a fatal problem (the caller decides whether to exit).
print_error() {
    echo -e "${RED}✗ $1${NC}"
}

# cleanup - EXIT trap: stop the gateway recorded in GATEWAY_PID, if it is still running.
cleanup() {
    if [ -n "$GATEWAY_PID" ] && kill -0 "$GATEWAY_PID" 2>/dev/null; then
        print_step "Stopping gateway (PID: $GATEWAY_PID)..."
        kill "$GATEWAY_PID" 2>/dev/null || true
        wait "$GATEWAY_PID" 2>/dev/null || true
        print_success "Gateway stopped"
    fi
}

trap cleanup EXIT

# check_tinygo - exit with install instructions unless `tinygo` is on PATH.
check_tinygo() {
    if ! command -v tinygo &> /dev/null; then
        print_error "TinyGo is not installed"
        echo ""
        echo "Install TinyGo from: https://tinygo.org/getting-started/install/"
        echo ""
        echo "On macOS:"
        echo " brew install tinygo"
        echo ""
        echo "On Ubuntu/Debian:"
        echo " wget https://github.com/tinygo-org/tinygo/releases/download/v0.34.0/tinygo_0.34.0_amd64.deb"
        echo " sudo dpkg -i tinygo_0.34.0_amd64.deb"
        exit 1
    fi
    print_success "TinyGo found: $(tinygo version)"
}

# build_gateway - build the gateway binary with `make build` unless it already
# exists. An existing binary is reused as-is, even if the sources are newer.
build_gateway() {
    if [ ! -f "$GATEWAY_BINARY" ]; then
        print_step "Building gateway binary..."
        cd "$PROJECT_ROOT"
        make build
        print_success "Gateway binary built"
    else
        print_success "Gateway binary already exists"
    fi
}

# build_guard - compile the echo guard to guard.wasm with TinyGo, preferring a
# Go 1.23 toolchain as GOROOT when one is installed. Exits if no wasm file results.
build_guard() {
    local go123="" candidate size

    print_header "Building Echo Guard"

    print_step "Checking TinyGo installation..."
    check_tinygo
    echo ""

    print_step "Building guard.wasm..."
    cd "$ECHO_GUARD_DIR"

    # Try to find Go 1.23 for TinyGo compatibility
    for candidate in go1.23 go1.23.4 go1.23.9; do
        if command -v "$candidate" &> /dev/null; then
            go123="$candidate"
            break
        fi
    done

    if [ -n "$go123" ]; then
        print_step "Using Go 1.23 for TinyGo compatibility: $go123"
        GOROOT=$("$go123" env GOROOT) tinygo build -o guard.wasm -target=wasi main.go
    else
        print_warning "Go 1.23 not found, using default Go (may have compatibility issues)"
        tinygo build -o guard.wasm -target=wasi main.go
    fi

    if [ -f "$WASM_FILE" ]; then
        size=$(ls -lh "$WASM_FILE" | awk '{print $5}')
        print_success "Built guard.wasm ($size)"
    else
        print_error "Failed to build guard.wasm"
        exit 1
    fi

    cd "$PROJECT_ROOT"
}

# run_test_demo - quick mode: run the echo guard integration tests and show
# only the "Echo guard output" sections of their verbose output.
run_test_demo() {
    print_header "Running Echo Guard Test Demo"

    echo -e "${BOLD}The echo guard prints all inputs it receives from the gateway.${NC}"
    echo -e "${BOLD}This is useful for debugging guard implementations.${NC}"
    echo ""

    print_step "Running label_resource test (tool call interception)..."
    echo ""
    echo -e "${YELLOW}--- Expected output: Guard receives tool name, args, and capabilities ---${NC}"
    echo ""

    # Run specific tests that show output
    cd "$PROJECT_ROOT"
    go test -v -run "TestEchoGuardLabelResourceOutput" ./test/integration/... 2>&1 | \
        sed -n '/Echo guard output:/,/=============================$/p' | \
        sed 's/^ //'

    echo ""
    print_step "Running label_response test (response interception)..."
    echo ""
    echo -e "${YELLOW}--- Expected output: Guard receives tool result data ---${NC}"
    echo ""

    go test -v -run "TestEchoGuardLabelResponseOutput" ./test/integration/... 2>&1 | \
        sed -n '/Echo guard output:/,/=============================$/p' | \
        sed 's/^ //'

    echo ""
    print_success "Test demo complete!"
}

# run_gateway - end-to-end mode: run the gateway with the demo config in the foreground.
run_gateway() {
    print_header "Starting Gateway with Echo Guard"

    # Build gateway if needed
    build_gateway

    # Check GitHub token
    if [ -z "$GITHUB_PERSONAL_ACCESS_TOKEN" ]; then
        print_warning "GITHUB_PERSONAL_ACCESS_TOKEN is not set"
        echo " The GitHub server will not work without it."
        echo " You can still test with the 'fetch' server."
        echo ""
    else
        print_success "GITHUB_PERSONAL_ACCESS_TOKEN is set"
    fi

    print_step "Starting gateway on http://127.0.0.1:8000..."
    echo ""

    cd "$PROJECT_ROOT"
    "$GATEWAY_BINARY" --config "$DEMO_CONFIG"
}

# run_gateway_with_codex - print instructions for connecting Codex, then run
# the gateway with the demo config in the foreground.
run_gateway_with_codex() {
    print_header "Echo Guard End-to-End Demo with Codex"

    # Build gateway if needed
    build_gateway

    # Check GitHub token
    if [ -z "$GITHUB_PERSONAL_ACCESS_TOKEN" ]; then
        print_warning "GITHUB_PERSONAL_ACCESS_TOKEN is not set"
        echo " Export it before running: export GITHUB_PERSONAL_ACCESS_TOKEN=ghp_..."
        echo ""
    else
        print_success "GITHUB_PERSONAL_ACCESS_TOKEN is set"
    fi

    print_step "Starting gateway in foreground on http://127.0.0.1:8000..."
    echo ""
    echo -e "${BOLD}${YELLOW}═══════════════════════════════════════════════════════════════${NC}"
    echo -e "${BOLD}${YELLOW} INSTRUCTIONS${NC}"
    echo -e "${BOLD}${YELLOW}═══════════════════════════════════════════════════════════════${NC}"
    echo ""
    echo "1. The gateway will start below with the echo guard attached to GitHub."
    echo ""
    echo "2. In another terminal, start Codex with the demo config:"
    echo ""
    echo -e " ${CYAN}export AGENT_ID=demo-key-12345${NC}"
    echo -e " ${CYAN}codex --mcp-config $CODEX_CONFIG${NC}"
    echo ""
    echo "3. Ask Codex to use GitHub tools, for example:"
    echo " - 'List the issues in octocat/Hello-World'"
    echo " - 'What are the recent commits in github/docs?'"
    echo ""
    echo "4. Watch the gateway output below - you'll see the echo guard printing"
    echo " the tool calls and responses as they flow through."
    echo ""
    echo "5. Press Ctrl-C to stop the gateway when done."
    echo ""
    echo -e "${BOLD}${YELLOW}═══════════════════════════════════════════════════════════════${NC}"
    echo ""

    cd "$PROJECT_ROOT"
    "$GATEWAY_BINARY" --config "$DEMO_CONFIG"
}

# run_tmux_demo - interactive mode: a tmux session with the gateway in the left
# pane and connection instructions in the right pane. Requires tmux.
run_tmux_demo() {
    local session="echo-guard-demo"
    local gateway_cmd

    print_header "Interactive Echo Guard Demo (tmux)"

    if ! command -v tmux &> /dev/null; then
        print_error "tmux is not installed"
        echo "Install with: brew install tmux (macOS) or apt install tmux (Linux)"
        exit 1
    fi

    # Build gateway if needed
    build_gateway

    # Kill existing session if it exists
    tmux kill-session -t "$session" 2>/dev/null || true

    # Create new session with gateway running
    tmux new-session -d -s "$session" -n "gateway"

    # Main pane: Gateway output. The command is typed into the pane's shell, so
    # paths are shell-quoted (%q) to survive spaces, and the same binary path
    # is used as in the other modes.
    gateway_cmd="cd $(printf '%q' "$PROJECT_ROOT") && echo '=== GATEWAY OUTPUT ===' && echo 'Starting gateway with echo guard...' && echo '' && $(printf '%q' "$GATEWAY_BINARY") --config $(printf '%q' "$DEMO_CONFIG") 2>&1"
    tmux send-keys -t "$session:0.0" "$gateway_cmd" Enter

    # Split horizontally for instructions. printf '%b' renders the \033 color
    # sequences; a plain `echo` would print them literally in bash.
    tmux split-window -h -t "$session"
    tmux send-keys -t "$session:0.1" "clear && printf '%b\n' '
${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}
${BOLD}${CYAN} ECHO GUARD DEMO - INSTRUCTIONS${NC}
${BOLD}${CYAN}═══════════════════════════════════════════════════════════════${NC}

The gateway is running on the left pane with the echo guard.

${BOLD}TO CONNECT CODEX:${NC}

 1. Open a new terminal

 2. Set the API key:
 ${CYAN}export AGENT_ID=demo-key-12345${NC}

 3. Start Codex:
 ${CYAN}codex --mcp-config $CODEX_CONFIG${NC}

 4. Ask Codex to use GitHub:
 \"List issues in octocat/Hello-World\"
 \"Show recent PRs in github/docs\"

${BOLD}WHAT TO WATCH:${NC}

 Look at the left pane - the echo guard will print:
 - Tool Name (e.g., list_issues, get_repository)
 - Tool Args (the parameters passed)
 - Tool Result (the response from GitHub)

${BOLD}KEYBOARD:${NC}
 Ctrl-B + Arrow - Switch panes
 Ctrl-B + D - Detach from tmux
 Ctrl-C - Stop gateway (left pane)

' && bash" Enter

    # Make the gateway pane wider
    tmux resize-pane -t "$session:0.0" -x 80

    # Attach to session
    echo ""
    print_step "Starting tmux session..."
    echo "Press Ctrl-B then D to detach"
    echo ""

    tmux attach-session -t "$session"
}

# usage - print the command reference.
usage() {
    echo "Echo Guard Demo"
    echo ""
    echo "Usage: $0 [command]"
    echo ""
    echo "Commands:"
    echo " build Build the echo guard WASM file"
    echo " test Run quick test demo (unit tests with output)"
    echo " gateway Start gateway with echo guard (foreground)"
    echo " codex Start gateway with Codex connection instructions"
    echo " tmux Interactive demo with tmux (gateway + instructions)"
    echo " all Build and run test demo (default)"
    echo ""
    echo "End-to-End Demo:"
    echo " 1. Build the guard: $0 build"
    echo " 2. Start gateway: $0 codex"
    echo " 3. In another terminal: export AGENT_ID=demo-key-12345"
    echo " 4. Run Codex: codex --mcp-config examples/guards/echo-guard/codex.config.toml"
    echo ""
    echo "Examples:"
    echo " $0 # Run quick test demo"
    echo " $0 codex # Start gateway for Codex integration"
    echo " $0 tmux # Interactive tmux demo"
}

# main [COMMAND] - dispatch on the first argument (default: all).
main() {
    case "${1:-all}" in
        build)
            build_guard
            ;;
        test|run)
            build_guard
            run_test_demo
            ;;
        gateway)
            build_guard
            run_gateway
            ;;
        codex)
            build_guard
            run_gateway_with_codex
            ;;
        tmux|interactive)
            build_guard
            run_tmux_demo
            ;;
        all)
            build_guard
            run_test_demo
            ;;
        help|--help|-h)
            usage
            ;;
        *)
            print_error "Unknown command: $1"
            usage
            exit 1
            ;;
    esac
}

main "$@"
git rebase --continue" + echo "" + echo "To abort: git rebase --abort" + exit 1 +fi + +# Return to original branch if different +if [ "$CURRENT_BRANCH" != "lpcox/github-difc" ]; then + echo "Returning to original branch: $CURRENT_BRANCH" + git checkout "$CURRENT_BRANCH" +fi + +# Restore stashed changes +if [ "$STASHED" -eq 1 ]; then + echo "Restoring stashed changes..." + git stash pop +fi + +echo "" +echo "=== Done ===" +echo "To push the rebased branch: git push --force-with-lease origin lpcox/github-difc" diff --git a/test/integration/echo_guard_test.go b/test/integration/echo_guard_test.go new file mode 100644 index 00000000..9049f78a --- /dev/null +++ b/test/integration/echo_guard_test.go @@ -0,0 +1,305 @@ +package integration + +import ( + "bytes" + "context" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/github/gh-aw-mcpg/internal/difc" + "github.com/github/gh-aw-mcpg/internal/guard" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// buildEchoGuard builds the echo guard with TinyGo + Go 1.23 if available +func buildEchoGuard(t *testing.T) string { + guardDir := filepath.Join("..", "..", "examples", "guards", "echo-guard") + wasmFile := filepath.Join(guardDir, "guard.wasm") + + // Clean up any existing wasm file + os.Remove(wasmFile) + + // Check TinyGo availability + if !isTinyGoAvailable() { + t.Skip("TinyGo not available - required for building echo guard") + } + + // Try with Go 1.23 if available + go123 := getGo123Binary() + if go123 != "" { + t.Logf("Found Go 1.23: %s", go123) + cmd := exec.Command("tinygo", "build", "-o", "guard.wasm", "-target=wasi", "main.go") + cmd.Dir = guardDir + // Set GOROOT to use Go 1.23 + goRootCmd := exec.Command(go123, "env", "GOROOT") + goRootBytes, err := goRootCmd.Output() + if err == nil { + cmd.Env = append(os.Environ(), "GOROOT="+strings.TrimSpace(string(goRootBytes))) + output, err := cmd.CombinedOutput() + if err == nil { + t.Logf("✓ Successfully 
built echo guard with TinyGo using %s", go123) + return wasmFile + } + t.Logf("TinyGo build with %s failed: %s", go123, output) + } + } + + // Try with default Go version + cmd := exec.Command("tinygo", "build", "-o", "guard.wasm", "-target=wasi", "main.go") + cmd.Dir = guardDir + output, err := cmd.CombinedOutput() + if err != nil { + t.Skipf("Failed to build echo guard: %s", output) + } + t.Log("✓ Successfully built echo guard with TinyGo") + return wasmFile +} + +// TestEchoGuardCompilation tests that the echo guard can be compiled +func TestEchoGuardCompilation(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + wasmFile := buildEchoGuard(t) + defer os.Remove(wasmFile) + + // Verify the WASM file exists + _, err := os.Stat(wasmFile) + require.NoError(t, err, "WASM file not created") + + // Verify file is not empty + info, err := os.Stat(wasmFile) + require.NoError(t, err) + assert.Greater(t, info.Size(), int64(0), "WASM file should not be empty") +} + +// TestEchoGuardLoading tests loading the echo guard +func TestEchoGuardLoading(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available - required for WASM guard tests") + } + + wasmFile := buildEchoGuard(t) + defer os.Remove(wasmFile) + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuard(ctx, "echo-guard", wasmFile, backend) + require.NoError(t, err, "Failed to create echo guard") + defer wasmGuard.Close(ctx) + + // Verify guard name + assert.Equal(t, "echo-guard", wasmGuard.Name()) +} + +// TestEchoGuardLabelResourceOutput tests that label_resource produces expected output +func TestEchoGuardLabelResourceOutput(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not 
available - required for WASM guard tests") + } + + wasmFile := buildEchoGuard(t) + defer os.Remove(wasmFile) + + // Read WASM bytes + wasmBytes, err := os.ReadFile(wasmFile) + require.NoError(t, err) + + // Create a buffer to capture stdout + var stdout bytes.Buffer + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard with custom stdout + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuardWithOptions(ctx, "echo-guard", wasmBytes, backend, &guard.WasmGuardOptions{ + Stdout: &stdout, + }) + require.NoError(t, err, "Failed to create echo guard") + defer wasmGuard.Close(ctx) + + // Call LabelResource + resource, operation, err := wasmGuard.LabelResource( + ctx, + "get_issue", + map[string]interface{}{ + "owner": "octocat", + "repo": "hello-world", + "issue_number": 42, + }, + backend, + difc.NewCapabilities(), + ) + + require.NoError(t, err) + + // Verify the returned labels (echo guard now uses DIFC-compliant empty labels) + // Empty secrecy = public, Empty integrity = no endorsement per DIFC spec + assert.Equal(t, difc.OperationRead, operation) + secrecyTags := resource.Secrecy.Label.GetTags() + // Empty secrecy is valid per DIFC spec (means public/no restrictions) + assert.Empty(t, secrecyTags, "Echo guard should return empty secrecy (public per DIFC spec)") + + // Verify stdout output contains expected content + output := stdout.String() + t.Logf("Echo guard output:\n%s", output) + + // Check for expected output sections + assert.Contains(t, output, "=== label_resource called ===", "Should have header") + assert.Contains(t, output, "Tool Name: get_issue", "Should contain tool name") + assert.Contains(t, output, "Tool Args:", "Should have args section") + assert.Contains(t, output, "octocat", "Should contain owner value") + assert.Contains(t, output, "hello-world", "Should contain repo value") + assert.Contains(t, output, "42", "Should contain issue_number value") + assert.Contains(t, output, 
"=============================", "Should have footer") +} + +// TestEchoGuardLabelResponseOutput tests that label_response produces expected output +func TestEchoGuardLabelResponseOutput(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available - required for WASM guard tests") + } + + wasmFile := buildEchoGuard(t) + defer os.Remove(wasmFile) + + // Read WASM bytes + wasmBytes, err := os.ReadFile(wasmFile) + require.NoError(t, err) + + // Create a buffer to capture stdout + var stdout bytes.Buffer + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard with custom stdout + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuardWithOptions(ctx, "echo-guard", wasmBytes, backend, &guard.WasmGuardOptions{ + Stdout: &stdout, + }) + require.NoError(t, err, "Failed to create echo guard") + defer wasmGuard.Close(ctx) + + // Call LabelResponse + result, err := wasmGuard.LabelResponse( + ctx, + "get_issue", + map[string]interface{}{ + "number": 42, + "title": "Found a bug", + "state": "open", + "user": map[string]interface{}{ + "login": "octocat", + }, + }, + backend, + difc.NewCapabilities(), + ) + + require.NoError(t, err) + + // Echo guard returns nil (no fine-grained labeling) + assert.Nil(t, result) + + // Verify stdout output contains expected content + output := stdout.String() + t.Logf("Echo guard output:\n%s", output) + + // Check for expected output sections + assert.Contains(t, output, "=== label_response called ===", "Should have header") + assert.Contains(t, output, "Tool Name: get_issue", "Should contain tool name") + assert.Contains(t, output, "Tool Result:", "Should have result section") + assert.Contains(t, output, "Found a bug", "Should contain issue title") + assert.Contains(t, output, "octocat", "Should contain user login") + assert.Contains(t, output, "=============================", "Should have footer") 
+} + +// TestEchoGuardResourceDescription tests the returned resource description +func TestEchoGuardResourceDescription(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available - required for WASM guard tests") + } + + wasmFile := buildEchoGuard(t) + defer os.Remove(wasmFile) + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuard(ctx, "echo-guard", wasmFile, backend) + require.NoError(t, err, "Failed to create echo guard") + defer wasmGuard.Close(ctx) + + tests := []struct { + name string + toolName string + expectedDescPrefix string + }{ + { + name: "get_issue", + toolName: "get_issue", + expectedDescPrefix: "echo:get_issue", + }, + { + name: "create_repository", + toolName: "create_repository", + expectedDescPrefix: "echo:create_repository", + }, + { + name: "list_pull_requests", + toolName: "list_pull_requests", + expectedDescPrefix: "echo:list_pull_requests", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource, _, err := wasmGuard.LabelResource( + ctx, + tt.toolName, + map[string]interface{}{}, + backend, + difc.NewCapabilities(), + ) + + // TinyGo 0.40.1 has a known issue with Go 1.25's JSON encoding + // that causes "invalid table access" errors for some tool names. + // Skip the assertion if we hit this known issue. 
+ if err != nil && strings.Contains(err.Error(), "invalid table access") { + t.Skipf("Skipping due to known TinyGo 0.40.1/Go 1.25 JSON encoding issue: %v", err) + } + + require.NoError(t, err) + assert.Equal(t, tt.expectedDescPrefix, resource.Description, + "Resource description should be 'echo:'") + }) + } +} diff --git a/test/integration/wasm_guard_test.go b/test/integration/wasm_guard_test.go new file mode 100644 index 00000000..ca8dc369 --- /dev/null +++ b/test/integration/wasm_guard_test.go @@ -0,0 +1,462 @@ +package integration + +import ( + "context" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/github/gh-aw-mcpg/internal/config" + "github.com/github/gh-aw-mcpg/internal/difc" + "github.com/github/gh-aw-mcpg/internal/guard" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockBackendCaller implements guard.BackendCaller for testing +type mockBackendCaller struct { + calls []mockCall +} + +type mockCall struct { + toolName string + args interface{} + result interface{} + err error +} + +func (m *mockBackendCaller) CallTool(ctx context.Context, toolName string, args interface{}) (interface{}, error) { + // Record the call + call := mockCall{ + toolName: toolName, + args: args, + } + + // Return mock data based on tool name + switch toolName { + case "search_repositories": + // Mock a private repository response + call.result = map[string]interface{}{ + "items": []interface{}{ + map[string]interface{}{ + "name": "test-repo", + "private": true, + "owner": map[string]interface{}{ + "login": "test-owner", + }, + }, + }, + } + case "get_issue": + // Mock issue response + call.result = map[string]interface{}{ + "number": 42, + "title": "Test Issue", + "state": "open", + } + default: + call.result = map[string]interface{}{} + } + + m.calls = append(m.calls, call) + return call.result, call.err +} + +// isTinyGoAvailable checks if TinyGo is available and compatible +func 
isTinyGoAvailable() bool { + cmd := exec.Command("tinygo", "version") + return cmd.Run() == nil +} + +// getGo123Binary returns the command to use for Go 1.23 +func getGo123Binary() string { + binaries := []string{"go1.23", "go1.23.9", "go1.23.10", "go1.23.8"} + for _, bin := range binaries { + if _, err := exec.LookPath(bin); err == nil { + return bin + } + } + return "" +} + +// buildWasmGuard builds the sample guard with TinyGo + Go 1.23 if available +func buildWasmGuard(t *testing.T) string { + guardDir := filepath.Join("..", "..", "examples", "guards", "sample-guard") + wasmFile := filepath.Join(guardDir, "guard.wasm") + + // Clean up any existing wasm file + os.Remove(wasmFile) + + // Try to compile with TinyGo first + // TinyGo needs Go 1.23 for compatibility (doesn't support Go 1.25 yet) + if isTinyGoAvailable() { + // Try with Go 1.23 if available + go123 := getGo123Binary() + if go123 != "" { + t.Logf("Found Go 1.23: %s", go123) + cmd := exec.Command("tinygo", "build", "-o", "guard.wasm", "-target=wasi", "main.go") + cmd.Dir = guardDir + // Set GOROOT to use Go 1.23 + goRootCmd := exec.Command(go123, "env", "GOROOT") + goRootBytes, err := goRootCmd.Output() + if err == nil { + cmd.Env = append(os.Environ(), "GOROOT="+strings.TrimSpace(string(goRootBytes))) + output, err := cmd.CombinedOutput() + if err == nil { + t.Logf("✓ Successfully built guard with TinyGo using %s", go123) + return wasmFile + } + t.Logf("TinyGo build with %s failed: %s", go123, output) + } + } else { + t.Log("Go 1.23 not found - install with: go install golang.org/dl/go1.23.9@latest && go1.23.9 download") + } + + // Try with default Go version + cmd := exec.Command("tinygo", "build", "-o", "guard.wasm", "-target=wasi", "main.go") + cmd.Dir = guardDir + output, err := cmd.CombinedOutput() + if err == nil { + t.Log("Successfully built guard with TinyGo") + return wasmFile + } + t.Logf("TinyGo build failed (may not support current Go version): %s", output) + } + + // Fall back to 
standard Go (won't work but useful for testing error handling) + cmd := exec.Command("make", "build") + cmd.Dir = guardDir + output, err := cmd.CombinedOutput() + if err != nil { + t.Logf("Standard Go build output: %s", output) + t.Logf("Note: Standard Go WASM will not export guard functions properly") + } + + return wasmFile +} + +// TestWasmGuardCompilation tests that the sample guard can be compiled +func TestWasmGuardCompilation(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + wasmFile := buildWasmGuard(t) + defer os.Remove(wasmFile) + + // Verify the WASM file exists + _, err := os.Stat(wasmFile) + require.NoError(t, err, "WASM file not created") +} + +// TestWasmGuardLoading tests loading a WASM guard +func TestWasmGuardLoading(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available or not compatible with Go 1.25 - skipping WASM guard tests") + } + + wasmFile := buildWasmGuard(t) + defer os.Remove(wasmFile) + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuard(ctx, "test-guard", wasmFile, backend) + + if err != nil { + // If standard Go was used, we expect this error + if !isTinyGoAvailable() { + t.Logf("Expected error with standard Go WASM: %v", err) + t.Skip("TinyGo required for functional WASM guards") + } + require.NoError(t, err, "Failed to create WASM guard") + } + + if wasmGuard != nil { + defer wasmGuard.Close(ctx) + // Verify guard name + assert.Equal(t, "test-guard", wasmGuard.Name()) + } +} + +// TestWasmGuardLabelResource tests the label_resource function +func TestWasmGuardLabelResource(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available or not compatible - required for WASM guard 
function exports") + } + + wasmFile := buildWasmGuard(t) + defer os.Remove(wasmFile) + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuard(ctx, "test-guard", wasmFile, backend) + if err != nil { + t.Skipf("Could not create WASM guard (TinyGo may not support Go 1.25): %v", err) + } + defer wasmGuard.Close(ctx) + + tests := []struct { + name string + toolName string + args map[string]interface{} + expectedOperation difc.OperationType + expectedSecrecy []string + expectedIntegrity []string + expectBackendCall bool + }{ + { + name: "create_issue - write operation", + toolName: "create_issue", + args: map[string]interface{}{"owner": "test-owner", "repo": "test-repo", "title": "Test"}, + expectedOperation: difc.OperationWrite, + expectedSecrecy: []string{}, // empty = public per DIFC spec + expectedIntegrity: []string{"contributor:test-owner/test-repo"}, + expectBackendCall: false, + }, + { + name: "merge_pull_request - read-write operation", + toolName: "merge_pull_request", + args: map[string]interface{}{"owner": "test-owner", "repo": "test-repo", "number": 1}, + expectedOperation: difc.OperationReadWrite, + expectedSecrecy: []string{}, // empty = public per DIFC spec + // Maintainer level expands to contributor + maintainer (hierarchical expansion) + expectedIntegrity: []string{"contributor:test-owner/test-repo", "maintainer:test-owner/test-repo"}, + expectBackendCall: false, + }, + { + name: "list_issues - calls backend for repo visibility", + toolName: "list_issues", + args: map[string]interface{}{ + "owner": "test-owner", + "repo": "test-repo", + }, + expectedOperation: difc.OperationRead, + expectedSecrecy: []string{"private:test-owner/test-repo"}, // repo-scoped private tag + expectedIntegrity: []string{}, // empty = no endorsement per DIFC spec + expectBackendCall: true, + }, + { + name: "list_issues - without owner/repo args", + toolName: 
"list_issues", + args: map[string]interface{}{}, + expectedOperation: difc.OperationRead, + expectedSecrecy: []string{}, // empty = public per DIFC spec + expectedIntegrity: []string{}, // empty = no endorsement per DIFC spec + expectBackendCall: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Reset backend calls + backend.calls = nil + + // Call LabelResource + resource, operation, err := wasmGuard.LabelResource( + ctx, + tt.toolName, + tt.args, + backend, + difc.NewCapabilities(), + ) + + require.NoError(t, err) + assert.Equal(t, tt.expectedOperation, operation) + + // Check secrecy tags + secrecyTags := resource.Secrecy.Label.GetTags() + if len(tt.expectedSecrecy) == 0 { + assert.Empty(t, secrecyTags, "Expected empty secrecy (public per DIFC spec)") + } else { + for _, expectedTag := range tt.expectedSecrecy { + assert.Contains(t, secrecyTags, difc.Tag(expectedTag), + "Expected secrecy tag %s not found", expectedTag) + } + } + + // Check integrity tags + integrityTags := resource.Integrity.Label.GetTags() + if len(tt.expectedIntegrity) == 0 { + assert.Empty(t, integrityTags, "Expected empty integrity (no endorsement per DIFC spec)") + } else { + for _, expectedTag := range tt.expectedIntegrity { + assert.Contains(t, integrityTags, difc.Tag(expectedTag), + "Expected integrity tag %s not found", expectedTag) + } + } + + // Verify backend call was made if expected + if tt.expectBackendCall { + assert.NotEmpty(t, backend.calls, "Expected backend call but none were made") + if len(backend.calls) > 0 { + assert.Equal(t, "search_repositories", backend.calls[0].toolName) + } + } else { + assert.Empty(t, backend.calls, "Unexpected backend call") + } + }) + } +} + +// TestWasmGuardLabelResponse tests the label_response function +func TestWasmGuardLabelResponse(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + if !isTinyGoAvailable() { + t.Skip("TinyGo not available or not compatible 
- required for WASM guard function exports") + } + + wasmFile := buildWasmGuard(t) + defer os.Remove(wasmFile) + + // Create a mock backend caller + backend := &mockBackendCaller{} + + // Create a WASM guard + ctx := context.Background() + wasmGuard, err := guard.NewWasmGuard(ctx, "test-guard", wasmFile, backend) + if err != nil { + t.Skipf("Could not create WASM guard: %v", err) + } + defer wasmGuard.Close(ctx) + + // Call LabelResponse with a direct array (path-based labeling should handle this) + result, err := wasmGuard.LabelResponse( + ctx, + "list_issues", + []interface{}{ + map[string]interface{}{"number": 1, "title": "Issue 1"}, + map[string]interface{}{"number": 2, "title": "Issue 2"}, + }, + backend, + difc.NewCapabilities(), + ) + + require.NoError(t, err) + // Sample guard now uses path-based labeling and returns labeled items + require.NotNil(t, result, "Sample guard should return path-based labels for arrays") + + // Verify it's a CollectionLabeledData (converted from path-based format) + collectionData, ok := result.(*difc.CollectionLabeledData) + require.True(t, ok, "Result should be CollectionLabeledData") + require.Len(t, collectionData.Items, 2, "Should have 2 labeled items") + + // Check first item - note: data was re-parsed from original so numbers stay as their Go types + firstData := collectionData.Items[0].Data.(map[string]interface{}) + assert.NotNil(t, firstData["number"], "First item should have number field") + assert.NotNil(t, collectionData.Items[0].Labels, "First item should have labels") + // DIFC spec: empty secrecy = public, so we check that labels exist (not empty by design choice) + // The sample guard now uses empty secrecy labels per DIFC spec + firstSecrecyTags := collectionData.Items[0].Labels.Secrecy.Label.GetTags() + // Empty secrecy tags are valid per DIFC spec (means public/no restrictions) + assert.Empty(t, firstSecrecyTags, "First item should have empty secrecy (public per DIFC spec)") + + // Check second item + 
secondData := collectionData.Items[1].Data.(map[string]interface{}) + assert.NotNil(t, secondData["number"], "Second item should have number field") + assert.NotNil(t, collectionData.Items[1].Labels, "Second item should have labels") +} + +// TestWasmGuardConfiguration tests loading guard configuration +func TestWasmGuardConfiguration(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + // For configuration testing, we just need the file to exist + wasmFile := buildWasmGuard(t) + defer os.Remove(wasmFile) + + // Create a config with guard + absWasmPath, err := filepath.Abs(wasmFile) + require.NoError(t, err) + + stdinConfig := config.StdinConfig{ + MCPServers: map[string]*config.StdinServerConfig{ + "test": { + Type: "stdio", + Container: "test-container", + Guard: "test-guard", + }, + }, + Guards: map[string]*config.StdinGuardConfig{ + "test-guard": { + Type: "wasm", + Path: absWasmPath, + }, + }, + } + + // Convert to JSON and parse + configJSON, err := json.Marshal(stdinConfig) + require.NoError(t, err) + + // This tests that the configuration is valid + var parsed config.StdinConfig + err = json.Unmarshal(configJSON, &parsed) + require.NoError(t, err) + + assert.Equal(t, "wasm", parsed.Guards["test-guard"].Type) + assert.Equal(t, absWasmPath, parsed.Guards["test-guard"].Path) + assert.Equal(t, "test-guard", parsed.MCPServers["test"].Guard) +} + +// TestWasmGuardErrorHandling tests error handling in WASM guards +func TestWasmGuardErrorHandling(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + // Test loading non-existent WASM file + ctx := context.Background() + backend := &mockBackendCaller{} + _, err := guard.NewWasmGuard(ctx, "test-guard", "/nonexistent/guard.wasm", backend) + assert.Error(t, err, "Should fail to load non-existent WASM file") + assert.Contains(t, err.Error(), "failed to read WASM file") +} + +// TestWasmGuardStandardGoError tests the helpful error 
when using standard Go WASM +func TestWasmGuardStandardGoError(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + guardDir := filepath.Join("..", "..", "examples", "guards", "sample-guard") + wasmFile := filepath.Join(guardDir, "guard.wasm") + + // Build with standard Go (will not export functions) + cmd := exec.Command("sh", "-c", "GOOS=wasip1 GOARCH=wasm go build -o guard.wasm main.go") + cmd.Dir = guardDir + output, err := cmd.CombinedOutput() + require.NoError(t, err, "Failed to compile with standard Go: %s", output) + defer os.Remove(wasmFile) + + // Try to create guard - should fail with helpful error + ctx := context.Background() + backend := &mockBackendCaller{} + _, err = guard.NewWasmGuard(ctx, "test-guard", wasmFile, backend) + + require.Error(t, err) + assert.Contains(t, err.Error(), "TinyGo is required") + assert.Contains(t, err.Error(), "standard Go") + t.Logf("Helpful error message: %v", err) +}