Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pkg/parser/import_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,22 @@ func (c *ImportCache) Get(owner, repo, path, sha string) (string, bool) {
// Check if the cached file exists
if _, err := os.Stat(fullCachePath); err != nil {
if os.IsNotExist(err) {
importCacheLog.Printf("Cache miss: %s/%s/%s@%s", owner, repo, path, sha)
importCacheLog.Printf("Cache miss: %s", FormatWorkflowSpec(owner, repo, path, sha))
} else {
// Log other types of errors (permissions, I/O issues, etc.)
importCacheLog.Printf("Cache access error for %s/%s/%s@%s: %v", owner, repo, path, sha, err)
importCacheLog.Printf("Cache access error for %s: %v", FormatWorkflowSpec(owner, repo, path, sha), err)
}
return "", false
}

importCacheLog.Printf("Cache hit: %s/%s/%s@%s -> %s", owner, repo, path, sha, fullCachePath)
importCacheLog.Printf("Cache hit: %s -> %s", FormatWorkflowSpec(owner, repo, path, sha), fullCachePath)
return fullCachePath, true
}

// Set stores a new cache entry by saving the content to the cache directory
// sha parameter should be the resolved commit SHA
func (c *ImportCache) Set(owner, repo, path, sha string, content []byte) (string, error) {
importCacheLog.Printf("Setting cache entry: %s/%s/%s@%s, size=%d bytes", owner, repo, path, sha, len(content))
importCacheLog.Printf("Setting cache entry: %s, size=%d bytes", FormatWorkflowSpec(owner, repo, path, sha), len(content))

// Validate file size (max 10MB)
const maxFileSize = 10 * 1024 * 1024
Expand Down Expand Up @@ -131,7 +131,7 @@ func (c *ImportCache) Set(owner, repo, path, sha string, content []byte) (string
return "", err
}

importCacheLog.Printf("Cached import: %s/%s/%s@%s -> %s", owner, repo, path, sha, fullCachePath)
importCacheLog.Printf("Cached import: %s -> %s", FormatWorkflowSpec(owner, repo, path, sha), fullCachePath)
return fullCachePath, nil
}

Expand Down
14 changes: 7 additions & 7 deletions pkg/parser/import_cache_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ func TestImportCacheMultipleFiles(t *testing.T) {
for _, f := range files {
_, err := cache.Set(f.owner, f.repo, f.path, f.sha, []byte(f.content))
if err != nil {
t.Fatalf("Failed to cache file %s/%s/%s@%s: %v", f.owner, f.repo, f.path, f.sha, err)
t.Fatalf("Failed to cache file %s: %v", FormatWorkflowSpec(f.owner, f.repo, f.path, f.sha), err)
}
}

// Verify all files are retrievable
for _, f := range files {
path, found := cache.Get(f.owner, f.repo, f.path, f.sha)
if !found {
t.Errorf("Failed to retrieve cached file %s/%s/%s@%s", f.owner, f.repo, f.path, f.sha)
t.Errorf("Failed to retrieve cached file %s", FormatWorkflowSpec(f.owner, f.repo, f.path, f.sha))
continue
}

Expand All @@ -131,8 +131,8 @@ func TestImportCacheMultipleFiles(t *testing.T) {
}

if string(content) != f.content {
t.Errorf("Content mismatch for %s/%s/%s@%s. Expected %q, got %q",
f.owner, f.repo, f.path, f.sha, f.content, string(content))
t.Errorf("Content mismatch for %s. Expected %q, got %q",
FormatWorkflowSpec(f.owner, f.repo, f.path, f.sha), f.content, string(content))
}
}

Expand All @@ -142,7 +142,7 @@ func TestImportCacheMultipleFiles(t *testing.T) {
for _, f := range files {
path, found := cache2.Get(f.owner, f.repo, f.path, f.sha)
if !found {
t.Errorf("Failed to retrieve cached file from new instance %s/%s/%s@%s", f.owner, f.repo, f.path, f.sha)
t.Errorf("Failed to retrieve cached file from new instance %s", FormatWorkflowSpec(f.owner, f.repo, f.path, f.sha))
continue
}

Expand All @@ -153,8 +153,8 @@ func TestImportCacheMultipleFiles(t *testing.T) {
}

if string(content) != f.content {
t.Errorf("Content mismatch from new instance for %s/%s/%s@%s. Expected %q, got %q",
f.owner, f.repo, f.path, f.sha, f.content, string(content))
t.Errorf("Content mismatch from new instance for %s. Expected %q, got %q",
FormatWorkflowSpec(f.owner, f.repo, f.path, f.sha), f.content, string(content))
}
}
}
136 changes: 117 additions & 19 deletions pkg/parser/import_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,63 @@ func ProcessImportsFromFrontmatter(frontmatter map[string]any, baseDir string) (
return result.MergedTools, result.MergedEngines, nil
}

// FormatWorkflowSpec formats a workflowspec string from its components.
// This is the canonical format used throughout the codebase for logging,
// error messages, cache keys, and spec construction.
//
// Examples:
//
// FormatWorkflowSpec("elastic", "ai-github-actions", "gh-agent-workflows/file.md", "main")
// → "elastic/ai-github-actions/gh-agent-workflows/file.md@main"
//
// FormatWorkflowSpec("org", "repo", "dir/file.md", "v1.0")
// → "org/repo/dir/file.md@v1.0"
func FormatWorkflowSpec(owner, repo, filePath, ref string) string {
return fmt.Sprintf("%s/%s/%s@%s", owner, repo, filePath, ref)
}

// remoteImportOrigin tracks the remote repository context for an imported file.
// When a file is fetched from a remote GitHub repository via workflowspec,
// its nested relative imports must be resolved against the same remote repo.
type remoteImportOrigin struct {
Owner string // Repository owner (e.g., "elastic")
Repo string // Repository name (e.g., "ai-github-actions")
Ref string // Git ref - branch, tag, or SHA (e.g., "main", "v1.0.0", "abc123...")
Owner string // Repository owner (e.g., "elastic")
Repo string // Repository name (e.g., "ai-github-actions")
Ref string // Git ref - branch, tag, or SHA (e.g., "main", "v1.0.0", "abc123...")
BasePath string // Base path for resolving nested imports (e.g., "gh-agent-workflows" or ".github/workflows")
}

// String returns the origin identity as "owner/repo@ref" for display in logs.
func (o *remoteImportOrigin) String() string {
return fmt.Sprintf("%s/%s@%s", o.Owner, o.Repo, o.Ref)
}

// ResolveNestedImport constructs a full workflowspec for a relative import path
// resolved against this origin's base path (the parent directory of the importing
// file). The relative path may use "../" to traverse up from the base path,
// following standard relative path semantics.
//
// Examples:
//
// origin{BasePath:"gh-agent-workflows/gh-aw-workflows"}
// .ResolveNestedImport("../gh-aw-fragments/tools.md")
// → "elastic/ai-github-actions/gh-agent-workflows/gh-aw-fragments/tools.md@main"
//
// origin{BasePath:"workflows"}
// .ResolveNestedImport("shared/reporting.md")
// → "org/repo/workflows/shared/reporting.md@main"
//
// origin{BasePath:""}
// .ResolveNestedImport("dir/file.md")
// → "org/repo/dir/file.md@v1.0"
func (o *remoteImportOrigin) ResolveNestedImport(relativePath string) string {
cleanRelative := path.Clean(strings.TrimPrefix(relativePath, "./"))
var fullPath string
if o.BasePath != "" {
fullPath = path.Clean(o.BasePath + "/" + cleanRelative)
} else {
fullPath = cleanRelative
}
return FormatWorkflowSpec(o.Owner, o.Repo, fullPath, o.Ref)
}

// importQueueItem represents a file to be imported with its context
Expand All @@ -104,9 +154,27 @@ type importQueueItem struct {
remoteOrigin *remoteImportOrigin // Remote origin context (non-nil when imported from a remote repo)
}

// parseRemoteOrigin extracts the remote origin (owner, repo, ref) from a workflowspec path.
// parseRemoteOrigin extracts the remote origin (owner, repo, ref, basePath) from a workflowspec path.
// Returns nil if the path is not a valid workflowspec.
// Format: owner/repo/path[@ref] where ref defaults to "main" if not specified.
//
// BasePath is the parent directory of the file within the repository. It is
// extracted by removing the last component (the filename) from the repo-relative
// path. Nested imports are resolved relative to this directory, following
// standard relative path semantics (including "../" traversal).
//
// Examples:
//
// "owner/repo/workflows/code-simplifier.md@main"
// → BasePath = "workflows"
// → import "shared/reporting.md" resolves to "workflows/shared/reporting.md"
//
// "elastic/repo/gh-agent-workflows/gh-aw-workflows/file.md@sha"
// → BasePath = "gh-agent-workflows/gh-aw-workflows"
// → import "../gh-aw-fragments/tools.md" resolves to "gh-agent-workflows/gh-aw-fragments/tools.md"
//
// "owner/repo/file.md@sha"
// → BasePath = "" (file at repo root)
func parseRemoteOrigin(spec string) *remoteImportOrigin {
// Remove section reference if present
cleanSpec := spec
Expand All @@ -128,10 +196,21 @@ func parseRemoteOrigin(spec string) *remoteImportOrigin {
return nil
}

// Extract base path: the parent directory of the file within the repo.
// Remove only the last component (the filename) to get the directory path.
// For "owner/repo/dir/file.md", basePath = "dir".
// For "owner/repo/file.md", basePath = "" (file is at repo root).
repoPathParts := slashParts[2:] // everything after owner/repo
var basePath string
if len(repoPathParts) > 1 {
basePath = strings.Join(repoPathParts[:len(repoPathParts)-1], "/")
}

return &remoteImportOrigin{
Owner: slashParts[0],
Repo: slashParts[1],
Ref: ref,
Owner: slashParts[0],
Repo: slashParts[1],
Ref: ref,
BasePath: basePath,
}
}

Expand Down Expand Up @@ -306,7 +385,7 @@ func processImportsFromFrontmatterWithManifestAndSource(frontmatter map[string]a
if isWorkflowSpec(filePath) {
origin = parseRemoteOrigin(filePath)
if origin != nil {
importLog.Printf("Tracking remote origin for workflowspec: %s/%s@%s", origin.Owner, origin.Repo, origin.Ref)
importLog.Printf("Tracking remote origin for workflowspec: %s (basePath: %q)", origin, origin.BasePath)
}
}

Expand Down Expand Up @@ -492,24 +571,32 @@ func processImportsFromFrontmatterWithManifestAndSource(frontmatter map[string]a

if item.remoteOrigin != nil && !isWorkflowSpec(nestedFilePath) {
// Parent was fetched from a remote repo and nested path is relative.
// Convert to a workflowspec that resolves against the remote repo's
// .github/workflows/ directory (mirrors local compilation behavior).
cleanPath := path.Clean(strings.TrimPrefix(nestedFilePath, "./"))
// Convert to a workflowspec that resolves against the parent's base
// path (parent directory) in the remote repo. Relative paths including
// "../" traversals are supported via path.Clean in ResolveNestedImport.

// Reject paths that escape .github/workflows/ (e.g., ../../../etc/passwd)
if cleanPath == ".." || strings.HasPrefix(cleanPath, "../") || path.IsAbs(cleanPath) {
return nil, fmt.Errorf("nested import '%s' from remote file '%s' escapes .github/workflows/ base directory", nestedFilePath, item.importPath)
// Reject absolute paths
if path.IsAbs(nestedFilePath) {
return nil, fmt.Errorf("nested import '%s' from remote file '%s' uses absolute path", nestedFilePath, item.importPath)
}

resolvedPath = item.remoteOrigin.ResolveNestedImport(nestedFilePath)

// After resolving against the base path, reject paths that escape
// the repository root (resolved path starts with "../")
resolvedFile := strings.SplitN(resolvedPath, "@", 2)[0] // strip @ref
repoRelative := strings.SplitN(resolvedFile, "/", 3) // owner/repo/path
if len(repoRelative) >= 3 && strings.HasPrefix(repoRelative[2], "..") {
return nil, fmt.Errorf("nested import '%s' from remote file '%s' escapes repository root", nestedFilePath, item.importPath)
}

resolvedPath = fmt.Sprintf("%s/%s/.github/workflows/%s@%s",
item.remoteOrigin.Owner, item.remoteOrigin.Repo, cleanPath, item.remoteOrigin.Ref)
nestedRemoteOrigin = item.remoteOrigin
importLog.Printf("Resolving nested import as remote workflowspec: %s -> %s", nestedFilePath, resolvedPath)
importLog.Printf("Resolving nested import as remote workflowspec: %s -> %s (basePath: %q)", nestedFilePath, resolvedPath, item.remoteOrigin.BasePath)
} else if isWorkflowSpec(nestedFilePath) {
// Nested import is itself a workflowspec - parse its remote origin
nestedRemoteOrigin = parseRemoteOrigin(nestedFilePath)
if nestedRemoteOrigin != nil {
importLog.Printf("Nested workflowspec import detected: %s (origin: %s/%s@%s)", nestedFilePath, nestedRemoteOrigin.Owner, nestedRemoteOrigin.Repo, nestedRemoteOrigin.Ref)
importLog.Printf("Nested workflowspec import detected: %s (origin: %s)", nestedFilePath, nestedRemoteOrigin)
}
}

Expand All @@ -536,8 +623,19 @@ func processImportsFromFrontmatterWithManifestAndSource(frontmatter map[string]a
// Check for cycles - skip if already visited
if !visited[nestedFullPath] {
visited[nestedFullPath] = true

// Use the resolved path as the import path so that downstream
// phases (topological sort, security scan) can re-resolve it.
// For remote nested imports, resolvedPath is the full workflowspec
// (e.g., "elastic/repo/base/dir/file.md@sha"); for local and
// workflowspec imports it equals nestedFilePath (unchanged).
queueImportPath := resolvedPath
if nestedSectionName != "" {
queueImportPath += "#" + nestedSectionName
}

queue = append(queue, importQueueItem{
importPath: nestedImportPath,
importPath: queueImportPath,
fullPath: nestedFullPath,
sectionName: nestedSectionName,
baseDir: baseDir, // Use original baseDir, not nestedBaseDir
Expand Down
Loading