diff --git a/.github/workflows/daily-cli-performance.lock.yml b/.github/workflows/daily-cli-performance.lock.yml
index 59ac962be6..d2287e1d17 100644
--- a/.github/workflows/daily-cli-performance.lock.yml
+++ b/.github/workflows/daily-cli-performance.lock.yml
@@ -1154,43 +1154,47 @@ jobs:
           **Issue template:**
 
           ```markdown
-          # Performance Regression Detected
+          ### šŸ“Š Performance Regression Detected
 
-          ## Benchmark: [BenchmarkName]
+          #### Benchmark: [BenchmarkName]
 
           **Current Performance**: [current_ns] ns/op
           **Historical Average**: [avg_historical_ns] ns/op
           **Change**: [change_percent]% slower
 
-          ## Details
-
-          This benchmark has regressed by more than 10% compared to the 7-day historical average.
+          
+          šŸ“ˆ Detailed Performance Metrics
 
-          ### Performance Metrics
+          #### Performance Comparison
 
           - **ns/op**: [current_ns] (was [avg_historical_ns])
           - **Change**: +[change_percent]%
           - **Historical Data Points**: [data_points]
 
-          ### Baseline Targets
+          #### Baseline Targets
 
           - Simple workflows: <100ms
           - Complex workflows: <500ms
           - MCP-heavy workflows: <1s
 
-          ## Recommended Actions
+          
+
+          ### šŸ’” Recommended Actions
 
           1. Review recent changes to the compilation pipeline
           2. Run `make bench-memory` to generate memory profiles
           3. Use `go tool pprof` to identify hotspots
           4. Compare with previous benchmark results: `benchstat`
 
-          ## Additional Context
+          
+          šŸ“‹ Additional Context
 
           - **Run ID**: __GH_AW_GITHUB_RUN_ID__
           - **Date**: [date]
           - **Workflow**: [Daily CLI Performance](__GH_AW_GITHUB_SERVER_URL__/__GH_AW_GITHUB_REPOSITORY__/actions/runs/__GH_AW_GITHUB_RUN_ID__)
 
+          
+
           ---
           *Automatically generated by Daily CLI Performance workflow*
           ```
@@ -1249,17 +1253,47 @@ jobs:
 
           ## Phase 5: Generate Performance Report
 
-          ### 5.1 Create Summary Report
+          ### 5.1 Report Formatting Guidelines
+
+          PROMPT_EOF
+          cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT"
+          When generating your performance report, follow these markdown formatting guidelines:
+
+          #### Header Levels
+          Use h3 (###) or lower for all headers in your report to maintain proper document hierarchy. The issue or discussion title serves as h1, so all content headers should start at h3.
+
+          #### Progressive Disclosure
+          Wrap long sections in `
Section Name` tags to improve readability and reduce scrolling. This creates a more navigable report that doesn't overwhelm readers with information.
+
+          **Example structure:**
+          ```markdown
+          
+          Full Performance Details
+
+          [Long detailed content here...]
+
+          
+          ```
+
+          #### Suggested Report Structure
+          Structure your performance report with these sections:
+          - **Brief summary** (always visible): Key findings, overall status, critical issues
+          - **Key performance metrics** (always visible): Most important numbers and comparisons
+          - **Detailed benchmark results** (in `
` tags): Complete benchmark data, raw numbers
+          - **Historical comparisons** (in `
` tags): Trend analysis, historical context
+          - **Recommendations** (always visible): Specific actionable items
+
+          This structure follows design principles of building trust through clarity, exceeding expectations with helpful context, creating delight through progressive disclosure, and maintaining consistency with other reporting workflows.
+
+          ### 5.2 Create Summary Report
 
           Generate a comprehensive summary of today's benchmark run:
 
           ```bash
-          PROMPT_EOF
-          cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT"
           cat > /tmp/gh-aw/benchmarks/generate_report.py << 'EOF'
           #!/usr/bin/env python3
           """
-          Generate performance summary report
+          Generate performance summary report with proper markdown formatting
           """
 
           import json
@@ -1284,6 +1318,9 @@ jobs:
               with open(CURRENT_FILE, 'r') as f:
                   current = json.load(f)
 
+              summary = analysis['summary']
+
+              # Print terminal output (for logs)
               print("\n" + "="*70)
               print(" DAILY CLI PERFORMANCE BENCHMARK REPORT")
               print("="*70)
@@ -1293,39 +1330,111 @@ jobs:
               print("\n" + "-"*70)
               print("SUMMARY")
               print("-"*70)
-              summary = analysis['summary']
               print(f"Total Benchmarks: {summary['total']}")
               print(f" āœ… Stable: {summary['stable']}")
               print(f" ⚔ Warnings: {summary['warnings']}")
               print(f" āš ļø Regressions: {summary['regressions']}")
               print(f" ✨ Improvements: {summary['improvements']}")
 
-              print("\n" + "-"*70)
-              print("DETAILED RESULTS")
-              print("-"*70)
-
-              for name, result in sorted(analysis['benchmarks'].items()):
-                  metrics = current['benchmarks'][name]
-                  status_icon = {
-                      'regression': 'āš ļø ',
-                      'warning': '⚔',
-                      'improvement': '✨',
-                      'stable': 'āœ“',
-                      'baseline': 'ā„¹ļø '
-                  }.get(result['status'], '?')
+              # Generate markdown report following formatting guidelines
+              with open('/tmp/gh-aw/benchmarks/report.md', 'w') as f:
+                  # Brief summary (always visible)
+                  f.write("### šŸ“Š Performance Summary\n\n")
+                  f.write(f"**Date**: {analysis['date']} \n")
+                  f.write(f"**Analysis Status**: ")
+
+                  if summary['regressions'] > 0:
+                      f.write(f"āš ļø {summary['regressions']} regression(s) detected\n\n")
+                  elif summary['warnings'] > 0:
+                      f.write(f"⚔ {summary['warnings']} warning(s) detected\n\n")
+                  elif summary['improvements'] > 0:
+                      f.write(f"✨ {summary['improvements']} improvement(s) detected\n\n")
+                  else:
+                      f.write("āœ… All benchmarks stable\n\n")
+
+                  # Key performance metrics (always visible)
+                  f.write("### šŸŽÆ Key Metrics\n\n")
+                  f.write(f"- **Total Benchmarks**: {summary['total']}\n")
+                  f.write(f"- **Stable**: {summary['stable']}\n")
+                  f.write(f"- **Warnings**: {summary['warnings']}\n")
+                  f.write(f"- **Regressions**: {summary['regressions']}\n")
+                  f.write(f"- **Improvements**: {summary['improvements']}\n\n")
+
+                  # Detailed benchmark results (in details tag)
+                  f.write("
\n")
+                  f.write("šŸ“ˆ Detailed Benchmark Results\n\n")
 
-                  print(f"\n{status_icon} {name}")
-                  print(f" Current: {format_ns(result['current_ns'])}")
-                  if result['avg_historical_ns']:
-                      print(f" Historical Avg: {format_ns(result['avg_historical_ns'])}")
-                  print(f" Change: {result['change_percent']:+.1f}%")
-                  print(f" Memory: {metrics['bytes_per_op']} B/op")
-                  print(f" Allocations: {metrics['allocs_per_op']} allocs/op")
-                  if result['status'] != 'baseline':
-                      print(f" {result['message']}")
+                  for name, result in sorted(analysis['benchmarks'].items()):
+                      metrics = current['benchmarks'][name]
+                      status_icon = {
+                          'regression': 'āš ļø',
+                          'warning': '⚔',
+                          'improvement': '✨',
+                          'stable': 'āœ“',
+                          'baseline': 'ā„¹ļø'
+                      }.get(result['status'], '?')
+
+                      f.write(f"#### {status_icon} {name}\n\n")
+                      f.write(f"- **Current**: {format_ns(result['current_ns'])}\n")
+                      if result['avg_historical_ns']:
+                          f.write(f"- **Historical Average**: {format_ns(result['avg_historical_ns'])}\n")
+                      f.write(f"- **Change**: {result['change_percent']:+.1f}%\n")
+                      f.write(f"- **Memory**: {metrics['bytes_per_op']} B/op\n")
+                      f.write(f"- **Allocations**: {metrics['allocs_per_op']} allocs/op\n")
+                      if result['status'] != 'baseline':
+                          f.write(f"- **Status**: {result['message']}\n")
+                      f.write("\n")
+
+                  f.write("
\n\n")
+
+                  # Historical comparisons (in details tag)
+                  f.write("
\n")
+                  f.write("šŸ“‰ Historical Comparisons\n\n")
+                  f.write("### Trend Analysis\n\n")
+
+                  # Group by status
+                  regressions = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'regression']
+                  warnings = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'warning']
+                  improvements = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'improvement']
+
+                  if regressions:
+                      f.write("#### āš ļø Regressions\n\n")
+                      for name, res in regressions:
+                          f.write(f"- **{name}**: {res['change_percent']:+.1f}% slower (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+                      f.write("\n")
+
+                  if warnings:
+                      f.write("#### ⚔ Warnings\n\n")
+                      for name, res in warnings:
+                          f.write(f"- **{name}**: {res['change_percent']:+.1f}% slower (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+                      f.write("\n")
+
+                  if improvements:
+                      f.write("#### ✨ Improvements\n\n")
+                      for name, res in improvements:
+                          f.write(f"- **{name}**: {res['change_percent']:+.1f}% faster (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+                      f.write("\n")
+
+                  f.write("
\n\n")
+
+                  # Recommendations (always visible)
+                  f.write("### šŸ’” Recommendations\n\n")
+                  if summary['regressions'] > 0:
+                      f.write("1. Review recent changes to the compilation pipeline\n")
+                      f.write("2. Run `make bench-memory` to generate memory profiles\n")
+                      f.write("3. Use `go tool pprof` to identify performance hotspots\n")
+                      f.write("4. Compare with previous benchmark results using `benchstat`\n")
+                  elif summary['warnings'] > 0:
+                      f.write("1. Monitor the warned benchmarks closely in upcoming runs\n")
+                      f.write("2. Consider running manual profiling if warnings persist\n")
+                  elif summary['improvements'] > 0:
+                      f.write("1. Document the changes that led to these improvements\n")
+                      f.write("2. Consider applying similar optimizations to other areas\n")
+                  else:
+                      f.write("1. Continue monitoring performance daily\n")
+                      f.write("2. Performance is stable - good work!\n")
 
-              print("\n" + "="*70)
-              print()
+              print("\nāœ… Markdown report generated at /tmp/gh-aw/benchmarks/report.md")
 
           if __name__ == '__main__':
               main()
@@ -1333,6 +1442,11 @@ jobs:
 
           chmod +x /tmp/gh-aw/benchmarks/generate_report.py
           python3 /tmp/gh-aw/benchmarks/generate_report.py
+
+          # Display the generated markdown report
+          echo ""
+          echo "=== Generated Markdown Report ==="
+          cat /tmp/gh-aw/benchmarks/report.md
           ```
 
           ## Success Criteria
diff --git a/.github/workflows/daily-cli-performance.md b/.github/workflows/daily-cli-performance.md
index 2d27b68c12..8a21d6553f 100644
--- a/.github/workflows/daily-cli-performance.md
+++ b/.github/workflows/daily-cli-performance.md
@@ -342,43 +342,47 @@ If regressions are detected, open issues with detailed information.
 **Issue template:**
 
 ```markdown
-# Performance Regression Detected
+### šŸ“Š Performance Regression Detected
 
-## Benchmark: [BenchmarkName]
+#### Benchmark: [BenchmarkName]
 
 **Current Performance**: [current_ns] ns/op
 **Historical Average**: [avg_historical_ns] ns/op
 **Change**: [change_percent]% slower
 
-## Details
+
+šŸ“ˆ Detailed Performance Metrics
 
-This benchmark has regressed by more than 10% compared to the 7-day historical average.
-
-### Performance Metrics
+#### Performance Comparison
 
 - **ns/op**: [current_ns] (was [avg_historical_ns])
 - **Change**: +[change_percent]%
 - **Historical Data Points**: [data_points]
 
-### Baseline Targets
+#### Baseline Targets
 
 - Simple workflows: <100ms
 - Complex workflows: <500ms
 - MCP-heavy workflows: <1s
 
-## Recommended Actions
+

+
+### šŸ’” Recommended Actions
 
 1. Review recent changes to the compilation pipeline
 2. Run `make bench-memory` to generate memory profiles
 3. Use `go tool pprof` to identify hotspots
 4. Compare with previous benchmark results: `benchstat`
 
-## Additional Context
+

+šŸ“‹ Additional Context
 
 - **Run ID**: ${{ github.run_id }}
 - **Date**: [date]
 - **Workflow**: [Daily CLI Performance](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
 
+
+
 ---
 *Automatically generated by Daily CLI Performance workflow*
 ```
@@ -437,7 +441,37 @@ Now, for each regression found, use the `create issue` tool to open an issue wit
 
 ## Phase 5: Generate Performance Report
 
-### 5.1 Create Summary Report
+### 5.1 Report Formatting Guidelines
+
+When generating your performance report, follow these markdown formatting guidelines:
+
+#### Header Levels
+Use h3 (###) or lower for all headers in your report to maintain proper document hierarchy. The issue or discussion title serves as h1, so all content headers should start at h3.
+
+#### Progressive Disclosure
+Wrap long sections in `
Section Name` tags to improve readability and reduce scrolling. This creates a more navigable report that doesn't overwhelm readers with information.
+
+**Example structure:**
+```markdown
+
+Full Performance Details
+
+[Long detailed content here...]
+
+
+```
+
+#### Suggested Report Structure
+Structure your performance report with these sections:
+- **Brief summary** (always visible): Key findings, overall status, critical issues
+- **Key performance metrics** (always visible): Most important numbers and comparisons
+- **Detailed benchmark results** (in `
` tags): Complete benchmark data, raw numbers
+- **Historical comparisons** (in `
` tags): Trend analysis, historical context
+- **Recommendations** (always visible): Specific actionable items
+
+This structure follows design principles of building trust through clarity, exceeding expectations with helpful context, creating delight through progressive disclosure, and maintaining consistency with other reporting workflows.
+
+### 5.2 Create Summary Report
 
 Generate a comprehensive summary of today's benchmark run:
 
@@ -445,7 +479,7 @@ Generate a comprehensive summary of today's benchmark run:
 cat > /tmp/gh-aw/benchmarks/generate_report.py << 'EOF'
 #!/usr/bin/env python3
 """
-Generate performance summary report
+Generate performance summary report with proper markdown formatting
 """
 
 import json
@@ -470,6 +504,9 @@ def main():
     with open(CURRENT_FILE, 'r') as f:
         current = json.load(f)
 
+    summary = analysis['summary']
+
+    # Print terminal output (for logs)
     print("\n" + "="*70)
     print(" DAILY CLI PERFORMANCE BENCHMARK REPORT")
     print("="*70)
@@ -479,39 +516,111 @@ def main():
     print("\n" + "-"*70)
     print("SUMMARY")
     print("-"*70)
-    summary = analysis['summary']
     print(f"Total Benchmarks: {summary['total']}")
     print(f" āœ… Stable: {summary['stable']}")
     print(f" ⚔ Warnings: {summary['warnings']}")
     print(f" āš ļø Regressions: {summary['regressions']}")
     print(f" ✨ Improvements: {summary['improvements']}")
 
-    print("\n" + "-"*70)
-    print("DETAILED RESULTS")
-    print("-"*70)
-
-    for name, result in sorted(analysis['benchmarks'].items()):
-        metrics = current['benchmarks'][name]
-        status_icon = {
-            'regression': 'āš ļø ',
-            'warning': '⚔',
-            'improvement': '✨',
-            'stable': 'āœ“',
-            'baseline': 'ā„¹ļø '
-        }.get(result['status'], '?')
+    # Generate markdown report following formatting guidelines
+    with open('/tmp/gh-aw/benchmarks/report.md', 'w') as f:
+        # Brief summary (always visible)
+        f.write("### šŸ“Š Performance Summary\n\n")
+        f.write(f"**Date**: {analysis['date']} \n")
+        f.write(f"**Analysis Status**: ")
+
+        if summary['regressions'] > 0:
+            f.write(f"āš ļø {summary['regressions']} regression(s) detected\n\n")
+        elif summary['warnings'] > 0:
+            f.write(f"⚔ {summary['warnings']} warning(s) detected\n\n")
+        elif summary['improvements'] > 0:
+            f.write(f"✨ {summary['improvements']} improvement(s) detected\n\n")
+        else:
+            f.write("āœ… All benchmarks stable\n\n")
+
+        # Key performance metrics (always visible)
+        f.write("### šŸŽÆ Key Metrics\n\n")
+        f.write(f"- **Total Benchmarks**: {summary['total']}\n")
+        f.write(f"- **Stable**: {summary['stable']}\n")
+        f.write(f"- **Warnings**: {summary['warnings']}\n")
+        f.write(f"- **Regressions**: {summary['regressions']}\n")
+        f.write(f"- **Improvements**: {summary['improvements']}\n\n")
+
+        # Detailed benchmark results (in details tag)
+        f.write("
\n")
+        f.write("šŸ“ˆ Detailed Benchmark Results\n\n")
+
+        for name, result in sorted(analysis['benchmarks'].items()):
+            metrics = current['benchmarks'][name]
+            status_icon = {
+                'regression': 'āš ļø',
+                'warning': '⚔',
+                'improvement': '✨',
+                'stable': 'āœ“',
+                'baseline': 'ā„¹ļø'
+            }.get(result['status'], '?')
+
+            f.write(f"#### {status_icon} {name}\n\n")
+            f.write(f"- **Current**: {format_ns(result['current_ns'])}\n")
+            if result['avg_historical_ns']:
+                f.write(f"- **Historical Average**: {format_ns(result['avg_historical_ns'])}\n")
+            f.write(f"- **Change**: {result['change_percent']:+.1f}%\n")
+            f.write(f"- **Memory**: {metrics['bytes_per_op']} B/op\n")
+            f.write(f"- **Allocations**: {metrics['allocs_per_op']} allocs/op\n")
+            if result['status'] != 'baseline':
+                f.write(f"- **Status**: {result['message']}\n")
+            f.write("\n")
+
+        f.write("
\n\n")
 
-        print(f"\n{status_icon} {name}")
-        print(f" Current: {format_ns(result['current_ns'])}")
-        if result['avg_historical_ns']:
-            print(f" Historical Avg: {format_ns(result['avg_historical_ns'])}")
-        print(f" Change: {result['change_percent']:+.1f}%")
-        print(f" Memory: {metrics['bytes_per_op']} B/op")
-        print(f" Allocations: {metrics['allocs_per_op']} allocs/op")
-        if result['status'] != 'baseline':
-            print(f" {result['message']}")
+        # Historical comparisons (in details tag)
+        f.write("
\n")
+        f.write("šŸ“‰ Historical Comparisons\n\n")
+        f.write("### Trend Analysis\n\n")
+
+        # Group by status
+        regressions = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'regression']
+        warnings = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'warning']
+        improvements = [(name, res) for name, res in analysis['benchmarks'].items() if res['status'] == 'improvement']
+
+        if regressions:
+            f.write("#### āš ļø Regressions\n\n")
+            for name, res in regressions:
+                f.write(f"- **{name}**: {res['change_percent']:+.1f}% slower (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+            f.write("\n")
+
+        if warnings:
+            f.write("#### ⚔ Warnings\n\n")
+            for name, res in warnings:
+                f.write(f"- **{name}**: {res['change_percent']:+.1f}% slower (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+            f.write("\n")
+
+        if improvements:
+            f.write("#### ✨ Improvements\n\n")
+            for name, res in improvements:
+                f.write(f"- **{name}**: {res['change_percent']:+.1f}% faster (was {format_ns(res['avg_historical_ns'])}, now {format_ns(res['current_ns'])})\n")
+            f.write("\n")
+
+        f.write("
\n\n")
+
+        # Recommendations (always visible)
+        f.write("### šŸ’” Recommendations\n\n")
+        if summary['regressions'] > 0:
+            f.write("1. Review recent changes to the compilation pipeline\n")
+            f.write("2. Run `make bench-memory` to generate memory profiles\n")
+            f.write("3. Use `go tool pprof` to identify performance hotspots\n")
+            f.write("4. Compare with previous benchmark results using `benchstat`\n")
+        elif summary['warnings'] > 0:
+            f.write("1. Monitor the warned benchmarks closely in upcoming runs\n")
+            f.write("2. Consider running manual profiling if warnings persist\n")
+        elif summary['improvements'] > 0:
+            f.write("1. Document the changes that led to these improvements\n")
+            f.write("2. Consider applying similar optimizations to other areas\n")
+        else:
+            f.write("1. Continue monitoring performance daily\n")
+            f.write("2. Performance is stable - good work!\n")
 
-    print("\n" + "="*70)
-    print()
+    print("\nāœ… Markdown report generated at /tmp/gh-aw/benchmarks/report.md")
 
 if __name__ == '__main__':
     main()
@@ -519,6 +628,11 @@ EOF
 
 chmod +x /tmp/gh-aw/benchmarks/generate_report.py
 python3 /tmp/gh-aw/benchmarks/generate_report.py
+
+# Display the generated markdown report
+echo ""
+echo "=== Generated Markdown Report ==="
+cat /tmp/gh-aw/benchmarks/report.md
 ```
 
 ## Success Criteria
diff --git a/pkg/cli/templates/create-agentic-workflow.md b/pkg/cli/templates/create-agentic-workflow.md
index 161444b155..1b31386fde 100644
--- a/pkg/cli/templates/create-agentic-workflow.md
+++ b/pkg/cli/templates/create-agentic-workflow.md
@@ -181,7 +181,7 @@ DO NOT ask all these questions at once; instead, engage in a back-and-forth conv
 - šŸ“‹ **DO NOT include other fields with good defaults** - Let the compiler use sensible defaults unless customization is needed.
 - Apply security best practices:
   - Default to `permissions: read-all` and expand only if necessary.
-  - Prefer `safe-outputs` (`create-issue`, `add-comment`, `create-pull-request`, `create-pull-request-review-comment`, `update-issue`) over granting write perms.
+  - Prefer `safe-outputs` (`create-issue`, `add-comment`, `create-pull-request`, `create-pull-request-review-comment`, `update-issue`, `dispatch-workflow`) over granting write perms.
   - For custom write operations to external services (email, Slack, webhooks), use `safe-outputs.jobs:` to create custom safe output jobs.
   - Constrain `network:` to the minimum required ecosystems/domains.
   - Use sanitized expressions (`${{ needs.activation.outputs.text }}`) instead of raw event text.
diff --git a/pkg/cli/templates/create-shared-agentic-workflow.md b/pkg/cli/templates/create-shared-agentic-workflow.md
index 76e0675728..577bc3660c 100644
--- a/pkg/cli/templates/create-shared-agentic-workflow.md
+++ b/pkg/cli/templates/create-shared-agentic-workflow.md
@@ -34,7 +34,7 @@ You are a conversational chat agent that interacts with the user to design secur
 **Move Write Operations to Safe Outputs**
 - Never grant direct write permissions in shared components
 - Use `safe-outputs:` configuration for all write operations
-- Common safe outputs: `create-issue`, `add-comment`, `create-pull-request`, `update-issue`
+- Common safe outputs: `create-issue`, `add-comment`, `create-pull-request`, `update-issue`, `dispatch-workflow`
 - Let consuming workflows decide which safe outputs to enable
 
 **Process Agent Output in Safe Jobs**
diff --git a/pkg/cli/templates/github-agentic-workflows.md b/pkg/cli/templates/github-agentic-workflows.md
index 0724608609..f350e65b03 100644
--- a/pkg/cli/templates/github-agentic-workflows.md
+++ b/pkg/cli/templates/github-agentic-workflows.md
@@ -568,6 +568,14 @@ The YAML frontmatter supports these fields:
       target-repo: "owner/repo" # Optional: cross-repository
     ```
     Publishes workflow artifacts to an orphaned git branch for persistent storage. Default allowed extensions include common non-executable types. Maximum file size is 50MB (51200 KB).
+  - `dispatch-workflow:` - Trigger other workflows with inputs
+    ```yaml
+    safe-outputs:
+      dispatch-workflow:
+        workflows: [workflow-name] # Required: list of workflow names to allow
+        max: 3 # Optional: max dispatches (default: 1, max: 3)
+    ```
+    Triggers other agentic workflows in the same repository using workflow_dispatch. Agent output includes `workflow_name` (without .md extension) and optional `inputs` (key-value pairs). Not supported for cross-repository operations.
   - `create-code-scanning-alert:` - Generate SARIF security advisories
     ```yaml
     safe-outputs: