diff --git a/.github/skills/azdo-helix-failures/SKILL.md b/.github/skills/azdo-helix-failures/SKILL.md new file mode 100644 index 00000000000000..d12c199dbdabb3 --- /dev/null +++ b/.github/skills/azdo-helix-failures/SKILL.md @@ -0,0 +1,118 @@ +--- +name: azdo-helix-failures +description: Retrieve and analyze test failures from Azure DevOps builds and Helix test runs for dotnet repositories. Use when investigating CI failures, debugging failing PRs, or given URLs containing dev.azure.com or helix.dot.net. +--- + +# Azure DevOps and Helix Failure Analysis + +Analyze CI test failures in Azure DevOps and Helix for dotnet repositories (runtime, sdk, aspnetcore, roslyn, and more). + +## When to Use This Skill + +Use this skill when: +- Investigating CI failures or checking why a PR's tests are failing +- Debugging Helix test issues or analyzing build errors +- Given URLs containing `dev.azure.com`, `helix.dot.net`, or GitHub PR links with failing checks +- Asked questions like "why is this PR failing", "analyze the CI failures", or "what's wrong with this build" + +## Quick Start + +```powershell +# Analyze PR failures (most common) - defaults to dotnet/runtime +./scripts/Get-HelixFailures.ps1 -PRNumber 123445 -ShowLogs + +# Analyze by build ID +./scripts/Get-HelixFailures.ps1 -BuildId 1276327 -ShowLogs + +# Query specific Helix work item +./scripts/Get-HelixFailures.ps1 -HelixJob "4b24b2c2-..." -WorkItem "System.Net.Http.Tests" + +# Other dotnet repositories +./scripts/Get-HelixFailures.ps1 -PRNumber 12345 -Repository "dotnet/aspnetcore" +./scripts/Get-HelixFailures.ps1 -PRNumber 67890 -Repository "dotnet/sdk" +./scripts/Get-HelixFailures.ps1 -PRNumber 11111 -Repository "dotnet/roslyn" +``` + +## Key Parameters + +| Parameter | Description | +|-----------|-------------| +| `-PRNumber` | GitHub PR number to analyze | +| `-BuildId` | Azure DevOps build ID | +| `-ShowLogs` | Fetch and display Helix console logs | +| `-Repository` | Target repo (default: dotnet/runtime) | +| `-MaxJobs` | Max failed jobs to show (default: 5) | +| `-SearchMihuBot` | Search MihuBot for related issues | + +## What the Script Does + +1. Fetches Build Analysis for known issues +2. Gets failed jobs from Azure DevOps timeline +3. **Separates canceled jobs from failed jobs** (canceled = dependency failures) +4. Extracts Helix work item failures +5. Fetches console logs (with `-ShowLogs`) +6. Searches for known issues with "Known Build Error" label +7. Correlates failures with PR changes +8. **Provides smart retry recommendations** + +## Interpreting Results + +**Known Issues section**: Failures matching existing GitHub issues - these are tracked and being investigated. + +**Canceled jobs**: Jobs that were canceled (not failed) due to earlier stage failures or timeouts. These don't need separate investigation. + +**PR Change Correlation**: Files changed by PR appearing in failures - likely PR-related. + +**Build errors**: Compilation failures need code fixes. + +**Helix failures**: Test failures on distributed infrastructure. + +**Local test failures**: Some repos (e.g., dotnet/sdk) run tests directly on build agents. These can also match known issues - search for the test name with the "Known Build Error" label. + +## Retry Recommendations + +The script provides a recommendation at the end: + +| Recommendation | Meaning | +|----------------|---------| +| **KNOWN ISSUES DETECTED** | Tracked issues found that may correlate with failures. Review details. | +| **LIKELY PR-RELATED** | Failures correlate with PR changes. 
Fix issues first. | +| **POSSIBLY TRANSIENT** | No clear cause - check main branch, search for issues. | +| **REVIEW REQUIRED** | Could not auto-determine cause. Manual review needed. | + +## Analysis Workflow + +1. **Read PR context first** - Check title, description, comments +2. **Run the script** with `-ShowLogs` for detailed failure info +3. **Check Build Analysis** - Known issues are safe to retry +4. **Correlate with PR changes** - Same files failing = likely PR-related +5. **Interpret patterns**: + - Same error across many jobs → Real code issue + - Device failures (iOS/Android/tvOS) → Often transient infrastructure + - Docker/container image pull failures → Infrastructure issue + - Network timeouts, "host not found" → Transient infrastructure + - Test timeout but tests passed → Executor issue, not test failure + +## Presenting Results + +The script provides a recommendation at the end, but this is based on heuristics and may be incomplete. Before presenting conclusions to the user: + +1. Review the detailed failure information, not just the summary +2. Look for patterns the script may have missed (e.g., related failures across jobs) +3. Consider the PR context (what files changed, what the PR is trying to do) +4. Present findings with appropriate caveats - state what is known vs. uncertain +5. If the script's recommendation seems inconsistent with the details, trust the details + +## References + +- **Helix artifacts & binlogs**: See [references/helix-artifacts.md](references/helix-artifacts.md) +- **Manual investigation steps**: See [references/manual-investigation.md](references/manual-investigation.md) +- **AzDO/Helix details**: See [references/azdo-helix-reference.md](references/azdo-helix-reference.md) + +## Tips + +1. Read PR description and comments first for context +2. Check if same test fails on main branch before assuming transient +3. Look for `[ActiveIssue]` attributes for known skipped tests +4. Use `-SearchMihuBot` for semantic search of related issues +5. Binlogs in artifacts help diagnose MSB4018 task failures diff --git a/.github/skills/azdo-helix-failures/references/azdo-helix-reference.md b/.github/skills/azdo-helix-failures/references/azdo-helix-reference.md new file mode 100644 index 00000000000000..df72252ba78860 --- /dev/null +++ b/.github/skills/azdo-helix-failures/references/azdo-helix-reference.md @@ -0,0 +1,93 @@ +# Azure DevOps and Helix Reference + +## Supported Repositories + +The script works with any dotnet repository that uses Azure DevOps and Helix: + +| Repository | Common Pipelines | +|------------|-----------------| +| `dotnet/runtime` | runtime, runtime-dev-innerloop, dotnet-linker-tests | +| `dotnet/sdk` | dotnet-sdk (mix of local and Helix tests) | +| `dotnet/aspnetcore` | aspnetcore-ci | +| `dotnet/roslyn` | roslyn-CI | +| `dotnet/maui` | maui-public | + +Use `-Repository` to specify the target: +```powershell +./scripts/Get-HelixFailures.ps1 -PRNumber 12345 -Repository "dotnet/aspnetcore" +``` + +## Build Definition IDs (Example: dotnet/runtime) + +Each repository has its own build definition IDs. Here are common ones for dotnet/runtime: + +| Definition ID | Name | Description | +|---------------|------|-------------| +| `129` | runtime | Main PR validation build | +| `133` | runtime-dev-innerloop | Fast innerloop validation | +| `139` | dotnet-linker-tests | ILLinker/trimming tests | + +**Note:** The script auto-discovers builds for a PR, so you rarely need to know definition IDs. 
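+
+If you do want to inspect a pipeline's recent history directly (for example, to check whether a failure also reproduces on `main`), the standard Azure DevOps "Builds - List" REST API can be filtered by definition ID. A minimal sketch, assuming the `dnceng-public` organization/project used throughout this skill and definition `129` from the table above:
+
+```powershell
+$org = "dnceng-public"
+$project = "cbb18261-c48f-4abb-8651-8cdcb5474649"
+$definitionId = 129   # 'runtime' pipeline (see table above)
+
+# List the five most recent builds for this definition
+$url = "https://dev.azure.com/$org/$project/_apis/build/builds?definitions=$definitionId&`$top=5&api-version=7.0"
+$builds = (Invoke-RestMethod -Uri $url).value
+
+$builds | Select-Object id, buildNumber, status, result, sourceBranch | Format-Table
+```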
+ +## Azure DevOps Organizations + +**Public builds (default):** +- Organization: `dnceng-public` +- Project: `cbb18261-c48f-4abb-8651-8cdcb5474649` + +**Internal/private builds:** +- Organization: `dnceng` +- Project GUID: Varies by pipeline + +Override with: +```powershell +./scripts/Get-HelixFailures.ps1 -BuildId 1276327 -Organization "dnceng" -Project "internal-project-guid" +``` + +## Common Pipeline Names (Example: dotnet/runtime) + +| Pipeline | Description | +|----------|-------------| +| `runtime` | Main PR validation build | +| `runtime-dev-innerloop` | Fast innerloop validation | +| `dotnet-linker-tests` | ILLinker/trimming tests | +| `runtime-wasm-perf` | WASM performance tests | +| `runtime-libraries enterprise-linux` | Enterprise Linux compatibility | + +Other repos have different pipelines - the script discovers them automatically from the PR. + +## Useful Links + +- [Helix Portal](https://helix.dot.net/): View Helix jobs and work items (all repos) +- [Helix API Documentation](https://helix.dot.net/swagger/): Swagger docs for Helix REST API +- [Build Analysis](https://github.com/dotnet/arcade/blob/main/Documentation/Projects/Build%20Analysis/LandingPage.md): Known issues tracking (arcade infrastructure) +- [dnceng-public AzDO](https://dev.azure.com/dnceng-public/public/_build): Public builds for all dotnet repos + +### Repository-specific docs: +- [runtime: Triaging Failures](https://github.com/dotnet/runtime/blob/main/docs/workflow/ci/triaging-failures.md) +- [runtime: Area Owners](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) + +## Test Execution Types + +### Helix Tests +Tests run on Helix distributed test infrastructure. The script extracts console log URLs and can fetch detailed failure info with `-ShowLogs`. + +### Local Tests (Non-Helix) +Some repositories (e.g., dotnet/sdk) run tests directly on the build agent. The script detects these and extracts Azure DevOps Test Run URLs. + +## Known Issue Labels + +- `Known Build Error` - Used by Build Analysis across all dotnet repositories +- Search syntax: `repo:/ is:issue is:open label:"Known Build Error" ` + +Example searches: +```bash +# Search in runtime +gh issue list --repo dotnet/runtime --label "Known Build Error" --search "FileSystemWatcher" + +# Search in aspnetcore +gh issue list --repo dotnet/aspnetcore --label "Known Build Error" --search "Blazor" + +# Search in sdk +gh issue list --repo dotnet/sdk --label "Known Build Error" --search "template" +``` diff --git a/.github/skills/azdo-helix-failures/references/helix-artifacts.md b/.github/skills/azdo-helix-failures/references/helix-artifacts.md new file mode 100644 index 00000000000000..d2d84de4c88533 --- /dev/null +++ b/.github/skills/azdo-helix-failures/references/helix-artifacts.md @@ -0,0 +1,184 @@ +# Helix Work Item Artifacts + +Guide to finding and analyzing artifacts from Helix test runs. + +## Accessing Artifacts + +### Via the Script + +Query a specific work item to see its artifacts: + +```powershell +./scripts/Get-HelixFailures.ps1 -HelixJob "4b24b2c2-..." -WorkItem "Microsoft.NET.Sdk.Tests.dll.1" -ShowLogs +``` + +### Via API + +```bash +# Get work item details including Files array +curl -s "https://helix.dot.net/api/2019-06-17/jobs/{jobId}/workitems/{workItemName}" +``` + +The `Files` array contains artifacts with `FileName` and `Uri` properties. 
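+
+The same endpoint works from PowerShell; a minimal sketch that enumerates the `Files` array (the job ID and work item name below are the example values used elsewhere in this guide - substitute your own):
+
+```powershell
+$jobId = "4b24b2c2-ad5a-4c46-8a84-844be03b1d51"     # example Helix job ID
+$workItemName = "Microsoft.NET.Sdk.Tests.dll.1"     # example work item name
+
+$url = "https://helix.dot.net/api/2019-06-17/jobs/$jobId/workitems/$workItemName"
+$workItem = Invoke-RestMethod -Uri $url
+
+# Print each artifact's name and download URI, flagging binlogs
+$workItem.Files | ForEach-Object {
+    $marker = if ($_.FileName -like "*.binlog") { "[binlog] " } else { "" }
+    Write-Host "$marker$($_.FileName): $($_.Uri)"
+}
+```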
+ +## Artifact Availability Varies + +**Not all test types produce the same artifacts.** What you see depends on the repo, test type, and configuration: + +- **Build/publish tests** (SDK, WASM) → Multiple binlogs +- **AOT compilation tests** (iOS/Android) → `AOTBuild.binlog` plus device logs +- **Standard unit tests** → Console logs only, no binlogs +- **Crash failures** (exit code 134) → Core dumps may be present + +Always query the specific work item to see what's available rather than assuming a fixed structure. + +## Common Artifact Patterns + +| File Pattern | Purpose | When Useful | +|--------------|---------|-------------| +| `*.binlog` | MSBuild binary logs | AOT/build failures, MSB4018 errors | +| `console.*.log` | Console output | Always available, general output | +| `run-*.log` | XHarness execution logs | Mobile test failures | +| `device-*.log` | Device-specific logs | iOS/Android device issues | +| `dotnetTestLog.*.log` | dotnet test output | Test framework issues | +| `vstest.*.log` | VSTest output | aspnetcore/SDK test issues | +| `core.*`, `*.dmp` | Core dumps | Crashes, hangs | +| `testResults.xml` | Test results | Detailed pass/fail info | + +Artifacts may be at the root level or nested in subdirectories like `xharness-output/logs/`. + +## Binlog Files + +Binlogs are **only present for tests that invoke MSBuild** (build/publish tests, AOT compilation). Standard unit tests don't produce binlogs. + +### Common Names + +| File | Description | +|------|-------------| +| `build.msbuild.binlog` | Build phase | +| `publish.msbuild.binlog` | Publish phase | +| `AOTBuild.binlog` | AOT compilation | +| `msbuild.binlog` | General MSBuild operations | +| `msbuild0.binlog`, `msbuild1.binlog` | Per-test-run logs (numbered) | + +### Analyzing Binlogs + +**Online viewer (no download):** +1. Copy the binlog URI from the script output +2. Go to https://live.msbuildlog.com/ +3. Paste the URL to load and analyze + +**Download and view locally:** +```bash +curl -o build.binlog "https://helix.dot.net/api/jobs/{jobId}/workitems/{workItem}/files/build.msbuild.binlog?api-version=2019-06-17" +# Open with MSBuild Structured Log Viewer +``` + +**AI-assisted analysis:** +Use the MSBuild MCP server to analyze binlogs for errors and warnings. + +## Core Dumps + +Core dumps appear when tests crash (typically exit code 134 on Linux/macOS): + +``` +core.1000.34 # Format: core.{uid}.{pid} +``` + +## Mobile Test Artifacts (iOS/Android) + +Mobile device tests typically include XHarness orchestration logs: + +- `run-ios-device.log` / `run-android.log` - Execution log +- `device-{machine}-*.log` - Device output +- `list-ios-device-*.log` - Device discovery +- `AOTBuild.binlog` - AOT compilation (when applicable) +- `*.crash` - iOS crash reports + +## Finding the Right Work Item + +1. Run the script with `-ShowLogs` to see Helix job/work item info +2. Look for lines like: + ``` + Helix Job: 4b24b2c2-ad5a-4c46-8a84-844be03b1d51 + Work Item: Microsoft.NET.Sdk.Tests.dll.1 + ``` +3. Query that specific work item for full artifact list + +## AzDO Build Artifacts (Pre-Helix) + +Helix work items contain artifacts from **test execution**. But there's another source of binlogs: **AzDO build artifacts** from the build phase before tests are sent to Helix. 
+ +### When to Use Build Artifacts + +- Failed work item has no binlogs (unit tests don't produce them) +- You need to see how tests were **built**, not how they **executed** +- Investigating build/restore issues that happen before Helix + +### Listing Build Artifacts + +```powershell +# List all artifacts for a build +$org = "dnceng-public" +$project = "public" +$buildId = 1280125 + +$url = "https://dev.azure.com/$org/$project/_apis/build/builds/$buildId/artifacts?api-version=5.0" +$artifacts = (Invoke-RestMethod -Uri $url).value + +# Show artifacts with sizes +$artifacts | ForEach-Object { + $sizeMB = [math]::Round($_.resource.properties.artifactsize / 1MB, 2) + Write-Host "$($_.name) - $sizeMB MB" +} +``` + +### Common Build Artifacts + +| Artifact Pattern | Contents | Size | +|------------------|----------|------| +| `TestBuild_*` | Test build outputs + binlogs | 30-100 MB | +| `BuildConfiguration` | Build config metadata | <1 MB | +| `TemplateEngine_*` | Template engine outputs | ~40 MB | +| `AoT_*` | AOT compilation outputs | ~3 MB | +| `FullFramework_*` | .NET Framework test outputs | ~40 MB | + +### Downloading and Finding Binlogs + +```powershell +# Download a specific artifact +$artifactName = "TestBuild_linux_x64" +$downloadUrl = "https://dev.azure.com/$org/$project/_apis/build/builds/$buildId/artifacts?artifactName=$artifactName&api-version=5.0&`$format=zip" +$zipPath = "$env:TEMP\$artifactName.zip" +$extractPath = "$env:TEMP\$artifactName" + +Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath +Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force + +# Find binlogs +Get-ChildItem -Path $extractPath -Filter "*.binlog" -Recurse | ForEach-Object { + $sizeMB = [math]::Round($_.Length / 1MB, 2) + Write-Host "$($_.Name) ($sizeMB MB) - $($_.FullName)" +} +``` + +### Typical Binlogs in Build Artifacts + +| File | Description | +|------|-------------| +| `log/Release/Build.binlog` | Main build log | +| `log/Release/TestBuildTests.binlog` | Test build verification | +| `log/Release/ToolsetRestore.binlog` | Toolset restore | + +### Build vs Helix Binlogs + +| Source | When Generated | What It Shows | +|--------|----------------|---------------| +| AzDO build artifacts | During CI build phase | How tests were compiled/packaged | +| Helix work item artifacts | During test execution | What happened when tests ran `dotnet build` etc. | + +If a test runs `dotnet build` internally (like SDK end-to-end tests), both sources may have relevant binlogs. + +## Artifact Retention + +Helix artifacts are retained for a limited time (typically 30 days). Download important artifacts promptly if needed for long-term analysis. diff --git a/.github/skills/azdo-helix-failures/references/manual-investigation.md b/.github/skills/azdo-helix-failures/references/manual-investigation.md new file mode 100644 index 00000000000000..ea3e82fb589198 --- /dev/null +++ b/.github/skills/azdo-helix-failures/references/manual-investigation.md @@ -0,0 +1,98 @@ +# Manual Investigation Guide + +If the script doesn't provide enough information, use these manual investigation steps. 
+ +## Table of Contents +- [Get Build Timeline](#get-build-timeline) +- [Find Helix Tasks](#find-helix-tasks) +- [Get Build Logs](#get-build-logs) +- [Query Helix APIs](#query-helix-apis) +- [Download Artifacts](#download-artifacts) +- [Analyze Binlogs](#analyze-binlogs) +- [Extract Environment Variables](#extract-environment-variables) + +## Get Build Timeline + +```powershell +$buildId = 1276327 +$response = Invoke-RestMethod -Uri "https://dev.azure.com/dnceng-public/cbb18261-c48f-4abb-8651-8cdcb5474649/_apis/build/builds/$buildId/timeline?api-version=7.0" +$failedJobs = $response.records | Where-Object { $_.type -eq "Job" -and $_.result -eq "failed" } +$failedJobs | Select-Object id, name, result | Format-Table +``` + +## Find Helix Tasks + +```powershell +$jobId = "90274d9a-fbd8-54f8-6a7d-8dfc4e2f6f3f" # From timeline +$helixTasks = $response.records | Where-Object { $_.parentId -eq $jobId -and $_.name -like "*Helix*" } +$helixTasks | Select-Object id, name, result, log | Format-Table +``` + +## Get Build Logs + +```powershell +$logId = 565 # From task.log.id +$logContent = Invoke-RestMethod -Uri "https://dev.azure.com/dnceng-public/cbb18261-c48f-4abb-8651-8cdcb5474649/_apis/build/builds/$buildId/logs/${logId}?api-version=7.0" +$logContent | Select-String -Pattern "error|FAIL" -Context 2,5 +``` + +## Query Helix APIs + +```bash +# Get job details +curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID" + +# List work items +curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems" + +# Get work item details +curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME" + +# Get console log +curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME/console" +``` + +## Download Artifacts + +```powershell +$workItem = Invoke-RestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$jobId/workitems/$workItemName" +$workItem.Files | ForEach-Object { Write-Host "$($_.FileName): $($_.Uri)" } +``` + +Common artifacts: +- `console.*.log` - Console output +- `*.binlog` - MSBuild binary logs +- `run-*.log` - XHarness/test runner logs +- Core dumps and crash reports + +## Analyze Binlogs + +Binlogs contain detailed MSBuild execution traces for diagnosing: +- AOT compilation failures +- Static web asset issues +- NuGet restore problems +- Target execution order issues + +**Using MSBuild MCP Server:** +``` +msbuild-mcp analyze --binlog path/to/build.binlog --errors +msbuild-mcp analyze --binlog path/to/build.binlog --target ResolveReferences +``` + +**Manual Analysis:** +Use [MSBuild Structured Log Viewer](https://msbuildlog.com/) or https://live.msbuildlog.com/ + +## Extract Environment Variables + +```bash +curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME/console" | grep "DOTNET_" +``` + +Example output: +``` +DOTNET_JitStress=1 +DOTNET_TieredCompilation=0 +DOTNET_GCStress=0xC +``` + +These are critical for reproducing failures locally. diff --git a/.github/skills/azdo-helix-failures/scripts/Get-HelixFailures.ps1 b/.github/skills/azdo-helix-failures/scripts/Get-HelixFailures.ps1 new file mode 100644 index 00000000000000..26e610a359d730 --- /dev/null +++ b/.github/skills/azdo-helix-failures/scripts/Get-HelixFailures.ps1 @@ -0,0 +1,2101 @@ +<# +.SYNOPSIS + Retrieves test failures from Azure DevOps builds and Helix test runs. + +.DESCRIPTION + This script queries Azure DevOps for failed jobs in a build and retrieves + the corresponding Helix console logs to show detailed test failure information. 
+ It can also directly query a specific Helix job and work item. + +.PARAMETER BuildId + The Azure DevOps build ID to query. + +.PARAMETER PRNumber + The GitHub PR number to find the associated build. + +.PARAMETER HelixJob + The Helix job ID (GUID) to query directly. + +.PARAMETER WorkItem + The Helix work item name to query (requires -HelixJob). + +.PARAMETER Repository + The GitHub repository (owner/repo format). Default: dotnet/runtime + +.PARAMETER Organization + The Azure DevOps organization. Default: dnceng-public + +.PARAMETER Project + The Azure DevOps project GUID. Default: cbb18261-c48f-4abb-8651-8cdcb5474649 + +.PARAMETER ShowLogs + If specified, fetches and displays the Helix console logs for failed tests. + +.PARAMETER MaxJobs + Maximum number of failed jobs to process. Default: 5 + +.PARAMETER MaxFailureLines + Maximum number of lines to capture per test failure. Default: 50 + +.PARAMETER TimeoutSec + Timeout in seconds for API calls. Default: 30 + +.PARAMETER ContextLines + Number of context lines to show before errors. Default: 0 + +.PARAMETER NoCache + Bypass cache and fetch fresh data for all API calls. + +.PARAMETER CacheTTLSeconds + Cache lifetime in seconds. Default: 30 + +.PARAMETER ClearCache + Clear all cached files and exit. + +.PARAMETER ContinueOnError + Continue processing remaining jobs if an API call fails, showing partial results. + +.PARAMETER SearchMihuBot + Search MihuBot's semantic database for related issues and discussions. + Uses https://mihubot.xyz/mcp to find conceptually related issues across dotnet repositories. + +.PARAMETER FindBinlogs + Scan work items in a Helix job to find which ones contain MSBuild binlog files. + Useful when the failed work item doesn't have binlogs (e.g., unit tests) but you need + to find related build tests that do have binlogs for deeper analysis. 
+ +.EXAMPLE + .\Get-HelixFailures.ps1 -BuildId 1276327 + +.EXAMPLE + .\Get-HelixFailures.ps1 -PRNumber 123445 -ShowLogs + +.EXAMPLE + .\Get-HelixFailures.ps1 -PRNumber 123445 -Repository dotnet/aspnetcore + +.EXAMPLE + .\Get-HelixFailures.ps1 -HelixJob "4b24b2c2-ad5a-4c46-8a84-844be03b1d51" -WorkItem "iOS.Device.Aot.Test" + +.EXAMPLE + .\Get-HelixFailures.ps1 -BuildId 1276327 -SearchMihuBot + +.EXAMPLE + .\Get-HelixFailures.ps1 -HelixJob "4b24b2c2-ad5a-4c46-8a84-844be03b1d51" -FindBinlogs + # Scans work items to find which ones contain MSBuild binlog files + +.EXAMPLE + .\Get-HelixFailures.ps1 -ClearCache +#> + +[CmdletBinding(DefaultParameterSetName = 'BuildId')] +param( + [Parameter(ParameterSetName = 'BuildId', Mandatory = $true)] + [int]$BuildId, + + [Parameter(ParameterSetName = 'PRNumber', Mandatory = $true)] + [int]$PRNumber, + + [Parameter(ParameterSetName = 'HelixJob', Mandatory = $true)] + [string]$HelixJob, + + [Parameter(ParameterSetName = 'HelixJob')] + [string]$WorkItem, + + [Parameter(ParameterSetName = 'ClearCache', Mandatory = $true)] + [switch]$ClearCache, + + [string]$Repository = "dotnet/runtime", + [string]$Organization = "dnceng-public", + [string]$Project = "cbb18261-c48f-4abb-8651-8cdcb5474649", + [switch]$ShowLogs, + [int]$MaxJobs = 5, + [int]$MaxFailureLines = 50, + [int]$TimeoutSec = 30, + [int]$ContextLines = 0, + [switch]$NoCache, + [int]$CacheTTLSeconds = 30, + [switch]$ContinueOnError, + [switch]$SearchMihuBot, + [switch]$FindBinlogs +) + +$ErrorActionPreference = "Stop" + +#region Caching Functions + +# Cross-platform temp directory detection +function Get-TempDirectory { + # Try common environment variables in order of preference + $tempPath = $env:TEMP + if (-not $tempPath) { $tempPath = $env:TMP } + if (-not $tempPath) { $tempPath = $env:TMPDIR } # macOS + if (-not $tempPath -and $IsLinux) { $tempPath = "/tmp" } + if (-not $tempPath -and $IsMacOS) { $tempPath = "/tmp" } + if (-not $tempPath) { + # Fallback: use .cache in user's home directory + $home = $env:HOME + if (-not $home) { $home = $env:USERPROFILE } + if ($home) { + $tempPath = Join-Path $home ".cache" + if (-not (Test-Path $tempPath)) { + New-Item -ItemType Directory -Path $tempPath -Force | Out-Null + } + } + } + if (-not $tempPath) { + throw "Could not determine temp directory. Set TEMP, TMP, or TMPDIR environment variable." 
+ } + return $tempPath +} + +$script:TempDir = Get-TempDirectory + +# Handle -ClearCache parameter +if ($ClearCache) { + $cacheDir = Join-Path $script:TempDir "helix-failures-cache" + if (Test-Path $cacheDir) { + $files = Get-ChildItem -Path $cacheDir -File + $count = $files.Count + Remove-Item -Path $cacheDir -Recurse -Force + Write-Host "Cleared $count cached files from $cacheDir" -ForegroundColor Green + } + else { + Write-Host "Cache directory does not exist: $cacheDir" -ForegroundColor Yellow + } + exit 0 +} + +# Setup caching +$script:CacheDir = Join-Path $script:TempDir "helix-failures-cache" +if (-not (Test-Path $script:CacheDir)) { + New-Item -ItemType Directory -Path $script:CacheDir -Force | Out-Null +} + +# Clean up expired cache files on startup (files older than 2x TTL) +function Clear-ExpiredCache { + param([int]$TTLSeconds = $CacheTTLSeconds) + + $maxAge = $TTLSeconds * 2 + $cutoff = (Get-Date).AddSeconds(-$maxAge) + + Get-ChildItem -Path $script:CacheDir -File -ErrorAction SilentlyContinue | Where-Object { + $_.LastWriteTime -lt $cutoff + } | ForEach-Object { + Write-Verbose "Removing expired cache file: $($_.Name)" + try { + Remove-Item $_.FullName -Force -ErrorAction Stop + } + catch { + Write-Verbose "Failed to remove cache file '$($_.Name)': $($_.Exception.Message)" + } + } +} + +# Run cache cleanup at startup (non-blocking) +if (-not $NoCache) { + Clear-ExpiredCache -TTLSeconds $CacheTTLSeconds +} + +function Get-UrlHash { + param([string]$Url) + + $sha256 = [System.Security.Cryptography.SHA256]::Create() + try { + return [System.BitConverter]::ToString( + $sha256.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($Url)) + ).Replace("-", "") + } + finally { + $sha256.Dispose() + } +} + +function Get-CachedResponse { + param( + [string]$Url, + [int]$TTLSeconds = $CacheTTLSeconds + ) + + if ($NoCache) { return $null } + + $hash = Get-UrlHash -Url $Url + $cacheFile = Join-Path $script:CacheDir "$hash.json" + + if (Test-Path $cacheFile) { + $cacheInfo = Get-Item $cacheFile + $age = (Get-Date) - $cacheInfo.LastWriteTime + + if ($age.TotalSeconds -lt $TTLSeconds) { + Write-Verbose "Cache hit for $Url (age: $([int]$age.TotalSeconds) sec)" + return Get-Content $cacheFile -Raw + } + else { + Write-Verbose "Cache expired for $Url" + } + } + + return $null +} + +function Set-CachedResponse { + param( + [string]$Url, + [string]$Content + ) + + if ($NoCache) { return } + + $hash = Get-UrlHash -Url $Url + $cacheFile = Join-Path $script:CacheDir "$hash.json" + + # Use atomic write: write to temp file, then rename + $tempFile = Join-Path $script:CacheDir "$hash.tmp.$([System.Guid]::NewGuid().ToString('N'))" + try { + $Content | Set-Content -LiteralPath $tempFile -Force + Move-Item -LiteralPath $tempFile -Destination $cacheFile -Force + Write-Verbose "Cached response for $Url" + } + catch { + # Clean up temp file on failure + if (Test-Path $tempFile) { + Remove-Item -LiteralPath $tempFile -Force -ErrorAction SilentlyContinue + } + Write-Verbose "Failed to cache response: $_" + } +} + +function Invoke-CachedRestMethod { + param( + [string]$Uri, + [int]$TimeoutSec = 30, + [switch]$AsJson, + [switch]$SkipCache, + [switch]$SkipCacheWrite + ) + + # Check cache first (unless skipping) + if (-not $SkipCache) { + $cached = Get-CachedResponse -Url $Uri + if ($cached) { + if ($AsJson) { + try { + return $cached | ConvertFrom-Json -ErrorAction Stop + } + catch { + Write-Verbose "Failed to parse cached response as JSON, treating as cache miss: $_" + } + } + else { + return $cached + } + } + } + + 
# Make the actual request + Write-Verbose "GET $Uri" + $response = Invoke-RestMethod -Uri $Uri -Method Get -TimeoutSec $TimeoutSec + + # Cache the response (unless skipping write) + if (-not $SkipCache -and -not $SkipCacheWrite) { + if ($AsJson -or $response -is [PSCustomObject]) { + $content = $response | ConvertTo-Json -Depth 100 -Compress + Set-CachedResponse -Url $Uri -Content $content + } + else { + Set-CachedResponse -Url $Uri -Content $response + } + } + + return $response +} + +#endregion Caching Functions + +#region Validation Functions + +function Test-RepositoryFormat { + param([string]$Repo) + + # Validate repository format to prevent command injection + $repoPattern = '^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$' + if ($Repo -notmatch $repoPattern) { + throw "Invalid repository format '$Repo'. Expected 'owner/repo' (e.g., 'dotnet/runtime')." + } + return $true +} + +function Get-SafeSearchTerm { + param([string]$Term) + + # Sanitize search term to avoid passing unsafe characters to gh CLI + # Keep: alphanumeric, spaces, dots, hyphens, colons (for namespaces like System.Net), + # and slashes (for paths). These are safe for GitHub search and common in .NET names. + $safeTerm = $Term -replace '[^\w\s\-.:/]', '' + return $safeTerm.Trim() +} + +#endregion Validation Functions + +#region Azure DevOps API Functions + +function Get-AzDOBuildIdFromPR { + param([int]$PR) + + # Check for gh CLI dependency + if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { + throw "GitHub CLI (gh) is required for PR lookup. Install from https://cli.github.com/ or use -BuildId instead." + } + + # Validate repository format + Test-RepositoryFormat -Repo $Repository | Out-Null + + Write-Host "Finding builds for PR #$PR in $Repository..." -ForegroundColor Cyan + Write-Verbose "Running: gh pr checks $PR --repo $Repository" + + # Use gh cli to get the checks with splatted arguments + $checksOutput = & gh pr checks $PR --repo $Repository 2>&1 + + # Find ALL failing Azure DevOps builds + $failingBuilds = @{} + foreach ($line in $checksOutput) { + if ($line -match 'fail.*buildId=(\d+)') { + $buildId = $Matches[1] + # Extract pipeline name (first column before 'fail') + $pipelineName = ($line -split '\s+fail')[0].Trim() + if (-not $failingBuilds.ContainsKey($buildId)) { + $failingBuilds[$buildId] = $pipelineName + } + } + } + + if ($failingBuilds.Count -eq 0) { + # No failing builds - try to find any build + $anyBuild = $checksOutput | Select-String -Pattern "buildId=(\d+)" | Select-Object -First 1 + if ($anyBuild) { + $anyBuildMatch = [regex]::Match($anyBuild.ToString(), "buildId=(\d+)") + if ($anyBuildMatch.Success) { + $buildIdStr = $anyBuildMatch.Groups[1].Value + $buildIdInt = 0 + if ([int]::TryParse($buildIdStr, [ref]$buildIdInt)) { + return @($buildIdInt) + } + } + } + throw "Could not find Azure DevOps build for PR #$PR in $Repository" + } + + # Return all unique failing build IDs + $buildIds = $failingBuilds.Keys | ForEach-Object { [int]$_ } | Sort-Object -Unique + + if ($buildIds.Count -gt 1) { + Write-Host "Found $($buildIds.Count) failing builds:" -ForegroundColor Yellow + foreach ($id in $buildIds) { + Write-Host " - Build $id ($($failingBuilds[$id.ToString()]))" -ForegroundColor Gray + } + } + + return $buildIds +} + +function Get-BuildAnalysisKnownIssues { + param([int]$PR) + + # Check for gh CLI dependency + if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { + Write-Verbose "GitHub CLI (gh) not available for Build Analysis check" + return @() + } + + Write-Verbose "Fetching Build Analysis 
check for PR #$PR..." + + try { + # Get the head commit SHA for the PR + $headSha = gh pr view $PR --repo $Repository --json headRefOid --jq '.headRefOid' 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Verbose "Failed to get PR head SHA: $headSha" + return @() + } + + # Validate headSha is a valid git SHA (40 hex characters) + if ($headSha -notmatch '^[a-fA-F0-9]{40}$') { + Write-Verbose "Invalid head SHA format: $headSha" + return @() + } + + # Get the Build Analysis check run + $checkRuns = gh api "repos/$Repository/commits/$headSha/check-runs" --jq '.check_runs[] | select(.name == "Build Analysis") | .output' 2>&1 + if ($LASTEXITCODE -ne 0 -or -not $checkRuns) { + Write-Verbose "No Build Analysis check found" + return @() + } + + $output = $checkRuns | ConvertFrom-Json -ErrorAction SilentlyContinue + if (-not $output -or -not $output.text) { + Write-Verbose "Build Analysis check has no output text" + return @() + } + + # Parse known issues from the output text + # Format: Issue Title + $knownIssues = @() + $issuePattern = '([^<]+)' + $matches = [regex]::Matches($output.text, $issuePattern) + + foreach ($match in $matches) { + $issueUrl = $match.Groups[1].Value + $issueNumber = $match.Groups[2].Value + $issueTitle = $match.Groups[3].Value + + # Avoid duplicates + if (-not ($knownIssues | Where-Object { $_.Number -eq $issueNumber })) { + $knownIssues += @{ + Number = $issueNumber + Url = $issueUrl + Title = $issueTitle + } + } + } + + if ($knownIssues.Count -gt 0) { + Write-Host "`nBuild Analysis found $($knownIssues.Count) known issue(s):" -ForegroundColor Yellow + foreach ($issue in $knownIssues) { + Write-Host " - #$($issue.Number): $($issue.Title)" -ForegroundColor Gray + Write-Host " $($issue.Url)" -ForegroundColor DarkGray + } + } + + return $knownIssues + } + catch { + Write-Verbose "Error fetching Build Analysis: $_" + return @() + } +} + +function Get-PRChangedFiles { + param( + [int]$PR, + [int]$MaxFiles = 100 + ) + + # Check for gh CLI dependency + if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { + Write-Verbose "GitHub CLI (gh) not available for PR file lookup" + return @() + } + + Write-Verbose "Fetching changed files for PR #$PR..." 
+ + try { + # Get the file count first to avoid fetching huge PRs + $fileCount = gh pr view $PR --repo $Repository --json files --jq '.files | length' 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Verbose "Failed to get PR file count: $fileCount" + return @() + } + + $count = [int]$fileCount + if ($count -gt $MaxFiles) { + Write-Verbose "PR has $count files (exceeds limit of $MaxFiles) - skipping correlation" + Write-Host "PR has $count changed files - skipping detailed correlation (limit: $MaxFiles)" -ForegroundColor Gray + return @() + } + + # Get the list of changed files + $filesJson = gh pr view $PR --repo $Repository --json files --jq '.files[].path' 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Verbose "Failed to get PR files: $filesJson" + return @() + } + + $files = $filesJson -split "`n" | Where-Object { $_ } + return $files + } + catch { + Write-Verbose "Error fetching PR files: $_" + return @() + } +} + +function Get-PRCorrelation { + param( + [array]$ChangedFiles, + [string]$FailureInfo + ) + + # Extract potential file/test names from the failure info + $correlations = @() + + foreach ($file in $ChangedFiles) { + $fileName = [System.IO.Path]::GetFileNameWithoutExtension($file) + $fileNameWithExt = [System.IO.Path]::GetFileName($file) + + # Check if the failure mentions this file + if ($FailureInfo -match [regex]::Escape($fileName) -or + $FailureInfo -match [regex]::Escape($fileNameWithExt)) { + $correlations += @{ + File = $file + MatchType = "direct" + } + } + + # Check for test file patterns + if ($file -match '\.Tests?\.' -or $file -match '/tests?/' -or $file -match '\\tests?\\') { + # This is a test file - check if the test name appears in failures + if ($FailureInfo -match [regex]::Escape($fileName)) { + $correlations += @{ + File = $file + MatchType = "test" + } + } + } + } + + return $correlations | Select-Object -Unique -Property File, MatchType +} + +function Show-PRCorrelationSummary { + param( + [array]$ChangedFiles, + [array]$AllFailures + ) + + if ($ChangedFiles.Count -eq 0) { + return + } + + # Combine all failure info into searchable text + $failureText = ($AllFailures | ForEach-Object { + $_.TaskName + $_.JobName + $_.Errors -join "`n" + $_.HelixLogs -join "`n" + $_.FailedTests -join "`n" + }) -join "`n" + + # Also include the raw local test failure messages which may contain test class names + # These come from the "issues" property on local failures + + # Find correlations + $correlatedFiles = @() + $testFiles = @() + + foreach ($file in $ChangedFiles) { + $fileName = [System.IO.Path]::GetFileNameWithoutExtension($file) + $fileNameWithExt = [System.IO.Path]::GetFileName($file) + + # For files like NtAuthTests.FakeServer.cs, also check NtAuthTests + $baseTestName = $fileName -replace '\.[^.]+$', '' # Remove .FakeServer etc. + + # Check if this file appears in any failure + $isCorrelated = $false + + if ($failureText -match [regex]::Escape($fileName) -or + $failureText -match [regex]::Escape($fileNameWithExt) -or + $failureText -match [regex]::Escape($file) -or + ($baseTestName -and $failureText -match [regex]::Escape($baseTestName))) { + $isCorrelated = $true + } + + # Track test files separately + $isTestFile = $file -match '\.Tests?\.' 
-or $file -match '[/\\]tests?[/\\]' -or $file -match 'Test\.cs$' -or $file -match 'Tests\.cs$' + + if ($isCorrelated) { + if ($isTestFile) { + $testFiles += $file + } else { + $correlatedFiles += $file + } + } + } + + # Show results + if ($correlatedFiles.Count -gt 0 -or $testFiles.Count -gt 0) { + Write-Host "`n=== PR Change Correlation ===" -ForegroundColor Magenta + + if ($testFiles.Count -gt 0) { + Write-Host "⚠️ Test files changed by this PR are failing:" -ForegroundColor Yellow + $shown = 0 + foreach ($file in $testFiles) { + if ($shown -ge 10) { + Write-Host " ... and $($testFiles.Count - 10) more test files" -ForegroundColor Gray + break + } + Write-Host " $file" -ForegroundColor Red + $shown++ + } + } + + if ($correlatedFiles.Count -gt 0) { + Write-Host "⚠️ Files changed by this PR appear in failures:" -ForegroundColor Yellow + $shown = 0 + foreach ($file in $correlatedFiles) { + if ($shown -ge 10) { + Write-Host " ... and $($correlatedFiles.Count - 10) more files" -ForegroundColor Gray + break + } + Write-Host " $file" -ForegroundColor Red + $shown++ + } + } + + Write-Host "`nThese failures are likely PR-related." -ForegroundColor Yellow + } +} + +function Get-AzDOBuildStatus { + param([int]$Build) + + $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/${Build}?api-version=7.0" + + try { + # First check cache to see if we have a completed status + $cached = Get-CachedResponse -Url $url + if ($cached) { + $cachedData = $cached | ConvertFrom-Json + # Only use cache if build was completed - in-progress status goes stale quickly + if ($cachedData.status -eq "completed") { + return @{ + Status = $cachedData.status + Result = $cachedData.result + StartTime = $cachedData.startTime + FinishTime = $cachedData.finishTime + } + } + Write-Verbose "Skipping cached in-progress build status" + } + + # Fetch fresh status + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson -SkipCache + + # Only cache if completed + if ($response.status -eq "completed") { + $content = $response | ConvertTo-Json -Depth 10 -Compress + Set-CachedResponse -Url $url -Content $content + } + + return @{ + Status = $response.status # notStarted, inProgress, completed + Result = $response.result # succeeded, failed, canceled (only set when completed) + StartTime = $response.startTime + FinishTime = $response.finishTime + } + } + catch { + Write-Verbose "Failed to fetch build status: $_" + return $null + } +} + +function Get-AzDOTimeline { + param( + [int]$Build, + [switch]$BuildInProgress + ) + + $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/$Build/timeline?api-version=7.0" + Write-Host "Fetching build timeline..." 
-ForegroundColor Cyan + + try { + # Don't cache timeline for in-progress builds - it changes as jobs complete + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson -SkipCacheWrite:$BuildInProgress + return $response + } + catch { + if ($ContinueOnError) { + Write-Warning "Failed to fetch build timeline: $_" + return $null + } + throw "Failed to fetch build timeline: $_" + } +} + +function Get-FailedJobs { + param($Timeline) + + if ($null -eq $Timeline -or $null -eq $Timeline.records) { + return @() + } + + $failedJobs = $Timeline.records | Where-Object { + $_.type -eq "Job" -and $_.result -eq "failed" + } + + return $failedJobs +} + +function Get-CanceledJobs { + param($Timeline) + + if ($null -eq $Timeline -or $null -eq $Timeline.records) { + return @() + } + + $canceledJobs = $Timeline.records | Where-Object { + $_.type -eq "Job" -and $_.result -eq "canceled" + } + + return $canceledJobs +} + +function Get-HelixJobInfo { + param($Timeline, $JobId) + + if ($null -eq $Timeline -or $null -eq $Timeline.records) { + return @() + } + + # Find tasks in this job that mention Helix + $helixTasks = $Timeline.records | Where-Object { + $_.parentId -eq $JobId -and + $_.name -like "*Helix*" -and + $_.result -eq "failed" + } + + return $helixTasks +} + +function Get-BuildLog { + param([int]$Build, [int]$LogId) + + $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/$Build/logs/${LogId}?api-version=7.0" + + try { + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec + return $response + } + catch { + Write-Warning "Failed to fetch log ${LogId}: $_" + return $null + } +} + +#endregion Azure DevOps API Functions + +#region Log Parsing Functions + +function Extract-HelixUrls { + param([string]$LogContent) + + $urls = @() + + # First, normalize the content by removing line breaks that might split URLs + $normalizedContent = $LogContent -replace "`r`n", "" -replace "`n", "" + + # Match Helix console log URLs - workitem names can contain dots, dashes, and other chars + $urlMatches = [regex]::Matches($normalizedContent, 'https://helix\.dot\.net/api/[^/]+/jobs/[a-f0-9-]+/workitems/[^/\s]+/console') + foreach ($match in $urlMatches) { + $urls += $match.Value + } + + Write-Verbose "Found $($urls.Count) Helix URLs" + return $urls | Select-Object -Unique +} + +function Extract-TestFailures { + param([string]$LogContent) + + $failures = @() + + # Match test failure patterns from MSBuild output + $pattern = 'error\s*:\s*.*Test\s+(\S+)\s+has failed' + $failureMatches = [regex]::Matches($LogContent, $pattern, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase) + + foreach ($match in $failureMatches) { + $failures += @{ + TestName = $match.Groups[1].Value + FullMatch = $match.Value + } + } + + Write-Verbose "Found $($failures.Count) test failures" + return $failures +} + +function Extract-BuildErrors { + param( + [string]$LogContent, + [int]$Context = 5 + ) + + $errors = @() + $lines = $LogContent -split "`n" + + # Patterns for common build errors - ordered from most specific to least specific + $errorPatterns = @( + 'error\s+CS\d+:.*', # C# compiler errors + 'error\s+MSB\d+:.*', # MSBuild errors + 'error\s+NU\d+:.*', # NuGet errors + '\.pcm: No such file or directory', # Clang module cache + 'EXEC\s*:\s*error\s*:.*', # Exec task errors + 'fatal error:.*', # Fatal errors (clang, etc) + ':\s*error:', # Clang/GCC errors (file.cpp:123: error:) + 'undefined reference to', # Linker errors + 'cannot find -l', # Linker missing library + 'collect2: error:', 
# GCC linker wrapper errors + '##\[error\].*' # AzDO error annotations (last - catch-all) + ) + + $combinedPattern = ($errorPatterns -join '|') + + # Track if we only found MSBuild wrapper errors + $foundRealErrors = $false + $msbWrapperLines = @() + + for ($i = 0; $i -lt $lines.Count; $i++) { + if ($lines[$i] -match $combinedPattern) { + # Skip MSBuild wrapper "exited with code" if we find real errors + if ($lines[$i] -match 'exited with code \d+') { + $msbWrapperLines += $i + continue + } + + # Skip duplicate MSBuild errors (they often repeat) + if ($lines[$i] -match 'error MSB3073.*exited with code') { + continue + } + + $foundRealErrors = $true + + # Clean up the line (remove timestamps, etc) + $cleanLine = $lines[$i] -replace '^\d{4}-\d{2}-\d{2}T[\d:\.]+Z\s*', '' + $cleanLine = $cleanLine -replace '##\[error\]', 'ERROR: ' + + # Add context lines if requested + if ($Context -gt 0) { + $contextStart = [Math]::Max(0, $i - $Context) + $contextLines = @() + for ($j = $contextStart; $j -lt $i; $j++) { + $contextLines += " " + $lines[$j].Trim() + } + if ($contextLines.Count -gt 0) { + $errors += ($contextLines -join "`n") + } + } + + $errors += $cleanLine.Trim() + } + } + + # If we only found MSBuild wrapper errors, show context around them + if (-not $foundRealErrors -and $msbWrapperLines.Count -gt 0) { + $wrapperLine = $msbWrapperLines[0] + # Look for real errors in the 50 lines before the wrapper error + $searchStart = [Math]::Max(0, $wrapperLine - 50) + for ($i = $searchStart; $i -lt $wrapperLine; $i++) { + $line = $lines[$i] + # Look for C++/clang/gcc style errors + if ($line -match ':\s*error:' -or $line -match 'fatal error:' -or $line -match 'undefined reference') { + $cleanLine = $line -replace '^\d{4}-\d{2}-\d{2}T[\d:\.]+Z\s*', '' + $errors += $cleanLine.Trim() + } + } + } + + return $errors | Select-Object -First 20 | Select-Object -Unique +} + +function Extract-HelixLogUrls { + param([string]$LogContent) + + $urls = @() + + # Match Helix console log URLs from log content + # Pattern: https://helix.dot.net/api/2019-06-17/jobs/{jobId}/workitems/{workItemName}/console + $pattern = 'https://helix\.dot\.net/api/[^/]+/jobs/([a-f0-9-]+)/workitems/([^/\s]+)/console' + $urlMatches = [regex]::Matches($LogContent, $pattern) + + foreach ($match in $urlMatches) { + $urls += @{ + Url = $match.Value + JobId = $match.Groups[1].Value + WorkItem = $match.Groups[2].Value + } + } + + # Deduplicate by URL + $uniqueUrls = @{} + foreach ($url in $urls) { + if (-not $uniqueUrls.ContainsKey($url.Url)) { + $uniqueUrls[$url.Url] = $url + } + } + + return $uniqueUrls.Values +} + +#endregion Log Parsing Functions + +#region Known Issues Search + +function Search-MihuBotIssues { + param( + [string[]]$SearchTerms, + [string]$ExtraContext = "", + [string]$Repository = "dotnet/runtime", + [bool]$IncludeOpen = $true, + [bool]$IncludeClosed = $true, + [int]$TimeoutSec = 30 + ) + + $results = @() + + if (-not $SearchTerms -or $SearchTerms.Count -eq 0) { + return $results + } + + try { + # MihuBot MCP endpoint - call as JSON-RPC style request + $mcpUrl = "https://mihubot.xyz/mcp" + + # Build the request payload matching the MCP tool schema + $payload = @{ + jsonrpc = "2.0" + method = "tools/call" + id = [guid]::NewGuid().ToString() + params = @{ + name = "search_dotnet_repos" + arguments = @{ + repository = $Repository + searchTerms = $SearchTerms + extraSearchContext = $ExtraContext + includeOpen = $IncludeOpen + includeClosed = $IncludeClosed + includeIssues = $true + includePullRequests = $true + includeComments = 
$false + } + } + } | ConvertTo-Json -Depth 10 + + Write-Verbose "Calling MihuBot MCP endpoint with terms: $($SearchTerms -join ', ')" + + $response = Invoke-RestMethod -Uri $mcpUrl -Method Post -Body $payload -ContentType "application/json" -TimeoutSec $TimeoutSec + + # Parse MCP response + if ($response.result -and $response.result.content) { + foreach ($content in $response.result.content) { + if ($content.type -eq "text" -and $content.text) { + $issueData = $content.text | ConvertFrom-Json -ErrorAction SilentlyContinue + if ($issueData) { + foreach ($issue in $issueData) { + $results += @{ + Number = $issue.Number + Title = $issue.Title + Url = $issue.Url + Repository = $issue.Repository + State = $issue.State + Source = "MihuBot" + } + } + } + } + } + } + + # Deduplicate by issue number and repo + $unique = @{} + foreach ($issue in $results) { + $key = "$($issue.Repository)#$($issue.Number)" + if (-not $unique.ContainsKey($key)) { + $unique[$key] = $issue + } + } + + return $unique.Values | Select-Object -First 5 + } + catch { + Write-Verbose "MihuBot search failed: $_" + return @() + } +} + +function Search-KnownIssues { + param( + [string]$TestName, + [string]$ErrorMessage, + [string]$Repository = "dotnet/runtime" + ) + + # Search for known issues using the "Known Build Error" label + # This label is used by Build Analysis across dotnet repositories + + $knownIssues = @() + + # Check if gh CLI is available + if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { + Write-Verbose "GitHub CLI not available for searching known issues" + return $knownIssues + } + + try { + # Extract search terms from test name and error message + $searchTerms = @() + + # First priority: Look for [FAIL] test names in the error message + # Pattern: "TestName [FAIL]" - the test name comes BEFORE [FAIL] + if ($ErrorMessage -match '(\S+)\s+\[FAIL\]') { + $failedTest = $Matches[1] + # Extract just the method name (after last .) 
+ if ($failedTest -match '\.([^.]+)$') { + $searchTerms += $Matches[1] + } + # Also add the full test name + $searchTerms += $failedTest + } + + # Second priority: Extract test class/method from stack traces + if ($ErrorMessage -match 'at\s+(\w+\.\w+)\(' -and $searchTerms.Count -eq 0) { + $searchTerms += $Matches[1] + } + + if ($TestName) { + # Try to get the test method name from the work item + if ($TestName -match '\.([^.]+)$') { + $methodName = $Matches[1] + # Only add if it looks like a test name (not just "Tests") + if ($methodName -ne "Tests" -and $methodName.Length -gt 5) { + $searchTerms += $methodName + } + } + # Also try the full test name if it's not too long and looks specific + if ($TestName.Length -lt 100 -and $TestName -notmatch '^System\.\w+\.Tests$') { + $searchTerms += $TestName + } + } + + # Third priority: Extract specific exception patterns (but not generic TimeoutException) + if ($ErrorMessage -and $searchTerms.Count -eq 0) { + # Look for specific exception types + if ($ErrorMessage -match '(System\.(?:InvalidOperation|ArgumentNull|Format)\w*Exception)') { + $searchTerms += $Matches[1] + } + } + + # Deduplicate and limit search terms + $searchTerms = $searchTerms | Select-Object -Unique | Select-Object -First 3 + + foreach ($term in $searchTerms) { + if (-not $term) { continue } + + # Sanitize the search term to avoid passing unsafe characters to gh CLI + $safeTerm = Get-SafeSearchTerm -Term $term + if (-not $safeTerm) { continue } + + Write-Verbose "Searching for known issues with term: $safeTerm" + + # Search for open issues with the "Known Build Error" label + $results = & gh issue list ` + --repo $Repository ` + --label "Known Build Error" ` + --state open ` + --search $safeTerm ` + --limit 3 ` + --json number,title,url 2>$null | ConvertFrom-Json + + if ($results) { + foreach ($issue in $results) { + # Check if the title actually contains our search term (avoid false positives) + if ($issue.title -match [regex]::Escape($safeTerm)) { + $knownIssues += @{ + Number = $issue.number + Title = $issue.title + Url = $issue.url + SearchTerm = $safeTerm + } + } + } + } + + # If we found issues, stop searching + if ($knownIssues.Count -gt 0) { + break + } + } + + # Deduplicate by issue number + $unique = @{} + foreach ($issue in $knownIssues) { + if (-not $unique.ContainsKey($issue.Number)) { + $unique[$issue.Number] = $issue + } + } + + return $unique.Values + } + catch { + Write-Verbose "Failed to search for known issues: $_" + return @() + } +} + +function Show-KnownIssues { + param( + [string]$TestName = "", + [string]$ErrorMessage = "", + [string]$Repository = $script:Repository, + [switch]$IncludeMihuBot + ) + + # Search for known issues if we have a test name or error + if ($TestName -or $ErrorMessage) { + $knownIssues = Search-KnownIssues -TestName $TestName -ErrorMessage $ErrorMessage -Repository $Repository + if ($knownIssues -and $knownIssues.Count -gt 0) { + Write-Host "`n Known Issues:" -ForegroundColor Magenta + foreach ($issue in $knownIssues) { + Write-Host " #$($issue.Number): $($issue.Title)" -ForegroundColor Magenta + Write-Host " $($issue.Url)" -ForegroundColor Gray + } + } + + # Search MihuBot for related issues/discussions + if ($IncludeMihuBot) { + $searchTerms = @() + + # Extract meaningful search terms + if ($ErrorMessage -match '(\S+)\s+\[FAIL\]') { + $failedTest = $Matches[1] + if ($failedTest -match '\.([^.]+)$') { + $searchTerms += $Matches[1] + } + } + + if ($TestName -and $TestName -match '\.([^.]+)$') { + $methodName = $Matches[1] + if 
($methodName -ne "Tests" -and $methodName.Length -gt 5) { + $searchTerms += $methodName + } + } + + # Add test name as context + if ($TestName) { + $searchTerms += $TestName + } + + $searchTerms = $searchTerms | Select-Object -Unique | Select-Object -First 3 + + if ($searchTerms.Count -gt 0) { + $mihuBotResults = Search-MihuBotIssues -SearchTerms $searchTerms -Repository $Repository -ExtraContext "test failure $TestName" + if ($mihuBotResults -and $mihuBotResults.Count -gt 0) { + # Filter out issues already shown from Known Build Error search + $knownNumbers = @() + if ($knownIssues) { + $knownNumbers = $knownIssues | ForEach-Object { $_.Number } + } + $newResults = $mihuBotResults | Where-Object { $_.Number -notin $knownNumbers } + + if ($newResults -and @($newResults).Count -gt 0) { + Write-Host "`n Related Issues (MihuBot):" -ForegroundColor Blue + foreach ($issue in $newResults) { + $stateIcon = if ($issue.State -eq "open") { "[open]" } else { "[closed]" } + Write-Host " #$($issue.Number): $($issue.Title) $stateIcon" -ForegroundColor Blue + Write-Host " $($issue.Url)" -ForegroundColor Gray + } + } + } + } + } + } +} + +#endregion Known Issues Search + +#region Test Results Functions + +function Get-AzDOTestResults { + param( + [string]$RunId, + [string]$Org = "https://dev.azure.com/$Organization" + ) + + # Check if az devops CLI is available + if (-not (Get-Command az -ErrorAction SilentlyContinue)) { + Write-Verbose "Azure CLI not available for fetching test results" + return $null + } + + try { + Write-Verbose "Fetching test results for run $RunId via az devops CLI..." + $results = az devops invoke ` + --org $Org ` + --area test ` + --resource Results ` + --route-parameters project=$Project runId=$RunId ` + --api-version 7.0 ` + --query "value[?outcome=='Failed'].{name:testCaseTitle, outcome:outcome, error:errorMessage}" ` + -o json 2>$null | ConvertFrom-Json + + return $results + } + catch { + Write-Verbose "Failed to fetch test results via az devops: $_" + return $null + } +} + +function Extract-TestRunUrls { + param([string]$LogContent) + + $testRuns = @() + + # Match Azure DevOps Test Run URLs + # Pattern: Published Test Run : https://dev.azure.com/dnceng-public/public/_TestManagement/Runs?runId=35626550&_a=runCharts + $pattern = 'Published Test Run\s*:\s*(https://dev\.azure\.com/[^/]+/[^/]+/_TestManagement/Runs\?runId=(\d+)[^\s]*)' + $matches = [regex]::Matches($LogContent, $pattern) + + foreach ($match in $matches) { + $testRuns += @{ + Url = $match.Groups[1].Value + RunId = $match.Groups[2].Value + } + } + + Write-Verbose "Found $($testRuns.Count) test run URLs" + return $testRuns +} + +function Get-LocalTestFailures { + param( + [object]$Timeline, + [int]$BuildId + ) + + $localFailures = @() + + # Find failed test tasks (non-Helix) + # Look for tasks with "Test" in name that have issues but no Helix URLs + $testTasks = $Timeline.records | Where-Object { + ($_.name -match 'Test|xUnit' -or $_.type -eq 'Task') -and + $_.issues -and + $_.issues.Count -gt 0 + } + + foreach ($task in $testTasks) { + # Check if this task has test failures (XUnit errors) + $testErrors = $task.issues | Where-Object { + $_.message -match 'Tests failed:' -or + $_.message -match 'error\s*:.*Test.*failed' + } + + if ($testErrors.Count -gt 0) { + # This is a local test failure - find the parent job for URL construction + $parentJob = $Timeline.records | Where-Object { $_.id -eq $task.parentId -and $_.type -eq "Job" } | Select-Object -First 1 + + $failure = @{ + TaskName = $task.name + TaskId = $task.id + 
ParentJobId = if ($parentJob) { $parentJob.id } else { $task.parentId } + LogId = if ($task.log) { $task.log.id } else { $null } + Issues = $testErrors + TestRunUrls = @() + } + + # Try to get test run URLs from the publish task + $publishTask = $Timeline.records | Where-Object { + $_.parentId -eq $task.parentId -and + $_.name -match 'Publish.*Test.*Results' -and + $_.log + } | Select-Object -First 1 + + if ($publishTask -and $publishTask.log) { + $logContent = Get-BuildLog -Build $BuildId -LogId $publishTask.log.id + if ($logContent) { + $testRunUrls = Extract-TestRunUrls -LogContent $logContent + $failure.TestRunUrls = $testRunUrls + } + } + + $localFailures += $failure + } + } + + return $localFailures +} + +#endregion Test Results Functions + +#region Helix API Functions + +function Get-HelixJobDetails { + param([string]$JobId) + + $url = "https://helix.dot.net/api/2019-06-17/jobs/$JobId" + + try { + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson + return $response + } + catch { + Write-Warning "Failed to fetch Helix job ${JobId}: $_" + return $null + } +} + +function Get-HelixWorkItems { + param([string]$JobId) + + $url = "https://helix.dot.net/api/2019-06-17/jobs/$JobId/workitems" + + try { + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson + return $response + } + catch { + Write-Warning "Failed to fetch work items for job ${JobId}: $_" + return $null + } +} + +function Get-HelixWorkItemDetails { + param([string]$JobId, [string]$WorkItemName) + + $url = "https://helix.dot.net/api/2019-06-17/jobs/$JobId/workitems/$WorkItemName" + + try { + $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson + return $response + } + catch { + Write-Warning "Failed to fetch work item ${WorkItemName}: $_" + return $null + } +} + +function Get-HelixConsoleLog { + param([string]$Url) + + try { + $response = Invoke-CachedRestMethod -Uri $Url -TimeoutSec $TimeoutSec + return $response + } + catch { + Write-Warning "Failed to fetch Helix log from ${Url}: $_" + return $null + } +} + +function Find-WorkItemsWithBinlogs { + <# + .SYNOPSIS + Scans work items in a Helix job to find which ones contain binlog files. + .DESCRIPTION + Not all work items produce binlogs - only build/publish tests do. + This function helps locate work items that have binlogs for deeper analysis. + #> + param( + [Parameter(Mandatory)] + [string]$JobId, + [int]$MaxItems = 30, + [switch]$IncludeDetails + ) + + $workItems = Get-HelixWorkItems -JobId $JobId + if (-not $workItems) { + Write-Warning "No work items found for job $JobId" + return @() + } + + Write-Host "Scanning up to $MaxItems work items for binlogs..." -ForegroundColor Gray + + $results = @() + $scanned = 0 + + foreach ($wi in $workItems | Select-Object -First $MaxItems) { + $scanned++ + $details = Get-HelixWorkItemDetails -JobId $JobId -WorkItemName $wi.Name + if ($details -and $details.Files) { + $binlogs = @($details.Files | Where-Object { $_.FileName -like "*.binlog" }) + if ($binlogs.Count -gt 0) { + $result = @{ + Name = $wi.Name + BinlogCount = $binlogs.Count + Binlogs = $binlogs | ForEach-Object { $_.FileName } + ExitCode = $details.ExitCode + State = $details.State + } + if ($IncludeDetails) { + $result.BinlogUris = $binlogs | ForEach-Object { $_.Uri } + } + $results += $result + } + } + + # Progress indicator every 10 items + if ($scanned % 10 -eq 0) { + Write-Host " Scanned $scanned/$MaxItems..." 
-ForegroundColor DarkGray + } + } + + return $results +} + +#endregion Helix API Functions + +#region Output Formatting + +function Format-TestFailure { + param( + [string]$LogContent, + [int]$MaxLines = $MaxFailureLines, + [int]$MaxFailures = 3 + ) + + $lines = $LogContent -split "`n" + $allFailures = @() + $currentFailure = @() + $inFailure = $false + $emptyLineCount = 0 + $failureCount = 0 + + # Expanded failure detection patterns + $failureStartPatterns = @( + '\[FAIL\]', + 'Assert\.\w+\(\)\s+Failure', + 'Expected:.*but was:', + 'BUG:', + 'FAILED\s*$', + 'END EXECUTION - FAILED', + 'System\.\w+Exception:' + ) + $combinedPattern = ($failureStartPatterns -join '|') + + foreach ($line in $lines) { + # Check for new failure start + if ($line -match $combinedPattern) { + # Save previous failure if exists + if ($currentFailure.Count -gt 0) { + $allFailures += ($currentFailure -join "`n") + $failureCount++ + if ($failureCount -ge $MaxFailures) { + break + } + } + # Start new failure + $currentFailure = @($line) + $inFailure = $true + $emptyLineCount = 0 + continue + } + + if ($inFailure) { + $currentFailure += $line + + # Track consecutive empty lines to detect end of stack trace + if ($line -match '^\s*$') { + $emptyLineCount++ + } + else { + $emptyLineCount = 0 + } + + # Stop this failure after stack trace ends (2+ consecutive empty lines) or max lines reached + if ($emptyLineCount -ge 2 -or $currentFailure.Count -ge $MaxLines) { + $allFailures += ($currentFailure -join "`n") + $currentFailure = @() + $inFailure = $false + $failureCount++ + if ($failureCount -ge $MaxFailures) { + break + } + } + } + } + + # Don't forget last failure + if ($currentFailure.Count -gt 0 -and $failureCount -lt $MaxFailures) { + $allFailures += ($currentFailure -join "`n") + } + + if ($allFailures.Count -eq 0) { + return $null + } + + $result = $allFailures -join "`n`n--- Next Failure ---`n`n" + + if ($failureCount -ge $MaxFailures) { + $result += "`n`n... (more failures exist, showing first $MaxFailures)" + } + + return $result +} + +# Helper to display test results from a test run +function Show-TestRunResults { + param( + [object[]]$TestRunUrls, + [string]$Org = "https://dev.azure.com/$Organization" + ) + + if (-not $TestRunUrls -or $TestRunUrls.Count -eq 0) { return } + + Write-Host "`n Test Results:" -ForegroundColor Yellow + foreach ($testRun in $TestRunUrls) { + Write-Host " Run $($testRun.RunId): $($testRun.Url)" -ForegroundColor Gray + + $testResults = Get-AzDOTestResults -RunId $testRun.RunId -Org $Org + if ($testResults -and $testResults.Count -gt 0) { + Write-Host "`n Failed tests ($($testResults.Count)):" -ForegroundColor Red + foreach ($result in $testResults | Select-Object -First 10) { + Write-Host " - $($result.name)" -ForegroundColor White + } + if ($testResults.Count -gt 10) { + Write-Host " ... 
and $($testResults.Count - 10) more" -ForegroundColor Gray + } + } + } +} + +#endregion Output Formatting + +#region Main Execution + +# Main execution +try { + # Handle direct Helix job query + if ($PSCmdlet.ParameterSetName -eq 'HelixJob') { + Write-Host "`n=== Helix Job $HelixJob ===" -ForegroundColor Yellow + Write-Host "URL: https://helix.dot.net/api/jobs/$HelixJob" -ForegroundColor Gray + + # Get job details + $jobDetails = Get-HelixJobDetails -JobId $HelixJob + if ($jobDetails) { + Write-Host "`nQueue: $($jobDetails.QueueId)" -ForegroundColor Cyan + Write-Host "Source: $($jobDetails.Source)" -ForegroundColor Cyan + } + + if ($WorkItem) { + # Query specific work item + Write-Host "`n--- Work Item: $WorkItem ---" -ForegroundColor Cyan + + $workItemDetails = Get-HelixWorkItemDetails -JobId $HelixJob -WorkItemName $WorkItem + if ($workItemDetails) { + Write-Host " State: $($workItemDetails.State)" -ForegroundColor $(if ($workItemDetails.State -eq 'Passed') { 'Green' } else { 'Red' }) + Write-Host " Exit Code: $($workItemDetails.ExitCode)" -ForegroundColor White + Write-Host " Machine: $($workItemDetails.MachineName)" -ForegroundColor Gray + Write-Host " Duration: $($workItemDetails.Duration)" -ForegroundColor Gray + + # Show artifacts with binlogs highlighted + if ($workItemDetails.Files -and $workItemDetails.Files.Count -gt 0) { + Write-Host "`n Artifacts:" -ForegroundColor Yellow + $binlogs = $workItemDetails.Files | Where-Object { $_.FileName -like "*.binlog" } + $otherFiles = $workItemDetails.Files | Where-Object { $_.FileName -notlike "*.binlog" } + + # Show binlogs first with special formatting + foreach ($file in $binlogs | Select-Object -Unique FileName, Uri) { + Write-Host " 📋 $($file.FileName): $($file.Uri)" -ForegroundColor Cyan + } + if ($binlogs.Count -gt 0) { + Write-Host " (Tip: Use MSBuild MCP server or https://live.msbuildlog.com/ to analyze binlogs)" -ForegroundColor DarkGray + } + + # Show other files + foreach ($file in $otherFiles | Select-Object -Unique FileName, Uri | Select-Object -First 10) { + Write-Host " $($file.FileName): $($file.Uri)" -ForegroundColor Gray + } + } + + # Fetch console log + $consoleUrl = "https://helix.dot.net/api/2019-06-17/jobs/$HelixJob/workitems/$WorkItem/console" + Write-Host "`n Console Log: $consoleUrl" -ForegroundColor Yellow + + $consoleLog = Get-HelixConsoleLog -Url $consoleUrl + if ($consoleLog) { + $failureInfo = Format-TestFailure -LogContent $consoleLog + if ($failureInfo) { + Write-Host $failureInfo -ForegroundColor White + + # Search for known issues + Show-KnownIssues -TestName $WorkItem -ErrorMessage $failureInfo -IncludeMihuBot:$SearchMihuBot + } + else { + # Show last 50 lines if no failure pattern detected + $lines = $consoleLog -split "`n" + $lastLines = $lines | Select-Object -Last 50 + Write-Host ($lastLines -join "`n") -ForegroundColor White + } + } + } + } + else { + # List all work items in the job + Write-Host "`nWork Items:" -ForegroundColor Yellow + $workItems = Get-HelixWorkItems -JobId $HelixJob + if ($workItems) { + Write-Host " Total: $($workItems.Count)" -ForegroundColor Cyan + Write-Host " Checking for failures..." 
-ForegroundColor Gray + + # Need to fetch details for each to find failures (list API only shows 'Finished') + $failedItems = @() + foreach ($wi in $workItems | Select-Object -First 20) { + $details = Get-HelixWorkItemDetails -JobId $HelixJob -WorkItemName $wi.Name + if ($details -and $null -ne $details.ExitCode -and $details.ExitCode -ne 0) { + $failedItems += @{ + Name = $wi.Name + ExitCode = $details.ExitCode + State = $details.State + } + } + } + + if ($failedItems.Count -gt 0) { + Write-Host "`n Failed Work Items:" -ForegroundColor Red + foreach ($wi in $failedItems | Select-Object -First $MaxJobs) { + Write-Host " - $($wi.Name) (Exit: $($wi.ExitCode))" -ForegroundColor White + } + Write-Host "`n Use -WorkItem '<name>' to see details" -ForegroundColor Gray + } + else { + Write-Host " No failures found in first 20 work items" -ForegroundColor Green + } + + Write-Host "`n All work items:" -ForegroundColor Yellow + foreach ($wi in $workItems | Select-Object -First 10) { + Write-Host " - $($wi.Name)" -ForegroundColor White + } + if ($workItems.Count -gt 10) { + Write-Host " ... and $($workItems.Count - 10) more" -ForegroundColor Gray + } + + # Find work items with binlogs if requested + if ($FindBinlogs) { + Write-Host "`n === Binlog Search ===" -ForegroundColor Yellow + $binlogResults = Find-WorkItemsWithBinlogs -JobId $HelixJob -MaxItems 30 -IncludeDetails + + if ($binlogResults.Count -gt 0) { + Write-Host "`n Work items with binlogs:" -ForegroundColor Cyan + foreach ($result in $binlogResults) { + $stateColor = if ($result.ExitCode -eq 0) { 'Green' } else { 'Red' } + Write-Host " $($result.Name)" -ForegroundColor $stateColor + Write-Host " Binlogs ($($result.BinlogCount)):" -ForegroundColor Gray + foreach ($binlog in $result.Binlogs | Select-Object -First 5) { + Write-Host " - $binlog" -ForegroundColor White + } + if ($result.Binlogs.Count -gt 5) { + Write-Host " ... and $($result.Binlogs.Count - 5) more" -ForegroundColor DarkGray + } + } + Write-Host "`n Tip: Use -WorkItem '<name>' to get full binlog URIs" -ForegroundColor DarkGray + } + else { + Write-Host " No binlogs found in scanned work items." -ForegroundColor Yellow + Write-Host " This job may contain only unit tests (which don't produce binlogs)."
-ForegroundColor Gray + } + } + } + } + + exit 0 + } + + # Get build ID(s) if using PR number + $buildIds = @() + $knownIssuesFromBuildAnalysis = @() + $prChangedFiles = @() + if ($PSCmdlet.ParameterSetName -eq 'PRNumber') { + $buildIds = @(Get-AzDOBuildIdFromPR -PR $PRNumber) + + # Check Build Analysis for known issues + $knownIssuesFromBuildAnalysis = @(Get-BuildAnalysisKnownIssues -PR $PRNumber) + + # Get changed files for correlation + $prChangedFiles = @(Get-PRChangedFiles -PR $PRNumber) + if ($prChangedFiles.Count -gt 0) { + Write-Verbose "PR has $($prChangedFiles.Count) changed files" + } + } + else { + $buildIds = @($BuildId) + } + + # Process each build + $totalFailedJobs = 0 + $totalLocalFailures = 0 + $allFailuresForCorrelation = @() + + foreach ($currentBuildId in $buildIds) { + Write-Host "`n=== Azure DevOps Build $currentBuildId ===" -ForegroundColor Yellow + Write-Host "URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray + + # Get and display build status + $buildStatus = Get-AzDOBuildStatus -Build $currentBuildId + if ($buildStatus) { + $statusColor = switch ($buildStatus.Status) { + "inProgress" { "Cyan" } + "completed" { if ($buildStatus.Result -eq "succeeded") { "Green" } else { "Red" } } + default { "Gray" } + } + $statusText = $buildStatus.Status + if ($buildStatus.Status -eq "completed" -and $buildStatus.Result) { + $statusText = "$($buildStatus.Status) ($($buildStatus.Result))" + } + elseif ($buildStatus.Status -eq "inProgress") { + $statusText = "IN PROGRESS - showing failures so far" + } + Write-Host "Status: $statusText" -ForegroundColor $statusColor + } + + # Get timeline + $isInProgress = $buildStatus -and $buildStatus.Status -eq "inProgress" + $timeline = Get-AzDOTimeline -Build $currentBuildId -BuildInProgress:$isInProgress + + # Handle timeline fetch failure + if (-not $timeline) { + Write-Host "`nCould not fetch build timeline" -ForegroundColor Red + Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray + continue + } + + # Get failed jobs + $failedJobs = Get-FailedJobs -Timeline $timeline + + # Get canceled jobs (different from failed - typically due to dependency failures) + $canceledJobs = Get-CanceledJobs -Timeline $timeline + + # Also check for local test failures (non-Helix) + $localTestFailures = Get-LocalTestFailures -Timeline $timeline -BuildId $currentBuildId + + if ((-not $failedJobs -or $failedJobs.Count -eq 0) -and $localTestFailures.Count -eq 0) { + if ($buildStatus -and $buildStatus.Status -eq "inProgress") { + Write-Host "`nNo failures yet - build still in progress" -ForegroundColor Cyan + Write-Host "Run again later to check for failures, or use -NoCache to get fresh data" -ForegroundColor Gray + } + else { + Write-Host "`nNo failed jobs found in build $currentBuildId" -ForegroundColor Green + } + # Still show canceled jobs if any + if ($canceledJobs -and $canceledJobs.Count -gt 0) { + Write-Host "`nNote: $($canceledJobs.Count) job(s) were canceled (not failed):" -ForegroundColor DarkYellow + foreach ($job in $canceledJobs | Select-Object -First 5) { + Write-Host " - $($job.name)" -ForegroundColor DarkGray + } + if ($canceledJobs.Count -gt 5) { + Write-Host " ... 
and $($canceledJobs.Count - 5) more" -ForegroundColor DarkGray + } + Write-Host " (Canceled jobs are typically due to earlier stage failures or timeouts)" -ForegroundColor DarkGray + } + continue + } + + # Report local test failures first (these may exist even without failed jobs) + if ($localTestFailures.Count -gt 0) { + Write-Host "`n=== Local Test Failures (non-Helix) ===" -ForegroundColor Yellow + Write-Host "Build: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray + + foreach ($failure in $localTestFailures) { + Write-Host "`n--- $($failure.TaskName) ---" -ForegroundColor Cyan + + # Collect issues for correlation + $issueMessages = $failure.Issues | ForEach-Object { $_.message } + $allFailuresForCorrelation += @{ + TaskName = $failure.TaskName + JobName = "Local Test" + Errors = $issueMessages + HelixLogs = @() + FailedTests = @() + } + + # Show build and log links + $jobLogUrl = "https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($failure.ParentJobId)" + if ($failure.TaskId) { + $jobLogUrl += "&t=$($failure.TaskId)" + } + Write-Host " Log: $jobLogUrl" -ForegroundColor Gray + + # Show issues + foreach ($issue in $failure.Issues) { + Write-Host " $($issue.message)" -ForegroundColor Red + } + + # Show test run URLs if available + if ($failure.TestRunUrls.Count -gt 0) { + Show-TestRunResults -TestRunUrls $failure.TestRunUrls -Org "https://dev.azure.com/$Organization" + } + + # Try to get more details from the task log + if ($failure.LogId) { + $logContent = Get-BuildLog -Build $currentBuildId -LogId $failure.LogId + if ($logContent) { + # Extract test run URLs from this log too + $additionalRuns = Extract-TestRunUrls -LogContent $logContent + if ($additionalRuns.Count -gt 0 -and $failure.TestRunUrls.Count -eq 0) { + Show-TestRunResults -TestRunUrls $additionalRuns -Org "https://dev.azure.com/$Organization" + } + + # Search for known issues based on build errors and task name + $buildErrors = Extract-BuildErrors -LogContent $logContent + if ($buildErrors.Count -gt 0) { + Show-KnownIssues -ErrorMessage ($buildErrors -join "`n") -IncludeMihuBot:$SearchMihuBot + } + elseif ($failure.TaskName) { + # If no specific errors, try searching by task name + Show-KnownIssues -TestName $failure.TaskName -IncludeMihuBot:$SearchMihuBot + } + } + } + } + } + + if (-not $failedJobs -or $failedJobs.Count -eq 0) { + Write-Host "`n=== Summary ===" -ForegroundColor Yellow + Write-Host "Local test failures: $($localTestFailures.Count)" -ForegroundColor Red + Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Cyan + $totalLocalFailures += $localTestFailures.Count + continue + } + + Write-Host "`nFound $($failedJobs.Count) failed job(s):" -ForegroundColor Red + + # Show canceled jobs if any (these are different from failed) + if ($canceledJobs -and $canceledJobs.Count -gt 0) { + Write-Host "Also $($canceledJobs.Count) job(s) were canceled (due to earlier failures/timeouts):" -ForegroundColor DarkYellow + foreach ($job in $canceledJobs | Select-Object -First 3) { + Write-Host " - $($job.name)" -ForegroundColor DarkGray + } + if ($canceledJobs.Count -gt 3) { + Write-Host " ... and $($canceledJobs.Count - 3) more" -ForegroundColor DarkGray + } + } + + $processedJobs = 0 + $errorCount = 0 + foreach ($job in $failedJobs) { + if ($processedJobs -ge $MaxJobs) { + Write-Host "`n... 
and $($failedJobs.Count - $MaxJobs) more failed jobs (use -MaxJobs to see more)" -ForegroundColor Yellow + break + } + + try { + Write-Host "`n--- $($job.name) ---" -ForegroundColor Cyan + Write-Host " Build: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($job.id)" -ForegroundColor Gray + + # Get Helix tasks for this job + $helixTasks = Get-HelixJobInfo -Timeline $timeline -JobId $job.id + + if ($helixTasks) { + foreach ($task in $helixTasks) { + if ($task.log) { + Write-Host " Fetching Helix task log..." -ForegroundColor Gray + $logContent = Get-BuildLog -Build $currentBuildId -LogId $task.log.id + + if ($logContent) { + # Extract test failures + $failures = Extract-TestFailures -LogContent $logContent + + if ($failures.Count -gt 0) { + Write-Host " Failed tests:" -ForegroundColor Red + foreach ($failure in $failures) { + Write-Host " - $($failure.TestName)" -ForegroundColor White + } + + # Collect for PR correlation + $allFailuresForCorrelation += @{ + TaskName = $task.name + JobName = $job.name + Errors = @() + HelixLogs = @() + FailedTests = $failures | ForEach-Object { $_.TestName } + } + } + + # Extract and optionally fetch Helix URLs + $helixUrls = Extract-HelixUrls -LogContent $logContent + + if ($helixUrls.Count -gt 0 -and $ShowLogs) { + Write-Host "`n Helix Console Logs:" -ForegroundColor Yellow + + foreach ($url in $helixUrls | Select-Object -First 3) { + Write-Host "`n $url" -ForegroundColor Gray + + # Extract work item name from URL for known issue search + $workItemName = "" + if ($url -match '/workitems/([^/]+)/console') { + $workItemName = $Matches[1] + } + + $helixLog = Get-HelixConsoleLog -Url $url + if ($helixLog) { + $failureInfo = Format-TestFailure -LogContent $helixLog + if ($failureInfo) { + Write-Host $failureInfo -ForegroundColor White + + # Search for known issues + Show-KnownIssues -TestName $workItemName -ErrorMessage $failureInfo -IncludeMihuBot:$SearchMihuBot + } + } + } + } + elseif ($helixUrls.Count -gt 0) { + Write-Host "`n Helix logs available (use -ShowLogs to fetch):" -ForegroundColor Yellow + foreach ($url in $helixUrls | Select-Object -First 3) { + Write-Host " $url" -ForegroundColor Gray + } + } + } + } + } + } + else { + # No Helix tasks - this is a build failure, extract actual errors + $buildTasks = $timeline.records | Where-Object { + $_.parentId -eq $job.id -and $_.result -eq "failed" + } + + foreach ($task in $buildTasks | Select-Object -First 3) { + Write-Host " Failed task: $($task.name)" -ForegroundColor Red + + # Fetch and parse the build log for actual errors + if ($task.log) { + $logUrl = "https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($job.id)&t=$($task.id)" + Write-Host " Log: $logUrl" -ForegroundColor Gray + $logContent = Get-BuildLog -Build $currentBuildId -LogId $task.log.id + + if ($logContent) { + $buildErrors = Extract-BuildErrors -LogContent $logContent + + if ($buildErrors.Count -gt 0) { + # Collect for PR correlation + $allFailuresForCorrelation += @{ + TaskName = $task.name + JobName = $job.name + Errors = $buildErrors + HelixLogs = @() + FailedTests = @() + } + + # Extract Helix log URLs from the full log content + $helixLogUrls = Extract-HelixLogUrls -LogContent $logContent + + if ($helixLogUrls.Count -gt 0) { + Write-Host " Helix failures ($($helixLogUrls.Count)):" -ForegroundColor Red + foreach ($helixLog in $helixLogUrls | Select-Object -First 5) { + Write-Host " - $($helixLog.WorkItem)" -ForegroundColor White + 
Write-Host " Log: $($helixLog.Url)" -ForegroundColor Gray + } + if ($helixLogUrls.Count -gt 5) { + Write-Host " ... and $($helixLogUrls.Count - 5) more" -ForegroundColor Gray + } + } + else { + Write-Host " Build errors:" -ForegroundColor Red + foreach ($err in $buildErrors | Select-Object -First 5) { + Write-Host " $err" -ForegroundColor White + } + if ($buildErrors.Count -gt 5) { + Write-Host " ... and $($buildErrors.Count - 5) more errors" -ForegroundColor Gray + } + } + + # Search for known issues + Show-KnownIssues -ErrorMessage ($buildErrors -join "`n") -IncludeMihuBot:$SearchMihuBot + } + else { + Write-Host " (No specific errors extracted from log)" -ForegroundColor Gray + } + } + } + } + } + + $processedJobs++ + } + catch { + $errorCount++ + if ($ContinueOnError) { + Write-Warning " Error processing job '$($job.name)': $_" + } + else { + throw [System.Exception]::new("Error processing job '$($job.name)': $($_.Exception.Message)", $_.Exception) + } + } + } + + $totalFailedJobs += $failedJobs.Count + $totalLocalFailures += $localTestFailures.Count + + Write-Host "`n=== Build $currentBuildId Summary ===" -ForegroundColor Yellow + Write-Host "Failed jobs: $($failedJobs.Count)" -ForegroundColor Red + if ($localTestFailures.Count -gt 0) { + Write-Host "Local test failures: $($localTestFailures.Count)" -ForegroundColor Red + } + if ($errorCount -gt 0) { + Write-Host "API errors (partial results): $errorCount" -ForegroundColor Yellow + } + Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Cyan +} + +# Show PR change correlation if we have changed files +if ($prChangedFiles.Count -gt 0 -and $allFailuresForCorrelation.Count -gt 0) { + Show-PRCorrelationSummary -ChangedFiles $prChangedFiles -AllFailures $allFailuresForCorrelation +} + +# Overall summary if multiple builds +if ($buildIds.Count -gt 1) { + Write-Host "`n=== Overall Summary ===" -ForegroundColor Magenta + Write-Host "Analyzed $($buildIds.Count) builds" -ForegroundColor White + Write-Host "Total failed jobs: $totalFailedJobs" -ForegroundColor Red + Write-Host "Total local test failures: $totalLocalFailures" -ForegroundColor Red + + if ($knownIssuesFromBuildAnalysis.Count -gt 0) { + Write-Host "`nKnown Issues (from Build Analysis):" -ForegroundColor Yellow + foreach ($issue in $knownIssuesFromBuildAnalysis) { + Write-Host " - #$($issue.Number): $($issue.Title)" -ForegroundColor Gray + Write-Host " $($issue.Url)" -ForegroundColor DarkGray + } + } +} + +# Smart retry recommendation +Write-Host "`n=== Recommendation ===" -ForegroundColor Magenta + +if ($knownIssuesFromBuildAnalysis.Count -gt 0) { + $knownIssueCount = $knownIssuesFromBuildAnalysis.Count + Write-Host "KNOWN ISSUES DETECTED" -ForegroundColor Yellow + Write-Host "$knownIssueCount tracked issue(s) found that may correlate with failures above." -ForegroundColor White + Write-Host "Review the failure details and linked issues to determine if retry is needed." -ForegroundColor Gray +} +elseif ($totalFailedJobs -eq 0 -and $totalLocalFailures -eq 0) { + Write-Host "BUILD SUCCESSFUL" -ForegroundColor Green + Write-Host "No failures detected." 
-ForegroundColor White +} +elseif ($prChangedFiles.Count -gt 0 -and $allFailuresForCorrelation.Count -gt 0) { + # Check if failures correlate with PR changes + $hasCorrelation = $false + foreach ($failure in $allFailuresForCorrelation) { + $failureText = ($failure.Errors + $failure.HelixLogs + $failure.FailedTests) -join " " + foreach ($file in $prChangedFiles) { + $fileName = [System.IO.Path]::GetFileNameWithoutExtension($file) + if ($failureText -match [regex]::Escape($fileName)) { + $hasCorrelation = $true + break + } + } + if ($hasCorrelation) { break } + } + + if ($hasCorrelation) { + Write-Host "LIKELY PR-RELATED" -ForegroundColor Red + Write-Host "Failures appear to correlate with files changed in this PR." -ForegroundColor White + Write-Host "Review the 'PR Change Correlation' section above and fix the issues before retrying." -ForegroundColor Gray + } + else { + Write-Host "POSSIBLY TRANSIENT" -ForegroundColor Yellow + Write-Host "No known issues matched, but failures don't clearly correlate with PR changes." -ForegroundColor White + Write-Host "Consider:" -ForegroundColor Gray + Write-Host " 1. Check if same tests are failing on main branch" -ForegroundColor Gray + Write-Host " 2. Search for existing issues: gh issue list --label 'Known Build Error' --search '<test name>'" -ForegroundColor Gray + Write-Host " 3. If infrastructure-related (device not found, network errors), retry may help" -ForegroundColor Gray + } +} +else { + Write-Host "REVIEW REQUIRED" -ForegroundColor Yellow + Write-Host "Could not automatically determine failure cause." -ForegroundColor White + Write-Host "Review the failures above to determine if they are PR-related or infrastructure issues." -ForegroundColor Gray +} + +} +catch { + Write-Error "Error: $_" + exit 1 +} + +#endregion Main Execution
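For quick manual checks, the Helix endpoints the script wraps (`Get-HelixWorkItemDetails` and `Get-HelixConsoleLog` above) can also be called directly. A minimal sketch, assuming you already have a Helix job ID and work item name from the failing job's logs; `$jobId` and `$workItemName` below are placeholders, not values from any real build:

```powershell
# Hypothetical values - substitute the job ID and work item name taken from the
# "Helix Console Logs" section of the script output or from the AzDO task log.
$jobId        = '4b24b2c2-...'
$workItemName = 'System.Net.Http.Tests'

# Work item metadata: state, exit code, machine, and artifact list.
$details = Invoke-RestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$jobId/workitems/$workItemName"
"State: $($details.State)  ExitCode: $($details.ExitCode)"

# The console endpoint returns plain text; the tail usually contains the failure.
$console = Invoke-RestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$jobId/workitems/$workItemName/console"
($console -split "`n") | Select-Object -Last 50
```

This mirrors what the script's `-HelixJob`/`-WorkItem` mode does, but without caching, failure-pattern extraction, or known-issue search, so prefer the script for anything beyond a one-off look.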