diff --git a/eng/MemDump.win.psm1 b/eng/MemDump.win.psm1 new file mode 100644 index 00000000000..3312f55cc3b --- /dev/null +++ b/eng/MemDump.win.psm1 @@ -0,0 +1,159 @@ +<# + Licensed to the .NET Foundation under one or more agreements. The .NET Foundation licenses this file to you under the MIT license. + See the LICENSE file in the project root for more information. +#> + +# Collection of powershell build utility functions that we use across our scripts. +# Copied from https://raw.githubusercontent.com/dotnet/roslyn/3f851026b30b335ae328ce38a7817d93b96ad4ad/eng/build-utils-win.ps1 + +Set-StrictMode -version 2.0 +$ErrorActionPreference = "Stop" +$FLAG_FILE = "memdump.lock" + +. $PSScriptRoot\common\pipeline-logging-functions.ps1 + +function Get-Now { + return (Get-Date).ToString("yyyy-MM-yy HH:mm:ss") +} +function Start-MemDumpTimer() { +<# +.SYNOPSIS + +Initiates the timer to monitor a process. + +.DESCRIPTION + +Starts process monitoring by setting a lock file in the specified directory. +If the target folder already contains a lock file, then it is assumed there is another +process that is monitoring processes, this is the end. + +The monitoring is performed either until the lock file +in the target folder is deleted, or the allotted time has run out. + +.PARAMETER WorkingDir +The full path to a repository root folder. + +.PARAMETER TargetDir +The full path to a folder that contains the lock file. + +.PARAMETER WaitSeconds +The time (in seconds) allowed for a process to run before it is considered unresponsive. +At which point a memory dump is captured, and the process is terminated + +.INPUTS + +None. + +.OUTPUTS + +None. + +#> + Param ( + [string] $WorkingDir, + [string] $TargetDir, + [int] $WaitSeconds + ) + + $flagFile = Join-Path $TargetDir $FLAG_FILE; + $logFile = $flagFile.Replace('.lock', '.log'); + "[START] $WorkingDir, flag: $flagFile" >> $logFile; + Push-Location $WorkingDir + + $hasFlagFile = Test-Path $flagFile + if ($hasFlagFile) { + Write-PipelineTaskError -Message "MemDump timer is already running!" -Type 'warning' + "[$(Get-Now)] MemDump timer is already running!" >> $logFile; + return; + } + + '' | Out-File $flagFile + + $processes = @(); + + do { + $hasFlagFile = Test-Path $flagFile + if (!$hasFlagFile) { + Write-PipelineTaskError -Message "MemDump timer is stopped" -Type 'warning' + "[$(Get-Now)] MemDump timer is stopped" >> $logFile; + return; + } + + "[$(Get-Now)] MemDump wait for $WaitSeconds seconds" >> $logFile; + Start-Sleep -Seconds $WaitSeconds + + Get-WmiObject win32_process -Filter "name like 'dotnet.exe'" | ` + Where-Object { $_.CommandLine.Contains('System.Windows.Forms') } | ` + Select-Object ProcessId, ProcessName, CommandLine | ` + ForEach-Object { + $processes += $_; + } + + # collect all memory dump before killing processes, as killing one process may lead to cascade kill + $processes | ForEach-Object { + $processId = $_.ProcessId; + $dumpFile = Join-Path $TargetDir "dotnet.exe.$processId.dmp"; + + "[$(Get-Now)] MemDump collect for pid: $processId, file: $dumpFile" >> $logFile; + try { + .\.tools\dotnet-dump collect --process-id $processId --type Heap --output $dumpFile + "[$(Get-Now)] MemDump collected!" >> $logFile; + } + catch { + $_ >> $logFile; + } + } + + # kill all identified processes + $processes | ForEach-Object { + $processId = $_.ProcessId; + "[$(Get-Now)] MemDump kill pid: $processId" >> $logFile; + Stop-Process -Id $processId + "[$(Get-Now)] MemDump killed" >> $logFile; + } + + $hasFlagFile = Test-Path $flagFile + } + until (!$hasFlagFile) +} + +function Stop-MemDumpTimer() { +<# +.SYNOPSIS + +Stop process monitoring by removing the lock file. + +.DESCRIPTION + +Stop process monitoring by removing the lock file. + +.PARAMETER TargetDir +The full path to a folder that contains the lock file. + +.INPUTS + +None. + +.OUTPUTS + +None. + +#> + Param ( + [string] $TargetDir + ) + + $flagFile = Join-Path $TargetDir $FLAG_FILE; + $logFile = $flagFile.Replace('.lock', '.log'); + + $hasFlagFile = Test-Path $flagFile + if ($hasFlagFile) { + Remove-Item -Path $flagFile -Force + "[$(Get-Now)] Flag file removed" >> $logFile; + } + + "[$(Get-Now)] Stopped`r`n`r`n" >> $logFile; +} + +Export-ModuleMember -Function Start-MemDumpTimer +Export-ModuleMember -Function Stop-MemDumpTimer \ No newline at end of file diff --git a/eng/Screenshots.win.psm1 b/eng/Screenshots.win.psm1 index 98b85f18bb6..fdc963ef8a6 100644 --- a/eng/Screenshots.win.psm1 +++ b/eng/Screenshots.win.psm1 @@ -15,6 +15,10 @@ Add-Type -AssemblyName 'System.Windows.Forms' . $PSScriptRoot\common\pipeline-logging-functions.ps1 +function Get-Now { + return (Get-Date).ToString("yyyy-MM-yy HH:mm:ss") +} + function Save-Screenshot() { Param ( [string] $TargetFileName, @@ -93,7 +97,8 @@ None. #> Param ( - [string] $TargetDir + [string] $TargetDir, + [int] $WaitSeconds ) $flagFile = Join-Path $TargetDir $FLAG_FILE; @@ -103,7 +108,7 @@ None. $hasFlagFile = Test-Path $flagFile if ($hasFlagFile) { Write-PipelineTaskError -Message "Screenshots are already being taken!" -Type 'warning' - "[START] Screenshots are already being taken!" >> $logFile; + "[$(Get-Now)] Screenshots are already being taken!" >> $logFile; return; } @@ -114,13 +119,14 @@ None. $hasFlagFile = Test-Path $flagFile if (!$hasFlagFile) { Write-PipelineTaskError -Message "Screenshots no longer being taken" -Type 'warning' - "[START] Screenshots no longer being taken" >> $logFile; + "[$(Get-Now)] Screenshots no longer being taken" >> $logFile; return; } + Start-Sleep -Seconds $WaitSeconds + Capture-Screenshot -TargetDir $TargetDir -LogFile $logFile; - "[START] Screenshot taken" >> $logFile; - Start-Sleep -Seconds 180 + "[$(Get-Now)] Screenshot taken" >> $logFile; $hasFlagFile = Test-Path $flagFile } @@ -156,16 +162,14 @@ None. $flagFile = Join-Path $TargetDir $FLAG_FILE; $logFile = $flagFile.Replace('.lock', '.log'); - "[STOP] Flag: $flagFile" >> $logFile; $hasFlagFile = Test-Path $flagFile - if ($hasFlagFile) { Remove-Item -Path $flagFile -Force - "[STOP] Flag file removed" >> $logFile; + "[$(Get-Now)] Flag file removed" >> $logFile; } - "[STOP] Stopped" >> $logFile; + "[$(Get-Now)] Stopped" >> $logFile; } Export-ModuleMember -Function Start-CaptureScreenshots diff --git a/eng/ci.yml b/eng/ci.yml index 4c1ac06682f..e4f2acd0de3 100644 --- a/eng/ci.yml +++ b/eng/ci.yml @@ -92,7 +92,7 @@ stages: -configuration $(_BuildConfig) $(_OfficialBuildIdArgs) /p:Coverage=$(_Coverage) - /bl:$(BUILD.SOURCESDIRECTORY)\artifacts\log\$(_BuildConfig)\BuildSrc.binlog + /bl:$(BUILD.SOURCESDIRECTORY)\artifacts\log\$(_BuildConfig)\BuildSrc.binlog displayName: Build # Run Unit Tests diff --git a/eng/custom-cibuild.ps1 b/eng/custom-cibuild.ps1 index 115bd420362..e7ef838015d 100644 --- a/eng/custom-cibuild.ps1 +++ b/eng/custom-cibuild.ps1 @@ -1,8 +1,12 @@ [CmdletBinding(PositionalBinding=$false)] Param( + [string][Alias('c')]$configuration = "Debug", [Parameter(ValueFromRemainingArguments=$true)][String[]]$properties ) +# How long to wait before we consider a build/test run to be unresponsive +$WaitSeconds = 900 # 15 min + . $PSScriptRoot\common\tools.ps1 # CI mode - set to true on CI server for PR validation build or official build. @@ -13,39 +17,72 @@ Param( [bool]$integrationTest = $properties.Contains('-integrationTest') [bool]$performanceTest = $properties.Contains('-performanceTest') -$moduleLocation = Resolve-Path "$PSScriptRoot\Screenshots.win.psm1" -$initScreenshotsModule = [scriptblock]::Create("Import-Module $moduleLocation") +$screenshotsModuleLocation = Resolve-Path "$PSScriptRoot\Screenshots.win.psm1" +$initScreenshotsModule = [scriptblock]::Create("Import-Module $screenshotsModuleLocation") +$memDumpModuleLocation = Resolve-Path "$PSScriptRoot\MemDump.win.psm1" +$initMemDumpModule = [scriptblock]::Create("Import-Module $memDumpModuleLocation") -$TakeScreenshots = $ci -and ($test -or $integrationTest -or $performanceTest); +$CollectDebugInfo = $ci -and ($test -or $integrationTest -or $performanceTest); $ImageLogs = ''; -if ($TakeScreenshots) { +if ($CollectDebugInfo) { + + dotnet tool install dotnet-dump --tool-path .\.tools + + # Collect screenshots + # ----------------------------------------------------------- $ImageLogs = Join-Path $LogDir 'screenshots' Create-Directory $ImageLogs [ScriptBlock] $ScreenshotCaptureScript = { - param($ImageLogs) - Start-CaptureScreenshots "$ImageLogs" + param($ImageLogs, $WaitSeconds) + Start-CaptureScreenshots -TargetDir $ImageLogs -WaitSeconds $WaitSeconds }; - $job = Start-Job -InitializationScript $initScreenshotsModule ` -ScriptBlock $ScreenshotCaptureScript ` - -ArgumentList $ImageLogs + -ArgumentList @( $ImageLogs, $WaitSeconds ); + + # Collect memory dump + # ----------------------------------------------------------- + [ScriptBlock] $MemDumpScript = { + param($WorkingDir, $LogDir, $WaitSeconds) + Start-MemDumpTimer -WorkingDir $WorkingDir -TargetDir $LogDir -WaitSeconds $WaitSeconds + }; + $job = Start-Job -InitializationScript $initMemDumpModule ` + -ScriptBlock $MemDumpScript ` + -ArgumentList @( $(Get-Location), $LogDir, $WaitSeconds ) + } try { + # Run the build script that does the actual work - powershell.exe -File $PSScriptRoot\common\Build.ps1 @properties + # ----------------------------------------------------------- + powershell.exe -File $PSScriptRoot\common\Build.ps1 -c $configuration @properties $exitCode = $LASTEXITCODE Exit $exitCode; + } finally { - if ($TakeScreenshots) { + if ($CollectDebugInfo) { + + # Stop collecting screenshots + # ----------------------------------------------------------- [ScriptBlock] $ScreenshotCaptureScript = { param($ImageLogs) Stop-CaptureScreenshots -TargetDir $ImageLogs }; Start-Job -InitializationScript $initScreenshotsModule ` -ScriptBlock $ScreenshotCaptureScript ` - -ArgumentList $ImageLogs | Receive-Job -AutoRemoveJob -Wait - } + -ArgumentList $ImageLogs | Receive-Job -AutoRemoveJob -Wait; + + # Stop collect memory dumps + # ----------------------------------------------------------- + [ScriptBlock] $MemDumpScript = { + param($LogDir) + Stop-MemDumpTimer -TargetDir $LogDir + }; + Start-Job -InitializationScript $initMemDumpModule ` + -ScriptBlock $MemDumpScript ` + -ArgumentList $LogDir | Receive-Job -AutoRemoveJob -Wait + } }