Skip to content

Commit

Permalink
Ensure child worker processes are monitored after specialization (#7974)
Browse files Browse the repository at this point in the history
  • Loading branch information
mathewc authored Dec 10, 2021
1 parent f1c6016 commit 6aac496
Show file tree
Hide file tree
Showing 10 changed files with 271 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public void Configure(string name, ScriptApplicationHostOptions options)
options.IsStandbyConfiguration = true;
}

options.IsFileSystemReadOnly = IsZipDeployment(out bool isScmRunFromPackage);
options.IsFileSystemReadOnly |= IsZipDeployment(out bool isScmRunFromPackage);
options.IsScmRunFromPackage = isScmRunFromPackage;
}

Expand Down
8 changes: 7 additions & 1 deletion src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.ObjectModel;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ApplicationInsights.AspNetCore;
Expand All @@ -16,6 +17,7 @@
using Microsoft.Azure.WebJobs.Logging;
using Microsoft.Azure.WebJobs.Script.Configuration;
using Microsoft.Azure.WebJobs.Script.Diagnostics;
using Microsoft.Azure.WebJobs.Script.Eventing;
using Microsoft.Azure.WebJobs.Script.Scale;
using Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.Extensions;
using Microsoft.Extensions.Configuration;
Expand Down Expand Up @@ -44,6 +46,7 @@ public class WebJobsScriptHostService : IHostedService, IScriptHostManager, ISer
private readonly SemaphoreSlim _hostStartSemaphore = new SemaphoreSlim(1, 1);
private readonly TaskCompletionSource<bool> _hostStartedSource = new TaskCompletionSource<bool>();
private readonly Task _hostStarted;
private IScriptEventManager _eventManager;

private IHost _host;
private ScriptHostState _state;
Expand All @@ -60,7 +63,7 @@ public class WebJobsScriptHostService : IHostedService, IScriptHostManager, ISer
public WebJobsScriptHostService(IOptionsMonitor<ScriptApplicationHostOptions> applicationHostOptions, IScriptHostBuilder scriptHostBuilder, ILoggerFactory loggerFactory,
IScriptWebHostEnvironment scriptWebHostEnvironment, IEnvironment environment,
HostPerformanceManager hostPerformanceManager, IOptions<HostHealthMonitorOptions> healthMonitorOptions,
IMetricsLogger metricsLogger, IApplicationLifetime applicationLifetime, IConfiguration config)
IMetricsLogger metricsLogger, IApplicationLifetime applicationLifetime, IConfiguration config, IScriptEventManager eventManager)
{
ArgumentNullException.ThrowIfNull(loggerFactory);

Expand All @@ -79,6 +82,7 @@ public WebJobsScriptHostService(IOptionsMonitor<ScriptApplicationHostOptions> ap
_healthMonitorOptions = healthMonitorOptions ?? throw new ArgumentNullException(nameof(healthMonitorOptions));
_logger = loggerFactory.CreateLogger(ScriptConstants.LogCategoryHostGeneral);
_config = config ?? throw new ArgumentNullException(nameof(config));
_eventManager = eventManager;

_hostStarted = _hostStartedSource.Task;

Expand Down Expand Up @@ -315,6 +319,8 @@ private async Task UnsynchronizedStartHostAsync(ScriptHostStartupOperation activ
State = ScriptHostState.Running;
}
}

_eventManager.Publish(new HostStartEvent());
}
catch (OperationCanceledException)
{
Expand Down
1 change: 1 addition & 0 deletions src/WebJobs.Script/Eventing/EventSources.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ public static class EventSources
public const string Worker = "Worker";
public const string WorkerProcess = "WorkerProcess";
public const string HttpWorker = "HttpWorker";
public const string Host = "Host";
}
}
13 changes: 13 additions & 0 deletions src/WebJobs.Script/Eventing/Host/HostStartEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.

namespace Microsoft.Azure.WebJobs.Script.Eventing
{
public class HostStartEvent : ScriptEvent
{
public HostStartEvent()
: base(nameof(HostStartEvent), EventSources.Host)
{
}
}
}
121 changes: 85 additions & 36 deletions src/WebJobs.Script/Workers/ProcessManagement/WorkerProcess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reactive.Linq;
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs.Host.Scale;
using Microsoft.Azure.WebJobs.Logging;
Expand All @@ -24,10 +25,12 @@ internal abstract class WorkerProcess : IWorkerProcess, IDisposable
private readonly IMetricsLogger _metricsLogger;
private readonly int processExitTimeoutInMilliseconds = 1000;
private readonly IServiceProvider _serviceProvider;
private readonly IDisposable _eventSubscription;

private Process _process;
private bool _useStdErrorStreamForErrorsOnly;
private Queue<string> _processStdErrDataQueue = new Queue<string>(3);
private IHostProcessMonitor _processMonitor;
private object _syncLock = new object();

internal WorkerProcess(IScriptEventManager eventManager, IProcessRegistry processRegistry, ILogger workerProcessLogger, IWorkerConsoleLogSource consoleLogSource, IMetricsLogger metricsLogger, IServiceProvider serviceProvider, bool useStdErrStreamForErrorsOnly = false)
{
Expand All @@ -38,49 +41,52 @@ internal WorkerProcess(IScriptEventManager eventManager, IProcessRegistry proces
_metricsLogger = metricsLogger;
_useStdErrorStreamForErrorsOnly = useStdErrStreamForErrorsOnly;
_serviceProvider = serviceProvider;

// We subscribe to host start events so we can handle the restart that occurs
// on host specialization.
_eventSubscription = _eventManager.OfType<HostStartEvent>().Subscribe(OnHostStart);
}

protected bool Disposing { get; private set; }

public int Id => _process.Id;
public int Id => Process.Id;

internal Queue<string> ProcessStdErrDataQueue => _processStdErrDataQueue;

// for testing
internal Process Process { get; set; }

internal abstract Process CreateWorkerProcess();

public Task StartProcessAsync()
{
using (_metricsLogger.LatencyEvent(MetricEventNames.ProcessStart))
{
_process = CreateWorkerProcess();
Process = CreateWorkerProcess();
try
{
_process.ErrorDataReceived += (sender, e) => OnErrorDataReceived(sender, e);
_process.OutputDataReceived += (sender, e) => OnOutputDataReceived(sender, e);
_process.Exited += (sender, e) => OnProcessExited(sender, e);
_process.EnableRaisingEvents = true;
Process.ErrorDataReceived += (sender, e) => OnErrorDataReceived(sender, e);
Process.OutputDataReceived += (sender, e) => OnOutputDataReceived(sender, e);
Process.Exited += (sender, e) => OnProcessExited(sender, e);
Process.EnableRaisingEvents = true;

_workerProcessLogger?.LogDebug($"Starting worker process with FileName:{_process.StartInfo.FileName} WorkingDirectory:{_process.StartInfo.WorkingDirectory} Arguments:{_process.StartInfo.Arguments}");
_process.Start();
_workerProcessLogger?.LogDebug($"{_process.StartInfo.FileName} process with Id={_process.Id} started");
_workerProcessLogger?.LogDebug($"Starting worker process with FileName:{Process.StartInfo.FileName} WorkingDirectory:{Process.StartInfo.WorkingDirectory} Arguments:{Process.StartInfo.Arguments}");
Process.Start();
_workerProcessLogger?.LogDebug($"{Process.StartInfo.FileName} process with Id={Process.Id} started");

_process.BeginErrorReadLine();
_process.BeginOutputReadLine();
Process.BeginErrorReadLine();
Process.BeginOutputReadLine();

// Register process only after it starts
_processRegistry?.Register(_process);
_processRegistry?.Register(Process);

var processMonitor = _serviceProvider.GetScriptHostServiceOrNull<IHostProcessMonitor>();
if (processMonitor != null)
{
processMonitor.RegisterChildProcess(_process);
}
RegisterWithProcessMonitor();

return Task.CompletedTask;
}
catch (Exception ex)
{
_workerProcessLogger.LogError(ex, $"Failed to start Worker Channel. Process fileName: {_process.StartInfo.FileName}");
_workerProcessLogger.LogError(ex, $"Failed to start Worker Channel. Process fileName: {Process.StartInfo.FileName}");
return Task.FromException(ex);
}
}
Expand Down Expand Up @@ -139,20 +145,20 @@ private void OnProcessExited(object sender, EventArgs e)

try
{
if (_process.ExitCode == WorkerConstants.SuccessExitCode)
if (Process.ExitCode == WorkerConstants.SuccessExitCode)
{
_process.WaitForExit();
_process.Close();
Process.WaitForExit();
Process.Close();
}
else if (_process.ExitCode == WorkerConstants.IntentionalRestartExitCode)
else if (Process.ExitCode == WorkerConstants.IntentionalRestartExitCode)
{
HandleWorkerProcessRestart();
}
else
{
var processExitEx = new WorkerProcessExitException($"{_process.StartInfo.FileName} exited with code {_process.ExitCode}\n {exceptionMessage}");
processExitEx.ExitCode = _process.ExitCode;
processExitEx.Pid = _process.Id;
var processExitEx = new WorkerProcessExitException($"{Process.StartInfo.FileName} exited with code {Process.ExitCode}\n {exceptionMessage}");
processExitEx.ExitCode = Process.ExitCode;
processExitEx.Pid = Process.Id;
HandleWorkerProcessExitError(processExitEx);
}
}
Expand All @@ -163,11 +169,7 @@ private void OnProcessExited(object sender, EventArgs e)
}
finally
{
var processMonitor = _serviceProvider.GetScriptHostServiceOrNull<IHostProcessMonitor>();
if (processMonitor != null)
{
processMonitor.UnregisterChildProcess(_process);
}
UnregisterFromProcessMonitor();
}
}

Expand Down Expand Up @@ -206,17 +208,19 @@ public void Dispose()
// best effort process disposal
try
{
if (_process != null)
_eventSubscription?.Dispose();

if (Process != null)
{
if (!_process.HasExited)
if (!Process.HasExited)
{
_process.Kill();
if (!_process.WaitForExit(processExitTimeoutInMilliseconds))
Process.Kill();
if (!Process.WaitForExit(processExitTimeoutInMilliseconds))
{
_workerProcessLogger.LogWarning($"Worker process has not exited despite waiting for {processExitTimeoutInMilliseconds} ms");
}
}
_process.Dispose();
Process.Dispose();
}
}
catch (Exception exc)
Expand All @@ -225,5 +229,50 @@ public void Dispose()
//ignore
}
}

internal void OnHostStart(HostStartEvent evt)
{
if (!Disposing)
{
RegisterWithProcessMonitor();
}
}

/// <summary>
/// Ensures that our process is registered with <see cref="IHostProcessMonitor"/>.
/// </summary>
/// <remarks>
/// The goal is to ensure that all worker processes are registered with the monitor for the active host.
/// There are a few different cases to consider:
/// - Starting up in normal mode, we register on when the process is started.
/// - When a placeholder mode host is specialized, a new host will be started but the previously initialized
/// worker process will remain running. We need to re-register with the new host.
/// - When the worker process dies and a new instance of this class is created.
/// </remarks>
internal void RegisterWithProcessMonitor()
{
var processMonitor = _serviceProvider.GetScriptHostServiceOrNull<IHostProcessMonitor>();
lock (_syncLock)
{
if (processMonitor != null && processMonitor != _processMonitor && Process != null)
{
processMonitor.RegisterChildProcess(Process);
_processMonitor = processMonitor;
}
}
}

internal void UnregisterFromProcessMonitor()
{
lock (_syncLock)
{
if (_processMonitor != null && Process != null)
{
// if we've registered our process with the monitor, unregister
_processMonitor.UnregisterChildProcess(Process);
_processMonitor = null;
}
}
}
}
}
20 changes: 20 additions & 0 deletions test/WebJobs.Script.Tests.Shared/TestScriptEventManager.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.

using System;
using Microsoft.Azure.WebJobs.Script.Eventing;

namespace Microsoft.Azure.WebJobs.Script.Tests
{
public class TestScriptEventManager : IScriptEventManager
{
public void Publish(ScriptEvent scriptEvent)
{
}

public IDisposable Subscribe(IObserver<ScriptEvent> observer)
{
return null;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
<Compile Include="$(MSBuildThisFileDirectory)TestOptionsFactory.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestOptionsMonitor.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestScaleMonitor.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestScriptEventManager.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestServiceCollectionExtensions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestServiceProviderExtensions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)TestTraits.cs" />
Expand Down
Loading

0 comments on commit 6aac496

Please sign in to comment.