From 36265895eec229b378163560ff96fd26f8dd25c4 Mon Sep 17 00:00:00 2001 From: George Pollard Date: Sat, 29 Oct 2022 08:45:27 +1300 Subject: [PATCH] Reduce fetches to VMSS (#2577) * Remove one GET from ListInstanceIds * Only invoke ListInstanceIds if needed * Format code * Downgrade Exception log to Verbose * Insert InstanceID during CleanupNodes Co-authored-by: Cheick Keita --- .../TestHooks/NodeOperationsTestHooks.cs | 2 +- .../ApiService/onefuzzlib/NodeOperations.cs | 10 +- .../onefuzzlib/ScalesetOperations.cs | 43 ++++---- .../ApiService/onefuzzlib/VmssOperations.cs | 100 +++++++++--------- .../Fakes/TestVmssOperations.cs | 18 ++-- 5 files changed, 89 insertions(+), 84 deletions(-) diff --git a/src/ApiService/ApiService/TestHooks/NodeOperationsTestHooks.cs b/src/ApiService/ApiService/TestHooks/NodeOperationsTestHooks.cs index f3607e4cfc..90f0875d69 100644 --- a/src/ApiService/ApiService/TestHooks/NodeOperationsTestHooks.cs +++ b/src/ApiService/ApiService/TestHooks/NodeOperationsTestHooks.cs @@ -222,7 +222,7 @@ public async Task CreateNode([HttpTrigger(AuthorizationLevel.A bool isNew = UriExtension.GetBool("isNew", query, false); - var node = await _nodeOps.Create(poolId, poolName, machineId, scaleSetId, version, isNew); + var node = await _nodeOps.Create(poolId, poolName, machineId, null, scaleSetId, version, isNew); var resp = req.CreateResponse(HttpStatusCode.OK); await resp.WriteAsJsonAsync(JsonSerializer.Serialize(node, EntityConverter.GetJsonSerializerOptions())); diff --git a/src/ApiService/ApiService/onefuzzlib/NodeOperations.cs b/src/ApiService/ApiService/onefuzzlib/NodeOperations.cs index faf259ebe8..09439d46b1 100644 --- a/src/ApiService/ApiService/onefuzzlib/NodeOperations.cs +++ b/src/ApiService/ApiService/onefuzzlib/NodeOperations.cs @@ -37,6 +37,7 @@ IAsyncEnumerable SearchStates(Guid? poolId = default, Guid poolId, PoolName poolName, Guid machineId, + string? instanceId, Guid? scaleSetId, string version, bool isNew = false); @@ -379,11 +380,18 @@ public IAsyncEnumerable GetDeadNodes(Guid scaleSetId, TimeSpan expirationP Guid poolId, PoolName poolName, Guid machineId, + string? instanceId, Guid? scaleSetId, string version, bool isNew = false) { - var node = new Node(poolName, machineId, poolId, version, ScalesetId: scaleSetId); + var node = new Node( + poolName, + machineId, + poolId, + version, + InstanceId: instanceId, + ScalesetId: scaleSetId); ResultVoid<(HttpStatusCode Status, string Reason)> r; if (isNew) { diff --git a/src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs b/src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs index c65592ab26..7858a55d99 100644 --- a/src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs +++ b/src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs @@ -277,7 +277,7 @@ public async Async.Task Setup(Scaleset scaleset) { return await SetFailed(scaleset, result.ErrorV); } - //TODO : why are we saving scaleset here ? + //TODO : why are we saving scaleset here ? var r = await Update(scaleset); if (!r.IsOk) { _logTracer.Error($"Failed to save scaleset {scaleset.ScalesetId:Tag:ScalesetId} due to {r.ErrorV:Tag:Error}"); @@ -514,6 +514,11 @@ public async Async.Task Halt(Scaleset scaleset) { //ground truth of existing nodes var azureNodes = await _context.VmssOperations.ListInstanceIds(scaleSet.ScalesetId); + if (azureNodes is null) { + // didn't find scaleset + return (false, scaleSet); + } + var nodes = _context.NodeOperations.SearchStates(scalesetId: scaleSet.ScalesetId); //# Nodes do not exists in scalesets but in table due to unknown failure @@ -536,6 +541,7 @@ public async Async.Task Halt(Scaleset scaleset) { foreach (var azureNode in azureNodes) { var machineId = azureNode.Key; + var instanceId = azureNode.Value; if (nodeMachineIds.Contains(machineId)) { continue; } @@ -550,6 +556,7 @@ public async Async.Task Halt(Scaleset scaleset) { pool.OkV.PoolId, scaleSet.PoolName, machineId, + instanceId, scaleSet.ScalesetId, _context.ServiceConfiguration.OneFuzzVersion, isNew: true); @@ -615,7 +622,6 @@ where x.State.ReadyForReset() public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable nodes, NodeDisposalStrategy disposalStrategy) { - if (nodes is null || !nodes.Any()) { _log.Info($"no nodes to reimage: {scaleset.ScalesetId:Tag:ScalesetId}"); return; @@ -632,20 +638,20 @@ public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable nodes, return; } - var machineIds = new HashSet(); + var nodesToReimage = new List(); foreach (var node in nodes) { if (node.State != NodeState.Done) { continue; } if (node.DebugKeepNode) { - _log.Warning($"not reimaging manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}"); + _log.Warning($"not reimaging manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}"); } else { - _ = machineIds.Add(node.MachineId); + nodesToReimage.Add(node); } } - if (!machineIds.Any()) { + if (!nodesToReimage.Any()) { _log.Info($"no nodes to reimage {scaleset.ScalesetId:Tag:ScalesetId}"); return; } @@ -653,18 +659,16 @@ public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable nodes, switch (disposalStrategy) { case NodeDisposalStrategy.Decommission: _log.Info($"decommissioning nodes"); - await Async.Task.WhenAll(nodes - .Where(node => machineIds.Contains(node.MachineId)) + await Async.Task.WhenAll(nodesToReimage .Select(async node => { await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult(); })); return; case NodeDisposalStrategy.ScaleIn: - var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, machineIds); + var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, nodesToReimage); if (r.IsOk) { - await Async.Task.WhenAll(nodes - .Where(node => machineIds.Contains(node.MachineId)) + await Async.Task.WhenAll(nodesToReimage .Select(async node => { var r = await _context.NodeOperations.ReleaseScaleInProtection(node); if (r.IsOk) { @@ -693,33 +697,30 @@ public async Async.Task DeleteNodes(Scaleset scaleset, IEnumerable nodes, return; } - HashSet machineIds = new(); + var nodesToDelete = new List(); foreach (var node in nodes) { if (node.DebugKeepNode) { - _log.Warning($"not deleting manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}"); + _log.Warning($"not deleting manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}"); } else { - _ = machineIds.Add(node.MachineId); + nodesToDelete.Add(node); } } switch (disposalStrategy) { case NodeDisposalStrategy.Decommission: _log.Info($"decommissioning nodes"); - await Async.Task.WhenAll(nodes - .Where(node => machineIds.Contains(node.MachineId)) + await Async.Task.WhenAll(nodesToDelete .Select(async node => { await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult(); })); return; case NodeDisposalStrategy.ScaleIn: - _log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", machineIds):Tag:MachineIds}"); - await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, machineIds); - await Async.Task.WhenAll(nodes - .Where(node => machineIds.Contains(node.MachineId)) + _log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", nodesToDelete.Select(n => n.MachineId)):Tag:MachineIds}"); + await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, nodesToDelete); + await Async.Task.WhenAll(nodesToDelete .Select(async node => { await _context.NodeOperations.Delete(node); - await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult(); })); return; } diff --git a/src/ApiService/ApiService/onefuzzlib/VmssOperations.cs b/src/ApiService/ApiService/onefuzzlib/VmssOperations.cs index 6d060bb720..fefb7c9b95 100644 --- a/src/ApiService/ApiService/onefuzzlib/VmssOperations.cs +++ b/src/ApiService/ApiService/onefuzzlib/VmssOperations.cs @@ -21,7 +21,7 @@ public interface IVmssOperations { Async.Task DeleteVmss(Guid name, bool? forceDeletion = null); - Async.Task> ListInstanceIds(Guid name); + Async.Task?> ListInstanceIds(Guid name); Async.Task GetVmssSize(Guid name); @@ -42,8 +42,8 @@ Async.Task CreateVmss( IDictionary tags); IAsyncEnumerable ListVmss(Guid name); - Async.Task ReimageNodes(Guid scalesetId, IReadOnlySet machineIds); - Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet machineIds); + Async.Task ReimageNodes(Guid scalesetId, IEnumerable nodes); + Async.Task DeleteNodes(Guid scalesetId, IEnumerable nodes); } public class VmssOperations : IVmssOperations { @@ -51,7 +51,6 @@ public class VmssOperations : IVmssOperations { private readonly ICreds _creds; private readonly IImageOperations _imageOps; private readonly IServiceConfig _serviceConfig; - private readonly IOnefuzzContext _context; private readonly IMemoryCache _cache; @@ -60,9 +59,7 @@ public VmssOperations(ILogTracer log, IOnefuzzContext context, IMemoryCache cach _creds = context.Creds; _imageOps = context.ImageOperations; _serviceConfig = context.ServiceConfiguration; - _context = context; _cache = cache; - } public async Async.Task DeleteVmss(Guid name, bool? forceDeletion = null) { @@ -161,37 +158,24 @@ public async Async.Task UpdateExtensions(Guid name, IList> ListInstanceIds(Guid name) { + public async Async.Task?> ListInstanceIds(Guid name) { _log.Verbose($"get instance IDs for scaleset {name:Tag:VmssName}"); - var results = new Dictionary(); - VirtualMachineScaleSetResource res; try { - var r = await GetVmssResource(name).GetAsync(); - res = r.Value; - } catch (Exception ex) when (ex is RequestFailedException) { - _log.Verbose($"vm does not exist {name:Tag:VmssName}"); - return results; - } - - if (res is null) { - _log.Verbose($"vm does not exist {name:Tag:VmssName}"); - return results; - } else { - try { - await foreach (var instance in res.GetVirtualMachineScaleSetVms()) { - if (instance is not null) { - if (Guid.TryParse(instance.Data.VmId, out var key)) { - results[key] = instance.Data.InstanceId; - } else { - _log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}"); - } + var results = new Dictionary(); + await foreach (var instance in GetVmssResource(name).GetVirtualMachineScaleSetVms()) { + if (instance is not null) { + if (Guid.TryParse(instance.Data.VmId, out var machineId)) { + results[machineId] = instance.Data.InstanceId; + } else { + _log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}"); } } - } catch (Exception ex) when (ex is RequestFailedException || ex is CloudException) { - _log.Exception(ex, $"vm does not exist {name:Tag:VmssName}"); } + return results; + } catch (RequestFailedException ex) when (ex.Status == 404) { + _log.Verbose($"scaleset does not exist {name:Tag:VmssName}"); + return null; } - return results; } private record InstanceIdKey(Guid Scaleset, Guid VmId); @@ -444,22 +428,43 @@ public Async.Task> ListAvailableSkus(Region region) return skuNames; }); - public async Async.Task ReimageNodes(Guid scalesetId, IReadOnlySet machineIds) { - var result = await CheckCanUpdate(scalesetId); - if (!result.IsOk) { - return OneFuzzResultVoid.Error(result.ErrorV); - } + private async Async.Task> ResolveInstanceIds(Guid scalesetId, IEnumerable nodes) { + + // only initialize this if we find a missing InstanceId + var machineToInstanceLazy = new Lazy>>(async () => { + var machineToInstance = await ListInstanceIds(scalesetId); + if (machineToInstance is null) { + throw new Exception($"cannot find nodes in scaleset {scalesetId}: scaleset does not exist"); + } + + return machineToInstance; + }); var instanceIds = new HashSet(); - var machineToInstance = await ListInstanceIds(scalesetId); - foreach (var machineId in machineIds) { - if (machineToInstance.TryGetValue(machineId, out var instanceId)) { - _ = instanceIds.Add(instanceId); + foreach (var node in nodes) { + if (node.InstanceId is not null) { + _ = instanceIds.Add(node.InstanceId); + continue; + } + + var lookup = await machineToInstanceLazy.Value; + if (lookup.TryGetValue(node.MachineId, out var foundId)) { + _ = instanceIds.Add(foundId); } else { - _log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:MachineId}"); + _log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {node.MachineId:Tag:VmId}"); } } + return instanceIds; + } + + public async Async.Task ReimageNodes(Guid scalesetId, IEnumerable nodes) { + var result = await CheckCanUpdate(scalesetId); + if (!result.IsOk) { + return OneFuzzResultVoid.Error(result.ErrorV); + } + + var instanceIds = await ResolveInstanceIds(scalesetId, nodes); if (!instanceIds.Any()) { return OneFuzzResultVoid.Ok; } @@ -499,22 +504,13 @@ public async Async.Task ReimageNodes(Guid scalesetId, IReadOn return OneFuzzResultVoid.Ok; } - public async Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet machineIds) { + public async Async.Task DeleteNodes(Guid scalesetId, IEnumerable nodes) { var result = await CheckCanUpdate(scalesetId); if (!result.IsOk) { throw new Exception($"cannot delete nodes from scaleset {scalesetId} : {result.ErrorV}"); } - var instanceIds = new HashSet(); - var machineToInstance = await ListInstanceIds(scalesetId); - foreach (var machineId in machineIds) { - if (machineToInstance.TryGetValue(machineId, out var instanceId)) { - _ = instanceIds.Add(instanceId); - } else { - _log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:VmId}"); - } - } - + var instanceIds = await ResolveInstanceIds(scalesetId, nodes); if (!instanceIds.Any()) { return; } diff --git a/src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs b/src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs index 943ec48155..01d93b572f 100644 --- a/src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs +++ b/src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs @@ -23,10 +23,6 @@ public Task CreateVmss(Region location, Guid name, string vmS throw new NotImplementedException(); } - public System.Threading.Tasks.Task DeleteNodes(Guid scalesetId, IReadOnlySet machineIds) { - throw new NotImplementedException(); - } - public Task DeleteVmss(Guid name, bool? forceDeletion = null) { throw new NotImplementedException(); } @@ -44,7 +40,7 @@ public Task> GetInstanceId(Guid name, Guid vmId) { } - public Task> ListInstanceIds(Guid name) { + public Task?> ListInstanceIds(Guid name) { throw new NotImplementedException(); } @@ -52,10 +48,6 @@ public IAsyncEnumerable ListVmss(Guid name) { throw new NotImplementedException(); } - public Task ReimageNodes(Guid scalesetId, IReadOnlySet machineIds) { - throw new NotImplementedException(); - } - public Task ResizeVmss(Guid name, long capacity) { throw new NotImplementedException(); } @@ -67,4 +59,12 @@ public Task UpdateExtensions(Guid name, IList UpdateScaleInProtection(Scaleset scaleset, string instanceId, bool protectFromScaleIn) { throw new NotImplementedException(); } + + public Task ReimageNodes(Guid scalesetId, IEnumerable nodes) { + throw new NotImplementedException(); + } + + public Async.Task DeleteNodes(Guid scalesetId, IEnumerable nodes) { + throw new NotImplementedException(); + } }