Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Reduce fetches to VMSS #2577

Merged
merged 7 commits into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,11 @@ public async Async.Task<Scaleset> Halt(Scaleset scaleset) {

//ground truth of existing nodes
var azureNodes = await _context.VmssOperations.ListInstanceIds(scaleSet.ScalesetId);
if (azureNodes is null) {
// didn't find scaleset
return (false, scaleSet);
}

var nodes = _context.NodeOperations.SearchStates(scalesetId: scaleSet.ScalesetId);

//# Nodes do not exist in scalesets but exist in the table due to an unknown failure
Expand Down Expand Up @@ -615,7 +620,6 @@ where x.State.ReadyForReset()


public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable<Node> nodes, NodeDisposalStrategy disposalStrategy) {

if (nodes is null || !nodes.Any()) {
_log.Info($"no nodes to reimage: {scaleset.ScalesetId:Tag:ScalesetId}");
return;
Expand All @@ -632,39 +636,37 @@ public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable<Node> nodes,
return;
}

var machineIds = new HashSet<Guid>();
var nodesToReimage = new List<Node>();
foreach (var node in nodes) {
if (node.State != NodeState.Done) {
continue;
}

if (node.DebugKeepNode) {
_log.Warning($"not reimaging manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
_log.Warning($"not reimaging manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
} else {
_ = machineIds.Add(node.MachineId);
nodesToReimage.Add(node);
}
}

if (!machineIds.Any()) {
if (!nodesToReimage.Any()) {
_log.Info($"no nodes to reimage {scaleset.ScalesetId:Tag:ScalesetId}");
return;
}

switch (disposalStrategy) {
case NodeDisposalStrategy.Decommission:
_log.Info($"decommissioning nodes");
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToReimage
.Select(async node => {
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;

case NodeDisposalStrategy.ScaleIn:
chkeita marked this conversation as resolved.
Show resolved Hide resolved
var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, machineIds);
var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, nodesToReimage);
if (r.IsOk) {
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToReimage
.Select(async node => {
var r = await _context.NodeOperations.ReleaseScaleInProtection(node);
if (r.IsOk) {
Expand Down Expand Up @@ -693,33 +695,32 @@ public async Async.Task DeleteNodes(Scaleset scaleset, IEnumerable<Node> nodes,
return;
}

HashSet<Guid> machineIds = new();
var nodesToDelete = new List<Node>();
foreach (var node in nodes) {
if (node.DebugKeepNode) {
_log.Warning($"not deleting manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
_log.Warning($"not deleting manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
} else {
_ = machineIds.Add(node.MachineId);
nodesToDelete.Add(node);
}
}

switch (disposalStrategy) {
case NodeDisposalStrategy.Decommission:
_log.Info($"decommissioning nodes");
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToDelete
.Select(async node => {
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;

case NodeDisposalStrategy.ScaleIn:
_log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", machineIds):Tag:MachineIds}");
await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, machineIds);
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
_log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", nodesToDelete.Select(n => n.MachineId)):Tag:MachineIds}");
await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, nodesToDelete);
await Async.Task.WhenAll(nodesToDelete
.Select(async node => {
await _context.NodeOperations.Delete(node);
Porges marked this conversation as resolved.
Show resolved Hide resolved
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
// REMOVE?: don't need to release scale-in protection if we have deleted the node
chkeita marked this conversation as resolved.
Show resolved Hide resolved
// await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;
}
Expand Down
100 changes: 48 additions & 52 deletions src/ApiService/ApiService/onefuzzlib/VmssOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public interface IVmssOperations {

Async.Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null);

Async.Task<IDictionary<Guid, string>> ListInstanceIds(Guid name);
Async.Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name);

Async.Task<long?> GetVmssSize(Guid name);

Expand All @@ -42,16 +42,15 @@ Async.Task<OneFuzzResultVoid> CreateVmss(
IDictionary<string, string> tags);

IAsyncEnumerable<VirtualMachineScaleSetVmResource> ListVmss(Guid name);
Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds);
Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds);
Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes);
Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes);
}

public class VmssOperations : IVmssOperations {
private readonly ILogTracer _log;
private readonly ICreds _creds;
private readonly IImageOperations _imageOps;
private readonly IServiceConfig _serviceConfig;
private readonly IOnefuzzContext _context;
private readonly IMemoryCache _cache;


Expand All @@ -60,9 +59,7 @@ public VmssOperations(ILogTracer log, IOnefuzzContext context, IMemoryCache cach
_creds = context.Creds;
_imageOps = context.ImageOperations;
_serviceConfig = context.ServiceConfiguration;
_context = context;
_cache = cache;

}

public async Async.Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null) {
Expand Down Expand Up @@ -161,37 +158,24 @@ public async Async.Task<OneFuzzResultVoid> UpdateExtensions(Guid name, IList<Vir
}
}

public async Async.Task<IDictionary<Guid, string>> ListInstanceIds(Guid name) {
public async Async.Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name) {
chkeita marked this conversation as resolved.
Show resolved Hide resolved
_log.Verbose($"get instance IDs for scaleset {name:Tag:VmssName}");
var results = new Dictionary<Guid, string>();
VirtualMachineScaleSetResource res;
try {
var r = await GetVmssResource(name).GetAsync();
res = r.Value;
} catch (Exception ex) when (ex is RequestFailedException) {
_log.Verbose($"vm does not exist {name:Tag:VmssName}");
return results;
}

if (res is null) {
_log.Verbose($"vm does not exist {name:Tag:VmssName}");
return results;
} else {
try {
await foreach (var instance in res.GetVirtualMachineScaleSetVms()) {
if (instance is not null) {
if (Guid.TryParse(instance.Data.VmId, out var key)) {
results[key] = instance.Data.InstanceId;
} else {
_log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}");
}
var results = new Dictionary<Guid, string>();
await foreach (var instance in GetVmssResource(name).GetVirtualMachineScaleSetVms()) {
if (instance is not null) {
if (Guid.TryParse(instance.Data.VmId, out var machineId)) {
results[machineId] = instance.Data.InstanceId;
} else {
_log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}");
}
}
} catch (Exception ex) when (ex is RequestFailedException || ex is CloudException) {
_log.Exception(ex, $"vm does not exist {name:Tag:VmssName}");
}
return results;
} catch (RequestFailedException ex) when (ex.Status == 404) {
_log.Exception(ex, $"scaleset does not exist {name:Tag:VmssName}");
return null;
}
return results;
}

private record InstanceIdKey(Guid Scaleset, Guid VmId);
Expand Down Expand Up @@ -444,22 +428,43 @@ public Async.Task<IReadOnlyList<string>> ListAvailableSkus(Region region)
return skuNames;
});

public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
return OneFuzzResultVoid.Error(result.ErrorV);
}
private async Async.Task<HashSet<string>> ResolveInstanceIds(Guid scalesetId, IEnumerable<Node> nodes) {

// only initialize this if we find a missing InstanceId
var machineToInstanceLazy = new Lazy<Task<IDictionary<Guid, string>>>(async () => {
var machineToInstance = await ListInstanceIds(scalesetId);
if (machineToInstance is null) {
throw new Exception($"cannot find nodes in scaleset {scalesetId}: scaleset does not exist");
}

return machineToInstance;
});

var instanceIds = new HashSet<string>();
var machineToInstance = await ListInstanceIds(scalesetId);
foreach (var machineId in machineIds) {
if (machineToInstance.TryGetValue(machineId, out var instanceId)) {
_ = instanceIds.Add(instanceId);
foreach (var node in nodes) {
if (node.InstanceId is not null) {
_ = instanceIds.Add(node.InstanceId);
continue;
}

var lookup = await machineToInstanceLazy.Value;
Porges marked this conversation as resolved.
Show resolved Hide resolved
if (lookup.TryGetValue(node.MachineId, out var foundId)) {
_ = instanceIds.Add(foundId);
} else {
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:MachineId}");
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {node.MachineId:Tag:VmId}");
}
}

return instanceIds;
}

public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
return OneFuzzResultVoid.Error(result.ErrorV);
}

var instanceIds = await ResolveInstanceIds(scalesetId, nodes);
if (!instanceIds.Any()) {
return OneFuzzResultVoid.Ok;
}
Expand Down Expand Up @@ -499,22 +504,13 @@ public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOn
return OneFuzzResultVoid.Ok;
}

public async Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
public async Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
throw new Exception($"cannot delete nodes from scaleset {scalesetId} : {result.ErrorV}");
}

var instanceIds = new HashSet<string>();
var machineToInstance = await ListInstanceIds(scalesetId);
foreach (var machineId in machineIds) {
if (machineToInstance.TryGetValue(machineId, out var instanceId)) {
_ = instanceIds.Add(instanceId);
} else {
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:VmId}");
}
}

var instanceIds = await ResolveInstanceIds(scalesetId, nodes);
if (!instanceIds.Any()) {
return;
}
Expand Down
18 changes: 9 additions & 9 deletions src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ public Task<OneFuzzResultVoid> CreateVmss(Region location, Guid name, string vmS
throw new NotImplementedException();
}

public System.Threading.Tasks.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
throw new NotImplementedException();
}

/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null)
    => throw new NotImplementedException();
Expand All @@ -44,18 +40,14 @@ public Task<OneFuzzResult<string>> GetInstanceId(Guid name, Guid vmId) {
}


public Task<IDictionary<Guid, string>> ListInstanceIds(Guid name) {
public Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name) {
throw new NotImplementedException();
}

/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public IAsyncEnumerable<VirtualMachineScaleSetVmResource> ListVmss(Guid name)
    => throw new NotImplementedException();

public Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
throw new NotImplementedException();
}

/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public Task<OneFuzzResultVoid> ResizeVmss(Guid name, long capacity)
    => throw new NotImplementedException();
Expand All @@ -67,4 +59,12 @@ public Task<OneFuzzResultVoid> UpdateExtensions(Guid name, IList<VirtualMachineS
/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public Task<OneFuzzResultVoid> UpdateScaleInProtection(Scaleset scaleset, string instanceId, bool protectFromScaleIn)
    => throw new NotImplementedException();

/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes)
    => throw new NotImplementedException();

/// <summary>
/// Test fake: not implemented. Any call throws <see cref="NotImplementedException"/>.
/// </summary>
public Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes)
    => throw new NotImplementedException();
}