Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Commit

Permalink
Reduce fetches to VMSS (#2577)
Browse files Browse the repository at this point in the history
* Remove one GET from ListInstanceIds

* Only invoke ListInstanceIds if needed

* Format code

* Downgrade Exception log to Verbose

* Insert InstanceID during CleanupNodes

Co-authored-by: Cheick Keita <chkeita@microsoft.com>
  • Loading branch information
Porges and chkeita authored Oct 28, 2022
1 parent 4b305d0 commit 3626589
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ public async Task<HttpResponseData> CreateNode([HttpTrigger(AuthorizationLevel.A

bool isNew = UriExtension.GetBool("isNew", query, false);

var node = await _nodeOps.Create(poolId, poolName, machineId, scaleSetId, version, isNew);
var node = await _nodeOps.Create(poolId, poolName, machineId, null, scaleSetId, version, isNew);

var resp = req.CreateResponse(HttpStatusCode.OK);
await resp.WriteAsJsonAsync(JsonSerializer.Serialize(node, EntityConverter.GetJsonSerializerOptions()));
Expand Down
10 changes: 9 additions & 1 deletion src/ApiService/ApiService/onefuzzlib/NodeOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ IAsyncEnumerable<Node> SearchStates(Guid? poolId = default,
Guid poolId,
PoolName poolName,
Guid machineId,
string? instanceId,
Guid? scaleSetId,
string version,
bool isNew = false);
Expand Down Expand Up @@ -379,11 +380,18 @@ public IAsyncEnumerable<Node> GetDeadNodes(Guid scaleSetId, TimeSpan expirationP
Guid poolId,
PoolName poolName,
Guid machineId,
string? instanceId,
Guid? scaleSetId,
string version,
bool isNew = false) {

var node = new Node(poolName, machineId, poolId, version, ScalesetId: scaleSetId);
var node = new Node(
poolName,
machineId,
poolId,
version,
InstanceId: instanceId,
ScalesetId: scaleSetId);

ResultVoid<(HttpStatusCode Status, string Reason)> r;
if (isNew) {
Expand Down
43 changes: 22 additions & 21 deletions src/ApiService/ApiService/onefuzzlib/ScalesetOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public async Async.Task<Scaleset> Setup(Scaleset scaleset) {
return await SetFailed(scaleset, result.ErrorV);
}

//TODO : why are we saving scaleset here ?
//TODO : why are we saving scaleset here ?
var r = await Update(scaleset);
if (!r.IsOk) {
_logTracer.Error($"Failed to save scaleset {scaleset.ScalesetId:Tag:ScalesetId} due to {r.ErrorV:Tag:Error}");
Expand Down Expand Up @@ -514,6 +514,11 @@ public async Async.Task<Scaleset> Halt(Scaleset scaleset) {

//ground truth of existing nodes
var azureNodes = await _context.VmssOperations.ListInstanceIds(scaleSet.ScalesetId);
if (azureNodes is null) {
// didn't find scaleset
return (false, scaleSet);
}

var nodes = _context.NodeOperations.SearchStates(scalesetId: scaleSet.ScalesetId);

//# Nodes do not exists in scalesets but in table due to unknown failure
Expand All @@ -536,6 +541,7 @@ public async Async.Task<Scaleset> Halt(Scaleset scaleset) {

foreach (var azureNode in azureNodes) {
var machineId = azureNode.Key;
var instanceId = azureNode.Value;
if (nodeMachineIds.Contains(machineId)) {
continue;
}
Expand All @@ -550,6 +556,7 @@ public async Async.Task<Scaleset> Halt(Scaleset scaleset) {
pool.OkV.PoolId,
scaleSet.PoolName,
machineId,
instanceId,
scaleSet.ScalesetId,
_context.ServiceConfiguration.OneFuzzVersion,
isNew: true);
Expand Down Expand Up @@ -615,7 +622,6 @@ where x.State.ReadyForReset()


public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable<Node> nodes, NodeDisposalStrategy disposalStrategy) {

if (nodes is null || !nodes.Any()) {
_log.Info($"no nodes to reimage: {scaleset.ScalesetId:Tag:ScalesetId}");
return;
Expand All @@ -632,39 +638,37 @@ public async Async.Task ReimageNodes(Scaleset scaleset, IEnumerable<Node> nodes,
return;
}

var machineIds = new HashSet<Guid>();
var nodesToReimage = new List<Node>();
foreach (var node in nodes) {
if (node.State != NodeState.Done) {
continue;
}

if (node.DebugKeepNode) {
_log.Warning($"not reimaging manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
_log.Warning($"not reimaging manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
} else {
_ = machineIds.Add(node.MachineId);
nodesToReimage.Add(node);
}
}

if (!machineIds.Any()) {
if (!nodesToReimage.Any()) {
_log.Info($"no nodes to reimage {scaleset.ScalesetId:Tag:ScalesetId}");
return;
}

switch (disposalStrategy) {
case NodeDisposalStrategy.Decommission:
_log.Info($"decommissioning nodes");
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToReimage
.Select(async node => {
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;

case NodeDisposalStrategy.ScaleIn:
var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, machineIds);
var r = await _context.VmssOperations.ReimageNodes(scaleset.ScalesetId, nodesToReimage);
if (r.IsOk) {
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToReimage
.Select(async node => {
var r = await _context.NodeOperations.ReleaseScaleInProtection(node);
if (r.IsOk) {
Expand Down Expand Up @@ -693,33 +697,30 @@ public async Async.Task DeleteNodes(Scaleset scaleset, IEnumerable<Node> nodes,
return;
}

HashSet<Guid> machineIds = new();
var nodesToDelete = new List<Node>();
foreach (var node in nodes) {
if (node.DebugKeepNode) {
_log.Warning($"not deleting manually overriden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
_log.Warning($"not deleting manually overridden node {node.MachineId:Tag:MachineId} in scaleset {scaleset.ScalesetId:Tag:ScalesetId}");
} else {
_ = machineIds.Add(node.MachineId);
nodesToDelete.Add(node);
}
}

switch (disposalStrategy) {
case NodeDisposalStrategy.Decommission:
_log.Info($"decommissioning nodes");
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
await Async.Task.WhenAll(nodesToDelete
.Select(async node => {
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;

case NodeDisposalStrategy.ScaleIn:
_log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", machineIds):Tag:MachineIds}");
await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, machineIds);
await Async.Task.WhenAll(nodes
.Where(node => machineIds.Contains(node.MachineId))
_log.Info($"deleting nodes {scaleset.ScalesetId:Tag:ScalesetId} {string.Join(", ", nodesToDelete.Select(n => n.MachineId)):Tag:MachineIds}");
await _context.VmssOperations.DeleteNodes(scaleset.ScalesetId, nodesToDelete);
await Async.Task.WhenAll(nodesToDelete
.Select(async node => {
await _context.NodeOperations.Delete(node);
await _context.NodeOperations.ReleaseScaleInProtection(node).IgnoreResult();
}));
return;
}
Expand Down
100 changes: 48 additions & 52 deletions src/ApiService/ApiService/onefuzzlib/VmssOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public interface IVmssOperations {

Async.Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null);

Async.Task<IDictionary<Guid, string>> ListInstanceIds(Guid name);
Async.Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name);

Async.Task<long?> GetVmssSize(Guid name);

Expand All @@ -42,16 +42,15 @@ Async.Task<OneFuzzResultVoid> CreateVmss(
IDictionary<string, string> tags);

IAsyncEnumerable<VirtualMachineScaleSetVmResource> ListVmss(Guid name);
Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds);
Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds);
Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes);
Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes);
}

public class VmssOperations : IVmssOperations {
private readonly ILogTracer _log;
private readonly ICreds _creds;
private readonly IImageOperations _imageOps;
private readonly IServiceConfig _serviceConfig;
private readonly IOnefuzzContext _context;
private readonly IMemoryCache _cache;


Expand All @@ -60,9 +59,7 @@ public VmssOperations(ILogTracer log, IOnefuzzContext context, IMemoryCache cach
_creds = context.Creds;
_imageOps = context.ImageOperations;
_serviceConfig = context.ServiceConfiguration;
_context = context;
_cache = cache;

}

public async Async.Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null) {
Expand Down Expand Up @@ -161,37 +158,24 @@ public async Async.Task<OneFuzzResultVoid> UpdateExtensions(Guid name, IList<Vir
}
}

public async Async.Task<IDictionary<Guid, string>> ListInstanceIds(Guid name) {
public async Async.Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name) {
_log.Verbose($"get instance IDs for scaleset {name:Tag:VmssName}");
var results = new Dictionary<Guid, string>();
VirtualMachineScaleSetResource res;
try {
var r = await GetVmssResource(name).GetAsync();
res = r.Value;
} catch (Exception ex) when (ex is RequestFailedException) {
_log.Verbose($"vm does not exist {name:Tag:VmssName}");
return results;
}

if (res is null) {
_log.Verbose($"vm does not exist {name:Tag:VmssName}");
return results;
} else {
try {
await foreach (var instance in res.GetVirtualMachineScaleSetVms()) {
if (instance is not null) {
if (Guid.TryParse(instance.Data.VmId, out var key)) {
results[key] = instance.Data.InstanceId;
} else {
_log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}");
}
var results = new Dictionary<Guid, string>();
await foreach (var instance in GetVmssResource(name).GetVirtualMachineScaleSetVms()) {
if (instance is not null) {
if (Guid.TryParse(instance.Data.VmId, out var machineId)) {
results[machineId] = instance.Data.InstanceId;
} else {
_log.Error($"failed to convert vmId {instance.Data.VmId:Tag:VmId} to Guid in {name:Tag:VmssName}");
}
}
} catch (Exception ex) when (ex is RequestFailedException || ex is CloudException) {
_log.Exception(ex, $"vm does not exist {name:Tag:VmssName}");
}
return results;
} catch (RequestFailedException ex) when (ex.Status == 404) {
_log.Verbose($"scaleset does not exist {name:Tag:VmssName}");
return null;
}
return results;
}

private record InstanceIdKey(Guid Scaleset, Guid VmId);
Expand Down Expand Up @@ -444,22 +428,43 @@ public Async.Task<IReadOnlyList<string>> ListAvailableSkus(Region region)
return skuNames;
});

public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
return OneFuzzResultVoid.Error(result.ErrorV);
}
private async Async.Task<HashSet<string>> ResolveInstanceIds(Guid scalesetId, IEnumerable<Node> nodes) {

// only initialize this if we find a missing InstanceId
var machineToInstanceLazy = new Lazy<Task<IDictionary<Guid, string>>>(async () => {
var machineToInstance = await ListInstanceIds(scalesetId);
if (machineToInstance is null) {
throw new Exception($"cannot find nodes in scaleset {scalesetId}: scaleset does not exist");
}

return machineToInstance;
});

var instanceIds = new HashSet<string>();
var machineToInstance = await ListInstanceIds(scalesetId);
foreach (var machineId in machineIds) {
if (machineToInstance.TryGetValue(machineId, out var instanceId)) {
_ = instanceIds.Add(instanceId);
foreach (var node in nodes) {
if (node.InstanceId is not null) {
_ = instanceIds.Add(node.InstanceId);
continue;
}

var lookup = await machineToInstanceLazy.Value;
if (lookup.TryGetValue(node.MachineId, out var foundId)) {
_ = instanceIds.Add(foundId);
} else {
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:MachineId}");
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {node.MachineId:Tag:VmId}");
}
}

return instanceIds;
}

public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
return OneFuzzResultVoid.Error(result.ErrorV);
}

var instanceIds = await ResolveInstanceIds(scalesetId, nodes);
if (!instanceIds.Any()) {
return OneFuzzResultVoid.Ok;
}
Expand Down Expand Up @@ -499,22 +504,13 @@ public async Async.Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOn
return OneFuzzResultVoid.Ok;
}

public async Async.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
public async Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes) {
var result = await CheckCanUpdate(scalesetId);
if (!result.IsOk) {
throw new Exception($"cannot delete nodes from scaleset {scalesetId} : {result.ErrorV}");
}

var instanceIds = new HashSet<string>();
var machineToInstance = await ListInstanceIds(scalesetId);
foreach (var machineId in machineIds) {
if (machineToInstance.TryGetValue(machineId, out var instanceId)) {
_ = instanceIds.Add(instanceId);
} else {
_log.Info($"unable to find instance ID for {scalesetId:Tag:ScalesetId} - {machineId:Tag:VmId}");
}
}

var instanceIds = await ResolveInstanceIds(scalesetId, nodes);
if (!instanceIds.Any()) {
return;
}
Expand Down
18 changes: 9 additions & 9 deletions src/ApiService/IntegrationTests/Fakes/TestVmssOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ public Task<OneFuzzResultVoid> CreateVmss(Region location, Guid name, string vmS
throw new NotImplementedException();
}

public System.Threading.Tasks.Task DeleteNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
throw new NotImplementedException();
}

public Task<bool> DeleteVmss(Guid name, bool? forceDeletion = null) {
throw new NotImplementedException();
}
Expand All @@ -44,18 +40,14 @@ public Task<OneFuzzResult<string>> GetInstanceId(Guid name, Guid vmId) {
}


public Task<IDictionary<Guid, string>> ListInstanceIds(Guid name) {
public Task<IDictionary<Guid, string>?> ListInstanceIds(Guid name) {
throw new NotImplementedException();
}

public IAsyncEnumerable<VirtualMachineScaleSetVmResource> ListVmss(Guid name) {
throw new NotImplementedException();
}

public Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IReadOnlySet<Guid> machineIds) {
throw new NotImplementedException();
}

public Task<OneFuzzResultVoid> ResizeVmss(Guid name, long capacity) {
throw new NotImplementedException();
}
Expand All @@ -67,4 +59,12 @@ public Task<OneFuzzResultVoid> UpdateExtensions(Guid name, IList<VirtualMachineS
public Task<OneFuzzResultVoid> UpdateScaleInProtection(Scaleset scaleset, string instanceId, bool protectFromScaleIn) {
throw new NotImplementedException();
}

public Task<OneFuzzResultVoid> ReimageNodes(Guid scalesetId, IEnumerable<Node> nodes) {
throw new NotImplementedException();
}

public Async.Task DeleteNodes(Guid scalesetId, IEnumerable<Node> nodes) {
throw new NotImplementedException();
}
}

0 comments on commit 3626589

Please sign in to comment.