Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Commit

Permalink
Enable .NET functions in check-pr for Agent-specific functions (#2119)
Browse files Browse the repository at this point in the history
Enable the .NET functions for the agent by sending the agent the URI for the `-net` service.

Also fix some things causing failures when using the .NET functions (`CouldShrinkScaleset` was not implemented).

Improve error handling around table serialization/deserialization, and fix an `int`/`long` (32-bit vs. 64-bit integer) mismatch between the Python and C# code.

----

For `check-pr` testing:

1. There's a new parameter `enable_dotnet` which maps directly to the `--enable_dotnet` switch on `deploy.py`.
2. If you put `agent` there, all the `agent_*` functions will be enabled for .NET and disabled for Python.
3. If `agent_can_schedule` is disabled on the Python side, it will automatically tell the agent to use the .NET functions.

So to test the .NET agent functions, do a `check-pr` run with `enable_dotnet` set to `agent` and it should all work.
  • Loading branch information
Porges authored Jul 20, 2022
1 parent b1a3e75 commit 4fa6e74
Show file tree
Hide file tree
Showing 19 changed files with 183 additions and 128 deletions.
15 changes: 8 additions & 7 deletions src/ApiService/ApiService/AgentCanSchedule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,27 @@ private async Async.Task<HttpResponseData> Post(HttpRequestData req) {
ErrorCode.UNABLE_TO_FIND,
new string[] {
"unable to find node"
}
),
canScheduleRequest.MachineId.ToString()
);
}),
canScheduleRequest.MachineId.ToString());
}

var allowed = true;
var workStopped = false;

if (!await _context.NodeOperations.CanProcessNewWork(node)) {
allowed = false;
}

var task = await _context.TaskOperations.GetByTaskId(canScheduleRequest.TaskId);
workStopped = task == null || task.State.ShuttingDown();
var workStopped = task == null || task.State.ShuttingDown();

if (workStopped) {
allowed = false;
}

if (allowed) {
allowed = (await _context.NodeOperations.AcquireScaleInProtection(node)).IsOk;
}

return await RequestHandling.Ok(req, new CanSchedule(allowed, workStopped));
return await RequestHandling.Ok(req, new CanSchedule(Allowed: allowed, WorkStopped: workStopped));
}
}
2 changes: 1 addition & 1 deletion src/ApiService/ApiService/AgentEvents.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ private async Async.Task<HttpResponseData> Post(HttpRequestData req) {
return null;
}

if (_context.NodeOperations.CouldShrinkScaleset(node)) {
if (await _context.NodeOperations.CouldShrinkScaleset(node)) {
_log.Info($"stopping free node to resize scaleset: {machineId}");
await _context.NodeOperations.SetHalt(node);
return null;
Expand Down
44 changes: 25 additions & 19 deletions src/ApiService/ApiService/OneFuzzTypes/Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,16 @@ public record NodeCommandAddSshKey(string PublicKey);

/// <summary>
/// A command record with four optional payloads: <c>Stop</c>, <c>StopTask</c>,
/// <c>AddSshKey</c> and <c>StopIfFree</c>. Every payload defaults to null, and a
/// null payload is left out of the serialized JSON rather than written as an
/// explicit null.
/// </summary>
/// <remarks>
/// NOTE(review): presumably only one payload is populated per command — confirm
/// against the code that constructs and consumes these.
/// </remarks>
public record NodeCommand
(
// Omitted from JSON output when null (JsonIgnoreCondition.WhenWritingNull).
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
StopNodeCommand? Stop = default,

// Omitted from JSON output when null.
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
StopTaskNodeCommand? StopTask = default,

// Omitted from JSON output when null.
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
NodeCommandAddSshKey? AddSshKey = default,

// Omitted from JSON output when null.
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
NodeCommandStopIfFree? StopIfFree = default
);

Expand Down Expand Up @@ -158,15 +165,15 @@ public record UserInfo(Guid? ApplicationId, Guid? ObjectId, String? Upn);

public record TaskDetails(
TaskType Type,
int Duration,
long Duration,
string? TargetExe = null,
Dictionary<string, string>? TargetEnv = null,
List<string>? TargetOptions = null,
int? TargetWorkers = null,
long? TargetWorkers = null,
bool? TargetOptionsMerge = null,
bool? CheckAsanLog = null,
bool? CheckDebugger = null,
int? CheckRetryCount = null,
long? CheckRetryCount = null,
bool? CheckFuzzerHelp = null,
bool? ExpectCrashOnFailure = null,
bool? RenameOutput = null,
Expand All @@ -184,11 +191,11 @@ public record TaskDetails(
string? StatsFile = null,
StatsFormat? StatsFormat = null,
bool? RebootAfterSetup = null,
int? TargetTimeout = null,
int? EnsembleSyncDelay = null,
long? TargetTimeout = null,
long? EnsembleSyncDelay = null,
bool? PreserveExistingOutputs = null,
List<string>? ReportList = null,
int? MinimizedStackDepth = null,
long? MinimizedStackDepth = null,
string? CoverageFilter = null
);

Expand All @@ -197,12 +204,12 @@ public record TaskVm(
string Sku,
string Image,
bool? RebootAfterSetup,
int Count = 1,
long Count = 1,
bool SpotInstance = false
);

public record TaskPool(
int Count,
long Count,
PoolName PoolName
);

Expand Down Expand Up @@ -390,7 +397,6 @@ public record Scaleset(
Guid? ClientId,
Guid? ClientObjectId,
Dictionary<string, string> Tags

) : StatefulEntityBase<ScalesetState>(State);

[JsonConverter(typeof(ContainerConverter))]
Expand Down Expand Up @@ -433,7 +439,7 @@ public record Report(
string? AsanLog,
Guid TaskId,
Guid JobId,
int? ScarinessScore,
long? ScarinessScore,
string? ScarinessDescription,
List<string>? MinimizedStack,
string? MinimizedStackSha256,
Expand Down Expand Up @@ -492,7 +498,7 @@ public record Repro(
public record ReproConfig(
Container Container,
string Path,
int Duration
long Duration
);

// Skipping AutoScaleConfig because it's not used anymore
Expand Down Expand Up @@ -572,7 +578,7 @@ public record JobConfig(
string Project,
string Name,
string Build,
int Duration,
long Duration,
string? Logs
);

Expand Down Expand Up @@ -624,7 +630,7 @@ public override void Write(Utf8JsonWriter writer, TaskUnitConfig value, JsonSeri

public record VmDefinition(
Compare Compare,
int Value
long Value
);

public record TaskDefinition(
Expand All @@ -648,7 +654,7 @@ List<WorkUnit> WorkUnits
public record ContainerDefinition(
ContainerType Type,
Compare Compare,
int Value,
long Value,
ContainerPermission Permissions);


Expand Down Expand Up @@ -702,12 +708,12 @@ Uri HeartbeatQueue
public string? TargetExe { get; set; }
public Dictionary<string, string>? TargetEnv { get; set; }
public List<string>? TargetOptions { get; set; }
public int? TargetTimeout { get; set; }
public long? TargetTimeout { get; set; }
public bool? TargetOptionsMerge { get; set; }
public int? TargetWorkers { get; set; }
public long? TargetWorkers { get; set; }
public bool? CheckAsanLog { get; set; }
public bool? CheckDebugger { get; set; }
public int? CheckRetryCount { get; set; }
public long? CheckRetryCount { get; set; }
public bool? CheckFuzzerHelp { get; set; }
public bool? ExpectCrashOnFailure { get; set; }
public bool? RenameOutput { get; set; }
Expand All @@ -720,9 +726,9 @@ Uri HeartbeatQueue
public List<string>? AnalyzerOptions { get; set; }
public string? StatsFile { get; set; }
public StatsFormat? StatsFormat { get; set; }
public int? EnsembleSyncDelay { get; set; }
public long? EnsembleSyncDelay { get; set; }
public List<string>? ReportList { get; set; }
public int? MinimizedStackDepth { get; set; }
public long? MinimizedStackDepth { get; set; }
public string? CoverageFilter { get; set; }

// from here forwards are Container definitions. These need to be inline
Expand Down
3 changes: 3 additions & 0 deletions src/ApiService/ApiService/OneFuzzTypes/Requests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ List<Guid> Tasks
) : NodeStateData;

/// <summary>
/// Node state data carrying an optional error string and optional script output.
/// Both members are nullable and are left out of the serialized JSON when null.
/// </summary>
public record NodeDoneEventData(
// Omitted from JSON output when null (JsonIgnoreCondition.WhenWritingNull).
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
string? Error,

// Omitted from JSON output when null.
[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
ProcessOutput? ScriptOutput
) : NodeStateData;

Expand Down
3 changes: 3 additions & 0 deletions src/ApiService/ApiService/ServiceConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ public interface IServiceConfig {

public string OneFuzzVersion { get; }

public string? OneFuzzAllowOutdatedAgent { get; }

// Prefix to add to the name of any tables & containers created. This allows
// multiple instances to run against the same storage account, which
// is useful for things like integration testing.
Expand Down Expand Up @@ -85,6 +87,7 @@ public ServiceConfiguration() {
public string? OneFuzzResourceGroup { get => Environment.GetEnvironmentVariable("ONEFUZZ_RESOURCE_GROUP"); }
public string? OneFuzzTelemetry { get => Environment.GetEnvironmentVariable("ONEFUZZ_TELEMETRY"); }
public string OneFuzzVersion { get => Environment.GetEnvironmentVariable("ONEFUZZ_VERSION") ?? "0.0.0"; }
public string? OneFuzzAllowOutdatedAgent => Environment.GetEnvironmentVariable("ONEFUZZ_ALLOW_OUTDATED_AGENT");

public string OneFuzzNodeDisposalStrategy { get => Environment.GetEnvironmentVariable("ONEFUZZ_NODE_DISPOSAL_STRATEGY") ?? "scale_in"; }
public string OneFuzzStoragePrefix => ""; // in production we never prefix the tables
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public async Task<HttpResponseData> CouldShrinkScaleset([HttpTrigger(Authorizati
var s = await req.ReadAsStringAsync();
var node = JsonSerializer.Deserialize<Node>(s!, EntityConverter.GetJsonSerializerOptions());

var r = _nodeOps.CouldShrinkScaleset(node!);
var r = await _nodeOps.CouldShrinkScaleset(node!);
var resp = req.CreateResponse(HttpStatusCode.OK);
await resp.WriteAsJsonAsync(r);
return resp;
Expand Down
6 changes: 3 additions & 3 deletions src/ApiService/ApiService/onefuzzlib/Creds.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ public async Async.Task<string> GetBaseRegion() {
return rg.Value.Data.Location.Name;
}

public Uri GetInstanceUrl() {
return new Uri($"https://{GetInstanceName()}.azurewebsites.net");
}
/// <summary>
/// Builds the HTTPS URL for this instance: the instance name with a
/// <c>-net</c> suffix, hosted under <c>azurewebsites.net</c>.
/// </summary>
/// <returns>The instance URL as a <see cref="Uri"/>.</returns>
public Uri GetInstanceUrl() {
    // TODO: remove -net when promoted to main version
    return new Uri($"https://{GetInstanceName()}-net.azurewebsites.net");
}

public record ScaleSetIdentity(string principalId);

Expand Down
29 changes: 22 additions & 7 deletions src/ApiService/ApiService/onefuzzlib/NodeOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public interface INodeOperations : IStatefulOrm<Node, NodeState> {
bool IsOutdated(Node node);
Async.Task Stop(Node node, bool done = false);
bool IsTooOld(Node node);
bool CouldShrinkScaleset(Node node);
Task<bool> CouldShrinkScaleset(Node node);
Async.Task SetHalt(Node node);
Async.Task SetState(Node node, NodeState state);
Async.Task ToReimage(Node node, bool done = false);
Expand Down Expand Up @@ -65,10 +65,11 @@ IOnefuzzContext context
}

public async Task<OneFuzzResultVoid> AcquireScaleInProtection(Node node) {
if (await ScalesetNodeExists(node) && node.ScalesetId != null) {
if (await ScalesetNodeExists(node) && node.ScalesetId is Guid scalesetId) {
_logTracer.Info($"Setting scale-in protection on node {node.MachineId}");
return await _context.VmssOperations.UpdateScaleInProtection((Guid)node.ScalesetId, node.MachineId, protectFromScaleIn: true);
return await _context.VmssOperations.UpdateScaleInProtection(scalesetId, node.MachineId, protectFromScaleIn: true);
}

return OneFuzzResultVoid.Ok;
}

Expand All @@ -88,7 +89,7 @@ public async Async.Task<bool> ScalesetNodeExists(Node node) {
}

public async Task<bool> CanProcessNewWork(Node node) {
if (IsOutdated(node)) {
if (IsOutdated(node) && _context.ServiceConfiguration.OneFuzzAllowOutdatedAgent != "true") {
_logTracer.Info($"can_process_new_work agent and service versions differ, stopping node. machine_id:{node.MachineId} agent_version:{node.Version} service_version:{_context.ServiceConfiguration.OneFuzzVersion}");
await Stop(node, done: true);
return false;
Expand Down Expand Up @@ -122,7 +123,7 @@ public async Task<bool> CanProcessNewWork(Node node) {
return false;
}

if (CouldShrinkScaleset(node)) {
if (await CouldShrinkScaleset(node)) {
_logTracer.Info($"can_process_new_work node scheduled to shrink. machine_id:{node.MachineId}");
await SetHalt(node);
return false;
Expand Down Expand Up @@ -286,8 +287,22 @@ public bool IsTooOld(Node node) {
&& node.InitializedAt < DateTime.UtcNow - INodeOperations.NODE_REIMAGE_TIME;
}

public bool CouldShrinkScaleset(Node node) {
throw new NotImplementedException();
/// <summary>
/// Checks the shrink queue keyed by the node's scaleset id and, if that does not
/// report a shrink, the shrink queue keyed by the node's pool id.
/// </summary>
/// <param name="node">Node whose <c>ScalesetId</c> and <c>PoolId</c> are inspected.</param>
/// <returns>
/// <c>true</c> as soon as either queue's <c>ShouldShrink</c> returns true;
/// <c>false</c> when neither does or when the corresponding id is absent.
/// </returns>
public async Task<bool> CouldShrinkScaleset(Node node) {
    // Both checks build a ShrinkQueue from a Guid and ask it the same question.
    async Task<bool> QueueSaysShrink(Guid baseId) {
        var shrinkQueue = new ShrinkQueue(baseId, _context.Queue, _logTracer);
        return await shrinkQueue.ShouldShrink();
    }

    // Scaleset queue is consulted first; the pool queue only if it did not match.
    if (node.ScalesetId is Guid scalesetId && await QueueSaysShrink(scalesetId)) {
        return true;
    }

    return node.PoolId is Guid poolId && await QueueSaysShrink(poolId);
}

public async Async.Task SetState(Node node, NodeState state) {
Expand Down
8 changes: 4 additions & 4 deletions src/ApiService/ApiService/onefuzzlib/Scheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ public async Async.Task ScheduleTasks() {
}
}

private async Async.Task<bool> ScheduleWorkset(WorkSet workSet, Pool pool, int count) {
private async Async.Task<bool> ScheduleWorkset(WorkSet workSet, Pool pool, long count) {
if (!PoolStateHelper.Available.Contains(pool.State)) {
_logTracer.Info($"pool not available for work: {pool.Name} state: {pool.State}");
return false;
}

for (var i = 0; i < count; i++) {
for (var i = 0L; i < count; i++) {
if (!await _poolOperations.ScheduleWorkset(pool, workSet)) {
_logTracer.Error($"unable to schedule workset. pool:{pool.Name} workset: {workSet}");
return false;
Expand Down Expand Up @@ -118,7 +118,7 @@ private async Async.Task<bool> ScheduleWorkset(WorkSet workSet, Pool pool, int c
}


record BucketConfig(int count, bool reboot, Container setupContainer, string? setupScript, Pool pool);
record BucketConfig(long count, bool reboot, Container setupContainer, string? setupScript, Pool pool);

private async Async.Task<(BucketConfig, WorkUnit)?> BuildWorkunit(Task task) {
Pool? pool = await _taskOperations.GetPool(task);
Expand Down Expand Up @@ -151,7 +151,7 @@ record BucketConfig(int count, bool reboot, Container setupContainer, string? se
}

var reboot = false;
var count = 1;
var count = 1L;
if (task.Config.Pool != null) {
count = task.Config.Pool.Count;
reboot = task.Config.Task.RebootAfterSetup ?? false;
Expand Down
3 changes: 2 additions & 1 deletion src/ApiService/ApiService/onefuzzlib/ShrinkQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ public ShrinkQueue(Guid baseId, IQueue queueOps, ILogTracer log) {
}

public override string ToString() {
return $"to-shrink-{_baseId.ToString("N")}";
return $"to-shrink-{_baseId:N}";
}

public string QueueName => this.ToString();

public async Async.Task Clear() {
Expand Down
Loading

0 comments on commit 4fa6e74

Please sign in to comment.