forked from apache/yunikorn-scheduler-interface
-
Notifications
You must be signed in to change notification settings - Fork 0
/
si.proto
487 lines (417 loc) · 16.6 KB
/
si.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by make build. DO NOT EDIT
syntax = "proto3";
package si.v1;
import "google/protobuf/descriptor.proto";
option go_package = "lib/go/si";
// Custom field option added to google.protobuf.FieldOptions.
extend google.protobuf.FieldOptions {
  // Marks a field whose value MAY be sensitive. A field carrying this option
  // MUST be handled accordingly (for example, it must not be logged).
  bool si_secret = 1059;
}
// Scheduler is the interface a resource manager (RM) uses to register itself
// and to exchange allocation, application and node updates with the scheduler.
service Scheduler {
  // Registers an RM. If the call is a reconnect from a previously registered
  // RM, it triggers a cleanup of all in-memory data and a resync with the RM.
  rpc RegisterResourceManager(RegisterResourceManagerRequest)
      returns (RegisterResourceManagerResponse) {}

  // The streaming calls below update the scheduler status (this includes node
  // status updates, allocation request updates, etc.) and deliver updates from
  // the scheduler for allocation changes, any required status changes, etc.

  // Updates allocation requests.
  rpc UpdateAllocation(stream AllocationRequest)
      returns (stream AllocationResponse) {}

  // Updates application requests.
  rpc UpdateApplication(stream ApplicationRequest)
      returns (stream ApplicationResponse) {}

  // Updates node information.
  rpc UpdateNode(stream NodeRequest)
      returns (stream NodeResponse) {}
}
// Request message of the RegisterResourceManager RPC.
message RegisterResourceManagerRequest {
  // ID that uniquely identifies an RM **cluster**. For example, when an RM
  // cluster runs several manager instances for HA purposes, all of them should
  // register with the same information.
  // When an RM registers with an ID that is already known, all previous
  // scheduling state held in memory is cleaned up and the RM is expected to
  // report its full scheduling state after registration.
  string rmID = 1;

  // Version of the RM scheduler interface client.
  string version = 2;

  // Policy group name.
  // Selects the policy to use. The policy should be statically configured
  // (compare the network security group concept of EC2).
  // Different RMs may refer to the same policyGroup when their static
  // configuration is identical.
  string policyGroup = 3;

  // Build information of the k8shim, passed to the core.
  map<string, string> buildInfo = 4;

  // Serialized configuration for this policyGroup, passed to the core.
  string config = 5;

  // Additional configuration key/value pairs for configuration that is not
  // related to the policyGroup.
  map<string, string> extraConfig = 6;
}
// Returned by the scheduler to the RM when registration succeeds; on failure
// the RM receives an exception instead.
message RegisterResourceManagerResponse {
  // This message deliberately carries no fields.
}
// Message streamed to the scheduler through the UpdateAllocation RPC.
message AllocationRequest {
  // Allocation requests that are new, or that replace an existing allocation
  // request (if the allocationID is the same).
  repeated AllocationAsk asks = 1;

  // Allocations that can be released.
  AllocationReleasesRequest releases = 2;

  // ID of the RM; used to identify which RM the request comes from.
  string rmID = 3;
}
// Message streamed to the scheduler through the UpdateApplication RPC.
message ApplicationRequest {
  // Applications to add. The RM should explicitly add an application when an
  // allocation request explicitly belongs to that application. Adding is
  // optional when the allocation request does not belong to an application
  // (independent allocation).
  repeated AddApplicationRequest new = 1;

  // Applications to remove. Every allocation and allocation request associated
  // with a removed application is removed as well.
  repeated RemoveApplicationRequest remove = 2;

  // ID of the RM; used to identify which RM the request comes from.
  string rmID = 3;
}
// Message streamed to the scheduler through the UpdateNode RPC.
message NodeRequest {
  // Nodes that can newly be scheduled. A node that is reported as
  // "unschedulable" has to be included in this field as well.
  repeated NodeInfo nodes = 1;

  // ID of the RM; used to identify which RM the request comes from.
  string rmID = 2;
}
// Message streamed back to the RM through the UpdateAllocation RPC.
message AllocationResponse {
  // Newly made allocations.
  repeated Allocation new = 1;

  // Released allocations. An entry is either the scheduler's acknowledgement
  // of an RM request to terminate allocations, or a decision taken by the
  // scheduler itself (such as preemption or timeout).
  repeated AllocationRelease released = 2;

  // Released allocation asks (placeholder), reported when the placeholder
  // allocation times out.
  repeated AllocationAskRelease releasedAsks = 3;

  // Allocation requests that were rejected.
  repeated RejectedAllocationAsk rejected = 4;
}
// Message streamed back to the RM through the UpdateApplication RPC.
message ApplicationResponse {
  // Applications that were rejected.
  repeated RejectedApplication rejected = 1;

  // Applications that were accepted.
  repeated AcceptedApplication accepted = 2;

  // Applications that were updated.
  repeated UpdatedApplication updated = 3;
}
// Message streamed back to the RM through the UpdateNode RPC.
message NodeResponse {
  // Node registrations that were rejected.
  repeated RejectedNode rejected = 1;

  // Node registrations that were accepted.
  repeated AcceptedNode accepted = 2;
}
// Describes an application whose state was updated; reported in
// ApplicationResponse.updated.
message UpdatedApplication {
  // ID of the application that was updated.
  string applicationID = 1;

  // State of the application.
  string state = 2;

  // Timestamp of the state transition.
  // NOTE(review): the unit/epoch is not stated in this file; confirm.
  int64 stateTransitionTimestamp = 3;

  // Detailed message.
  string message = 4;
}
// Describes an application that was rejected; reported in
// ApplicationResponse.rejected.
message RejectedApplication {
  // ID of the application that was rejected.
  string applicationID = 1;

  // Human-readable reason for the rejection.
  string reason = 2;
}
// Describes an application that was accepted; reported in
// ApplicationResponse.accepted.
message AcceptedApplication {
  // ID of the application that was accepted.
  string applicationID = 1;
}
// Describes a node registration that was rejected; reported in
// NodeResponse.rejected.
message RejectedNode {
  // ID of the node that was rejected.
  string nodeID = 1;

  // Human-readable reason for the rejection.
  string reason = 2;
}
// Describes a node registration that was accepted; reported in
// NodeResponse.accepted.
message AcceptedNode {
  // ID of the node that was accepted.
  string nodeID = 1;
}
// Resource is a sparse map from a resource to its Quantity.
message Resource {
  // Quantity per resource, keyed by a string identifying the resource.
  map<string, Quantity> resources = 1;
}
// Quantity wraps a single int64 value.
message Quantity {
  // The wrapped value.
  int64 value = 1;
}
// AllocationAsk is a single allocation request sent by the RM as part of an
// AllocationRequest.
message AllocationAsk {
  // Key used by both the scheduler and the RM to track allocations.
  // It does not have to match the RM's internal allocation ID (such as the Pod
  // name in K8s or the ContainerID in YARN).
  // Allocations that originate from the same AllocationAsk and are returned to
  // the RM at the same time carry the same allocationKey.
  // When an AllocationAsk with the same allocationKey already exists, the
  // request is treated as an update of that existing AllocationAsk.
  string allocationKey = 1;

  // ID of the application this allocation ask belongs to.
  string applicationID = 2;

  // Partition the application belongs to.
  string partitionName = 3;

  // Amount of resources per ask.
  Resource resourceAsk = 4;

  // Maximum number of allocations.
  int32 maxAllocations = 5;

  // Priority of the ask.
  int32 priority = 6;

  // Execution timeout in milliseconds: how long after being allocated by the
  // scheduler this allocation will be terminated (by the scheduler).
  // Zero or a negative value means it never expires.
  int64 executionTimeoutMilliSeconds = 7;

  // Tags for this specific AllocationAsk. Allocation level tags are used when
  // placing this specific ask on nodes in the cluster, and are the tags used
  // in the PlacementConstraints.
  // Tags are optional.
  map<string, string> tags = 8;

  // Name of the TaskGroup this ask belongs to.
  string taskGroupName = 9;

  // True for a placeholder ask, false for a real ask; defaults to false.
  // Ignored when taskGroupName is not set.
  bool placeholder = 10;

  // Whether this ask is the originator of the application.
  bool Originator = 11;

  // Preemption policy for this ask.
  PreemptionPolicy preemptionPolicy = 12;
}
// PreemptionPolicy carries the preemption settings of an AllocationAsk.
message PreemptionPolicy {
  // Opt-out from preemption.
  // NOTE(review): the field name reads like "allow this ask to be preempted",
  // which is the opposite of an opt-out; confirm which value opts out.
  bool allowPreemptSelf = 1;

  // Allow preemption of other tasks with the same or a lower priority.
  bool allowPreemptOther = 2;
}
// AddApplicationRequest describes an application the RM adds through
// ApplicationRequest.new.
message AddApplicationRequest {
  // ID of the application; must be unique.
  string applicationID = 1;

  // Queue the application is requesting. The scheduler places the application
  // into a queue according to policy, and the policy determines how the
  // requested queue is taken into account.
  string queueName = 2;

  // Partition the application belongs to.
  string partitionName = 3;

  // User and group information of the application owner.
  UserGroupInformation ugi = 4;

  // Tags for the application, providing generic application level information.
  // Tags are optional and are used when placing an application or scheduling.
  // Application tags are not considered when processing AllocationAsks.
  map<string, string> tags = 5;

  // Execution timeout in milliseconds: how long this application can be in a
  // running state. Zero or a negative value means it never expires.
  int64 executionTimeoutMilliSeconds = 6;

  // Total amount of resources the gang placeholders will request.
  Resource placeholderAsk = 7;

  // Gang scheduling style: hard (the application fails after the placeholder
  // timeout) or soft (after the timeout the application is scheduled as a
  // normal application).
  string gangSchedulingStyle = 8;
}
// RemoveApplicationRequest identifies an application the RM removes through
// ApplicationRequest.remove.
message RemoveApplicationRequest {
  // ID of the application to remove.
  string applicationID = 1;

  // Partition the application belongs to.
  string partitionName = 2;
}
// UserGroupInformation holds a user name together with the user's groups.
message UserGroupInformation {
  // The user name.
  string user = 1;

  // Groups the user belongs to; the list can be empty.
  repeated string groups = 2;
}
// Allocation describes a single allocation. It is reported to the RM in
// AllocationResponse.new and by the RM in NodeInfo.existingAllocations.
message Allocation {
  // Field number 7 and the name "queueName" are reserved and must not be
  // reused. Field number 4 is not declared in this message.
  reserved 7;
  reserved "queueName";

  // AllocationKey taken from the AllocationAsk.
  string allocationKey = 1;

  // Allocation tags taken from the AllocationAsk.
  map<string, string> allocationTags = 2;

  // UUID of the allocation.
  string UUID = 3;

  // Resource for each allocation.
  Resource resourcePerAlloc = 5;

  // Priority of the ask.
  int32 priority = 6;

  // Node the allocation belongs to.
  string nodeID = 8;

  // ID of the application.
  string applicationID = 9;

  // Partition of the allocation.
  string partitionName = 10;

  // Name of the TaskGroup this allocation belongs to.
  string taskGroupName = 11;

  // True for a placeholder allocation, false for a real allocation; defaults
  // to false. Ignored when taskGroupName is not set.
  bool placeholder = 12;
}
// AllocationReleasesRequest groups the allocations and asks the RM wants to
// release; it is carried in AllocationRequest.releases.
message AllocationReleasesRequest {
  // Allocations to release.
  repeated AllocationRelease allocationsToRelease = 1;

  // Asks to release.
  repeated AllocationAskRelease allocationAsksToRelease = 2;
}
// TerminationType states why an allocation or allocation ask was released.
// Each value also names the side that creates a release of that type.
enum TerminationType {
  // TerminationType not set.
  UNKNOWN_TERMINATION_TYPE = 0;

  // Stopped or killed by the ResourceManager (created by RM).
  STOPPED_BY_RM = 1;

  // Timed out based on the executionTimeoutMilliSeconds (created by core).
  TIMEOUT = 2;

  // Allocation preempted by the scheduler (created by core).
  PREEMPTED_BY_SCHEDULER = 3;

  // Placeholder allocation replaced by a real allocation (created by core).
  PLACEHOLDER_REPLACED = 4;
}
// AllocationRelease releases an allocation. The message is bidirectional: its
// TerminationType identifies the origin (creator) of the release, as noted in
// the comment on each TerminationType value. The receiver confirms or responds
// by sending the same message back with the same termination type set.
message AllocationRelease {
  // Name of the partition the allocation belongs to.
  string partitionName = 1;

  // Application the allocation belongs to.
  string applicationID = 2;

  // UUID of the allocation to release. When not set, all allocations of the
  // applicationID are released.
  string UUID = 3;

  // Termination type of the released allocation.
  TerminationType terminationType = 4;

  // Human-readable message.
  string message = 5;

  // AllocationKey taken from the AllocationAsk.
  string allocationKey = 6;
}
// AllocationAskRelease releases an allocation ask.
message AllocationAskRelease {
  // Partition to release the ask from. Required.
  string partitionName = 1;

  // Optional. When set, allocation keys are filtered by this application ID.
  // When the application ID is set and allocationKey is not, all allocation
  // keys under the application ID are released.
  string applicationID = 2;

  // Optional. When set, only the allocation ask with the specified allocation
  // key is released.
  string allocationKey = 3;

  // Termination type of the released allocation ask.
  TerminationType terminationType = 4;

  // Human-readable message.
  string message = 5;
}
// NodeInfo describes a node and the action the RM requests for it; it is sent
// to the scheduler in NodeRequest.nodes.
message NodeInfo {
  // Action requested by the RM.
  enum ActionFromRM {
    // ActionFromRM not set.
    UNKNOWN_ACTION_FROM_RM = 0;

    // Create the node.
    CREATE = 1;

    // Update the node's resources and attributes.
    UPDATE = 2;

    // Do not place new allocations on the node.
    DRAIN_NODE = 3;

    // Decommission the node: allocations on the node stop immediately and the
    // node is removed from the schedulable lists.
    // (The identifier keeps its original spelling.)
    DECOMISSION = 4;

    // Move the node from the draining state to the SCHEDULABLE state.
    // An error is thrown when the node is not in the draining state.
    DRAIN_TO_SCHEDULABLE = 5;
  }

  // ID of the node; the node must exist in order to be updated.
  string nodeID = 1;

  // Action the scheduler has to perform.
  ActionFromRM action = 2;

  // New attributes of the node; they replace the previously reported
  // attributes.
  map<string, string> attributes = 3;

  // New schedulable resource. The scheduler may preempt allocations on the
  // node or schedule more allocations accordingly.
  Resource schedulableResource = 4;

  // Resources occupied (allocated) by other schedulers, for the case where
  // this scheduler co-exists with other schedulers on the node.
  Resource occupiedResource = 5;

  // Allocated resources; added when the node is registered to the RM
  // (recovery).
  repeated Allocation existingAllocations = 6;
}
// Describes an allocation request that was rejected; reported in
// AllocationResponse.rejected.
message RejectedAllocationAsk {
  // Allocation key of the rejected ask.
  string allocationKey = 1;

  // ID of the application.
  string applicationID = 2;

  // Human-readable reason for the rejection.
  string reason = 3;
}
// PredicatesArgs holds the arguments passed to the predicates function.
message PredicatesArgs {
  // Allocation key identifying a container. The predicates function checks
  // whether this container is eligible to be placed onto a node.
  string allocationKey = 1;

  // ID of the node the container is assigned to.
  string nodeID = 2;

  // Run the predicates for allocations (true) or for reservations (false).
  bool allocate = 3;
}
// PreemptionPredicatesArgs holds the arguments for a predicate check that
// takes preemption of existing allocations into account.
message PreemptionPredicatesArgs {
  // Allocation key of the container to check.
  string allocationKey = 1;

  // ID of the node on which scheduling of the container should be attempted.
  string nodeID = 2;

  // Existing allocations that should be tentatively removed before checking.
  repeated string preemptAllocationKeys = 3;

  // Index of the last allocation in the starting attempt (the first attempt
  // should be 0..startIndex).
  int32 startIndex = 4;
}
// PreemptionPredicatesResponse reports the outcome of a preemption predicate
// check.
message PreemptionPredicatesResponse {
  // Whether or not the container will schedule on the node.
  bool success = 1;

  // Index of the last allocation that was removed before success; ignored
  // when the check failed.
  int32 index = 2;
}
// UpdateContainerSchedulingStateRequest reports the scheduling state of a
// container.
message UpdateContainerSchedulingStateRequest {
  // Container scheduling states.
  enum SchedulingState {
    // SchedulingState not set.
    UNKNOWN_SCHEDULING_STATE = 0;

    // The container is being skipped by the scheduler.
    SKIPPED = 1;

    // The container is scheduled and has been assigned to a node.
    SCHEDULED = 2;

    // The container is reserved on some node, but not yet assigned.
    RESERVED = 3;

    // The scheduler has visited all candidate nodes for this container, but
    // none of them could satisfy the container's requirement.
    FAILED = 4;
  }

  // Application ID.
  // NOTE(review): the field name is misspelled ("applicartionID"). It is kept
  // unchanged because renaming alters generated code and JSON names.
  string applicartionID = 1;

  // Allocation key used to identify a container.
  string allocationKey = 2;

  // Container scheduling state.
  SchedulingState state = 3;

  // Optional plain message explaining why the container is in this state.
  string reason = 4;
}
// UpdateConfigurationRequest carries a new configuration for an RM and policy
// group.
message UpdateConfigurationRequest {
  // Field number 1 and the name "configs" are reserved and must not be reused.
  reserved 1;
  reserved "configs";

  // ID of the RM to update.
  string rmID = 2;

  // PolicyGroup to update.
  string policyGroup = 3;

  // New configuration to apply.
  string config = 4;

  // Additional configuration key/value pairs for configuration that is not
  // related to the policyGroup.
  map<string, string> extraConfig = 5;
}
// EventRecord describes a single event associated with an object.
message EventRecord {
  // Kind of object an event is associated with.
  enum Type {
    // EventRecord Type not set.
    UNKNOWN_EVENTRECORD_TYPE = 0;
    REQUEST = 1;
    APP = 2;
    NODE = 3;
    QUEUE = 4;
  }

  // Type of the object associated with the event.
  Type type = 1;

  // ID of the object associated with the event.
  string objectID = 2;

  // Group this object belongs to: the application ID for allocations and the
  // queue for applications.
  string groupID = 3;

  // Reason of this event.
  string reason = 4;

  // Detailed message as a string.
  string message = 5;

  // Timestamp of the event.
  // NOTE(review): presumably nanoseconds, judging by the field name; confirm.
  int64 timestampNano = 6;
}