-
Notifications
You must be signed in to change notification settings - Fork 125
/
cas_server.rs
770 lines (666 loc) · 30.7 KB
/
cas_server.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
// Copyright 2024 The NativeLink Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use serde::Deserialize;
use crate::schedulers::SchedulerSpec;
use crate::serde_utils::{
convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
convert_numeric_with_shellexpand, convert_optional_numeric_with_shellexpand,
convert_optional_string_with_shellexpand, convert_string_with_shellexpand,
convert_vec_string_with_shellexpand,
};
use crate::stores::{ClientTlsConfig, ConfigDigestHashFunction, StoreRefName, StoreSpec};
/// Name of a scheduler. This alias is the key type of the
/// `CasConfig::schedulers` map and is the type of every config field that
/// refers to a scheduler by name.
pub type SchedulerRefName = String;
/// Name of an instance, i.e. the `instance_name` value used in the protocol.
/// It is the key type of the per-instance maps in this config.
pub type InstanceName = String;
/// Compression algorithms that can be selected for HTTP transport.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone, Copy, Deserialize)]
pub enum HttpCompressionAlgorithm {
    /// Data is transferred uncompressed.
    #[default]
    none,

    /// Data is transferred gzip-compressed.
    gzip,
}
/// Compression settings for the data exchanged with clients.
///
/// Note: Compressing data in the cloud rarely has a benefit, since most
/// cloud providers have very high bandwidth backplanes. However, for
/// clients not inside the data center, it might be a good idea to
/// compress data to and from the cloud. This will however come at a high
/// CPU and performance cost. If you are making remote execution share the
/// same CAS/AC servers as client's remote cache, you can create multiple
/// services with different compression settings that are served on
/// different ports. Then configure the non-cloud clients to use one port
/// and cloud-clients to use another.
#[derive(Deserialize, Debug, Default)]
#[serde(deny_unknown_fields)]
pub struct HttpCompressionConfig {
    /// The compression algorithm that the server will use when sending
    /// responses to clients. Enabling this will likely save a lot of
    /// data transfer, but will consume a lot of CPU and add a lot of
    /// latency.
    /// see: <https://github.com/tracemachina/nativelink/issues/109>
    ///
    /// Default: `HttpCompressionAlgorithm::none`
    pub send_compression_algorithm: Option<HttpCompressionAlgorithm>,

    /// The compression algorithms that the server will accept from clients.
    /// The server will broadcast the supported compression algorithms to
    /// clients and the client will choose which compression algorithm to
    /// use. Enabling this will likely save a lot of data transfer, but
    /// will consume a lot of CPU and add a lot of latency.
    /// see: <https://github.com/tracemachina/nativelink/issues/109>
    ///
    /// Default: {no supported compression}
    // `default` makes the field optional and yields an empty list when it is
    // omitted, which is the default documented above. Without it, a
    // `compression` section that only sets `send_compression_algorithm` is
    // rejected with a "missing field" error.
    #[serde(default)]
    pub accepted_compression_algorithms: Vec<HttpCompressionAlgorithm>,
}
/// Action Cache (AC) settings; used as the per-instance value of
/// `ServicesConfig::ac`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AcStoreConfig {
    /// Name of the backing store, as listed in the `stores` map of the main
    /// config. The same store name may be referenced multiple times.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub ac_store: StoreRefName,

    /// Controls whether the Action Cache store may be written to. When set
    /// to `true` the Action Cache can only be read from.
    ///
    /// Default: false
    #[serde(default)]
    pub read_only: bool,
}
/// Content Addressable Storage (CAS) settings; used as the per-instance
/// value of `ServicesConfig::cas`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CasStoreConfig {
    /// Name of the backing store, as listed in the `stores` map of the main
    /// config. The same store name may be referenced multiple times.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub cas_store: StoreRefName,
}
/// Remote execution portion of the capabilities service configuration.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CapabilitiesRemoteExecutionConfig {
    /// Name of the scheduler that is used to configure the remote execution
    /// capabilities.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub scheduler: SchedulerRefName,
}
/// Capabilities service settings; used as the per-instance value of
/// `ServicesConfig::capabilities`.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CapabilitiesConfig {
    /// Remote execution capabilities. When this is left unset, the
    /// capabilities service informs the client that remote execution is not
    /// supported.
    pub remote_execution: Option<CapabilitiesRemoteExecutionConfig>,
}
/// Remote execution service settings; used as the per-instance value of
/// `ServicesConfig::execution`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ExecutionConfig {
    /// Name of a store listed in the `stores` map of the main config. The
    /// same store name may be referenced multiple times, but the store
    /// referenced here must be a CAS store.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub cas_store: StoreRefName,

    /// Name of a scheduler listed in the `schedulers` map of the main config.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub scheduler: SchedulerRefName,
}
/// Settings for the ByteStream service, which streams data to and from the
/// CAS.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ByteStreamConfig {
    /// Maps each instance name to the name of a store in the "stores"
    /// configuration.
    pub cas_stores: HashMap<InstanceName, StoreRefName>,

    /// Upper bound on the number of bytes sent in each grpc stream chunk.
    /// According to <https://github.com/grpc/grpc.github.io/issues/371>
    /// 16KiB - 64KiB is optimal.
    ///
    /// Default: 64KiB
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
    pub max_bytes_per_stream: usize,

    /// Upper bound on the number of bytes decoded from each grpc stream
    /// chunk.
    ///
    /// Default: 4 MiB
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
    pub max_decoding_message_size: usize,

    /// How many seconds the internal stream is held open after a client
    /// disconnects in the middle of uploading a blob. Within that window a
    /// client may reconnect and continue uploading the same blob.
    ///
    /// Default: 10 (seconds)
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
    pub persist_stream_on_disconnect_timeout: usize,
}
/// Settings for the service that workers connect to and communicate through.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct WorkerApiConfig {
    /// Name of a scheduler listed in the `schedulers` map of the main config.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub scheduler: SchedulerRefName,
}
/// Settings for the prometheus metrics endpoint.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PrometheusConfig {
    /// Path under which prometheus metrics are registered. For example, with
    /// the path "/metrics" and the domain "example.com" the endpoint is
    /// reachable at <http://example.com/metrics>.
    ///
    /// Default: "/metrics"
    #[serde(default)]
    pub path: String,
}
/// Settings for the admin API endpoint.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AdminConfig {
    /// Path under which the admin API is registered. For example, with the
    /// path "/admin" and the domain "example.com" the endpoint is reachable
    /// at <http://example.com/admin>.
    ///
    /// Default: "/admin"
    #[serde(default)]
    pub path: String,
}
/// Settings for the health status check endpoint.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct HealthConfig {
    /// Path under which the health status check is registered. For example,
    /// with the path "/status" and the domain "example.com" the endpoint is
    /// reachable at <http://example.com/status>.
    ///
    /// Default: "/status"
    #[serde(default)]
    pub path: String,
}
/// Settings for the Build Event Protocol (BEP) service.
// NOTE(review): unlike most structs in this file, this one does not carry
// `#[serde(deny_unknown_fields)]`, so unknown keys are silently ignored.
// Confirm whether that is intentional.
#[derive(Debug, Deserialize)]
pub struct BepConfig {
    /// Store that build events are published to. The name must be listed in
    /// the `stores` map of the main config.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub store: StoreRefName,
}
/// Publisher settings for nativelink origin events.
// NOTE(review): unlike most structs in this file, this one does not carry
// `#[serde(deny_unknown_fields)]`, so unknown keys are silently ignored.
// Confirm whether that is intentional.
#[derive(Debug, Deserialize)]
pub struct OriginEventsPublisherConfig {
    /// Store that nativelink events are published to. The name must be
    /// listed in the `stores` map of the main config.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub store: StoreRefName,
}
/// Settings for collecting and publishing nativelink origin events.
// NOTE(review): unlike most structs in this file, this one does not carry
// `#[serde(deny_unknown_fields)]`, so unknown keys are silently ignored.
// Confirm whether that is intentional.
#[derive(Debug, Deserialize)]
pub struct OriginEventsConfig {
    /// Where and how origin events are published.
    pub publisher: OriginEventsPublisherConfig,

    /// Maximum number of events that may be queued before back pressure is
    /// applied.
    /// IMPORTANT: Backpressure causes all clients to slow down significantly.
    /// A value of zero (which is also what an omitted field deserializes to)
    /// means the default below is used.
    ///
    /// Default: 65536
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
    pub max_event_queue_size: usize,
}
/// The set of services that can be attached to a server. Every service is
/// optional.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ServicesConfig {
    /// Content Addressable Storage (CAS) backends, keyed by the
    /// `instance_name` used in the protocol. Each value is the underlying
    /// CAS store config.
    pub cas: Option<HashMap<InstanceName, CasStoreConfig>>,

    /// Action Cache (AC) backends, keyed by the `instance_name` used in the
    /// protocol. Each value is the underlying AC store config.
    pub ac: Option<HashMap<InstanceName, AcStoreConfig>>,

    /// Capabilities service. It is required in order to use most of the
    /// bazel protocol, and provides the supported features and versions of
    /// this bazel GRPC service.
    pub capabilities: Option<HashMap<InstanceName, CapabilitiesConfig>>,

    /// Remote execution service.
    /// NOTE: This service is under development and is currently just a
    /// place holder.
    pub execution: Option<HashMap<InstanceName, ExecutionConfig>>,

    /// Service used to stream data to and from the CAS. Bazel's protocol
    /// strongly encourages users to use this streaming interface to
    /// interact with the CAS when the data is large.
    pub bytestream: Option<ByteStreamConfig>,

    /// Service that workers connect to and communicate through.
    /// NOTE: This service should be served on a different, non-public port.
    /// In other words, `worker_api` configuration should not have any other
    /// services that are served on the same port. Doing so is a security
    /// risk, as workers have a different permission set than a client
    /// that makes the remote execution/cache requests.
    pub worker_api: Option<WorkerApiConfig>,

    /// Experimental - Build Event Protocol (BEP) service. It consumes build
    /// events from the client and publishes them to a store for processing
    /// by an external service.
    pub experimental_bep: Option<BepConfig>,

    /// Experimental - Prometheus metrics. Metrics are gathered as a
    /// singleton but may be served on multiple endpoints.
    pub experimental_prometheus: Option<PrometheusConfig>,

    /// Service for administrative tasks. It provides a REST API endpoint
    /// for administrative purposes.
    pub admin: Option<AdminConfig>,

    /// Service for the health status check.
    pub health: Option<HealthConfig>,
}
/// Server-side TLS settings for a listener.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct TlsConfig {
    /// Location of the certificate file.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub cert_file: String,

    /// Location of the private key file.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub key_file: String,

    /// Location of the certificate authority used for mTLS. Set this when
    /// client authentication is required for this endpoint.
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
    pub client_ca_file: Option<String>,

    /// Location of the certificate revocation list used for mTLS. Set this
    /// when client authentication is required for this endpoint.
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
    pub client_crl_file: Option<String>,
}
/// Advanced HTTP server settings. These should generally not be set.
/// For documentation on what each of these options does, see the hyper
/// documentation:
/// <https://docs.rs/hyper/latest/hyper/server/conn/struct.Http.html>
///
/// Note: Every option falls back to hyper's default value unless otherwise
/// specified.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct HttpServerConfig {
    /// Interval at which keep-alive pings are sent via HTTP2.
    /// Note: This is in seconds.
    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub http2_keep_alive_interval: Option<u32>,

    // The remaining options carry no local documentation; per the note on
    // the struct, refer to the hyper documentation for their meaning.
    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_max_pending_accept_reset_streams: Option<u32>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_initial_stream_window_size: Option<u32>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_initial_connection_window_size: Option<u32>,

    #[serde(default)]
    pub experimental_http2_adaptive_window: Option<bool>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_max_frame_size: Option<u32>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_max_concurrent_streams: Option<u32>,

    /// Note: This is in seconds.
    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_keep_alive_timeout: Option<u32>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_max_send_buf_size: Option<u32>,

    #[serde(default)]
    pub experimental_http2_enable_connect_protocol: Option<bool>,

    #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")]
    pub experimental_http2_max_header_list_size: Option<u32>,
}
/// The kind of listener a server binds.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Deserialize)]
pub enum ListenerConfig {
    /// A listener for HTTP/HTTPS/HTTP2 sockets.
    http(HttpListener),
}
/// Settings for an HTTP listener.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct HttpListener {
    /// Address to listen on, for example `127.0.0.1:8080`, or `:8080` to
    /// listen on all IPs.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub socket_address: String,

    /// Data transport compression settings used for this service.
    #[serde(default)]
    pub compression: HttpCompressionConfig,

    /// Advanced HTTP server settings.
    #[serde(default)]
    pub advanced_http: HttpServerConfig,

    /// TLS settings for this server. The server does not use TLS when this
    /// is not set.
    ///
    /// Default: None
    #[serde(default)]
    pub tls: Option<TlsConfig>,
}
/// Configuration of a single server: its name, its listener and the
/// services attached to it.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ServerConfig {
    /// Name of the server, which helps identify the service in telemetry
    /// and logs.
    ///
    /// Default: {index of server in config}
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
    pub name: String,

    /// Listener configuration for this server.
    pub listener: ListenerConfig,

    /// Services to attach to this server.
    pub services: Option<ServicesConfig>,
}
/// How the value(s) of a worker platform property are obtained.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Deserialize)]
pub enum WorkerProperty {
    /// A list of static values.
    /// Note: Generally there should only ever be 1 value, but if the platform
    /// property key is `PropertyType::Priority` it may have more than one value.
    #[serde(deserialize_with = "convert_vec_string_with_shellexpand")]
    values(Vec<String>),

    /// A dynamic configuration. The string is executed as a command
    /// (not shell) and is split by "\n" (new line character).
    query_cmd(String),
}
/// Generic description of an endpoint together with its connection settings.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct EndpointConfig {
    /// The endpoint's URI.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub uri: String,

    /// Timeout, in seconds, that a request should take.
    ///
    /// Default: 5 (seconds)
    pub timeout: Option<f32>,

    /// TLS settings used when connecting to the endpoint.
    pub tls_config: Option<ClientTlsConfig>,
}
/// Selects which action results get uploaded.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone, Copy, Deserialize)]
pub enum UploadCacheResultsStrategy {
    /// Upload only those action results whose exit code is 0.
    #[default]
    success_only,

    /// Upload no action results at all.
    never,

    /// Upload every action result that completes.
    everything,

    /// Upload only those action results that fail.
    failures_only,
}
/// Where the value of an additional environment variable comes from.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Deserialize)]
pub enum EnvironmentSource {
    /// Name of the platform property in the action from which the value is
    /// taken.
    property(String),

    /// A raw value to set.
    value(#[serde(deserialize_with = "convert_string_with_shellexpand")] String),

    /// The maximum amount of time, in milliseconds, that the command is
    /// allowed to run (as requested by the client).
    timeout_millis,

    /// A special file path that can be used to communicate out-of-band
    /// information to the parent process. The file is read after the command
    /// has finished executing, and depending on its contents the behavior of
    /// the result may be modified.
    ///
    /// The file contents should be json with the following schema:
    ///
    /// ```json
    /// {
    ///   // If set the command will be considered a failure.
    ///   // May be one of the following static strings:
    ///   // "timeout": Will Consider this task to be a timeout.
    ///   "failure": "timeout",
    /// }
    /// ```
    ///
    /// All fields are optional; the file does not need to be created and may
    /// be empty.
    side_channel_file,

    /// A "root" directory for the action. It can be used to store temporary
    /// files that are not needed once the action has completed, and it is
    /// purged after the action has completed.
    ///
    /// For example:
    /// If an action writes temporary data to a path but nativelink should
    /// clean up this path after the job has executed, you may create any
    /// directory under the path provided in this variable. A common pattern
    /// would be to use `entrypoint` to set a shell script that reads this
    /// variable, `mkdir $ENV_VAR_NAME/tmp` and `export TMPDIR=$ENV_VAR_NAME/tmp`.
    /// Another example might be to bind-mount the `/tmp` path in a container to
    /// this path in `entrypoint`.
    action_directory,
}
/// Settings that control where and when a worker uploads action results.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct UploadActionResultConfig {
    /// Underlying AC store into which the worker publishes execution
    /// results. Objects placed in this store should be reachable from the
    /// scheduler/client-cas after they have finished updating.
    ///
    /// Default: {No uploading is done}
    pub ac_store: Option<StoreRefName>,

    /// Decides in which situations results are published to the `ac_store`.
    /// With `success_only` only results with an exit code of 0 are uploaded;
    /// with `everything` all completed results are uploaded.
    ///
    /// Default: `UploadCacheResultsStrategy::success_only`
    #[serde(default)]
    pub upload_ac_results_strategy: UploadCacheResultsStrategy,

    /// Store that historical results are uploaded to. If set, this should be
    /// a CAS store.
    ///
    /// Default: {CAS store of parent}
    pub historical_results_store: Option<StoreRefName>,

    /// Decides in which situations results are published to the historical
    /// CAS. The historical CAS is where failures are published. These
    /// messages conform to the CAS key-value lookup format and are always a
    /// `HistoricalExecuteResponse` serialized message.
    ///
    /// Default: `UploadCacheResultsStrategy::failures_only`
    #[serde(default)]
    pub upload_historical_results_strategy: Option<UploadCacheResultsStrategy>,

    /// Template for the `ExecuteResponse.message` property. The message is
    /// attached to the response before it is sent to the client. These
    /// special variables are supported:
    /// - `digest_function`: Digest function used to calculate the action digest.
    /// - `action_digest_hash`: Action digest hash.
    /// - `action_digest_size`: Action digest size.
    /// - `historical_results_hash`: `HistoricalExecuteResponse` digest hash.
    /// - `historical_results_size`: `HistoricalExecuteResponse` digest size.
    ///
    /// A common use case is to provide a link to a web page that contains
    /// more useful information for the user.
    ///
    /// An example that is fully compatible with `bb_browser` is:
    /// <https://example.com/my-instance-name-here/blobs/{digest_function}/action/{action_digest_hash}-{action_digest_size}/>
    ///
    /// Default: "" (no message)
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
    pub success_message_template: String,

    /// Same as `success_message_template`, but used in the failure case.
    ///
    /// An example that is fully compatible with `bb_browser` is:
    /// <https://example.com/my-instance-name-here/blobs/{digest_function}/historical_execute_response/{historical_results_hash}-{historical_results_size}/>
    ///
    /// Default: "" (no message)
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
    pub failure_message_template: String,
}
/// Configuration of a worker that executes jobs locally on this machine.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LocalWorkerConfig {
    /// Name of the worker. It gives the worker a friendlier name for logging
    /// and metric publishing.
    ///
    /// Default: {Index position in the workers list}
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
    pub name: String,

    /// Endpoint through which the worker connects to the scheduler's
    /// `WorkerApiService`.
    pub worker_api_endpoint: EndpointConfig,

    /// Longest time an action is allowed to run. A task that requests a
    /// timeout longer than this limit is rejected. Value in seconds.
    ///
    /// Default: 1200 (seconds / 20 mins)
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
    pub max_action_timeout: usize,

    /// Whether the timeout is handled in `entrypoint` or another wrapper
    /// script. When `true`, `NativeLink` does not honor the timeout the
    /// action requested and instead always force kills the action once
    /// `max_action_timeout` has been reached. When `false`, `NativeLink`
    /// kills the action after the smaller of the action's timeout and
    /// `max_action_timeout`.
    ///
    /// The real timeout can be received via an environment variable set in:
    /// `EnvironmentSource::timeout_millis`.
    ///
    /// Example on where this is useful: `entrypoint` launches the action inside
    /// a docker container, but the docker container may need to be downloaded. Thus
    /// the timer should not start until the docker container has started executing
    /// the action. In this case, action will likely be wrapped in another program,
    /// like `timeout` and propagate timeouts via `EnvironmentSource::side_channel_file`.
    ///
    /// Default: false (`NativeLink` fully handles timeouts)
    #[serde(default)]
    pub timeout_handled_externally: bool,

    /// Command to execute on every execution request. It is parsed as a
    /// command + arguments (not shell).
    /// Example: "run.sh" and a job with command: "sleep 5" will result in a
    /// command like: "run.sh sleep 5".
    ///
    /// Default: {Use the command from the job request}.
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
    pub entrypoint: String,

    /// Optional script that is run before every action is processed on the
    /// worker. The value should be the full path to the script; if the
    /// script returns an exit code other than 0, all actions on the worker
    /// are paused. If not set, the worker never pauses and continues to
    /// accept jobs according to the scheduler configuration.
    /// This is useful, for example, if the worker should not take any more
    /// actions until there is enough resource available on the machine to
    /// handle them.
    pub experimental_precondition_script: Option<String>,

    /// Underlying CAS store from which the worker downloads CAS artifacts.
    /// This store must be a `FastSlowStore`. Its `fast` store must be a
    /// `FileSystemStore`, because hardlinks are used when building out the
    /// files instead of copying them. Its slow store must eventually resolve
    /// to the same store the scheduler/client uses to send job requests.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub cas_fast_slow_store: StoreRefName,

    /// Settings for uploading action results.
    #[serde(default)]
    pub upload_action_result: UploadActionResultConfig,

    /// Directory that work jobs are executed from. It is fully managed by
    /// the worker service and is purged on startup.
    /// This directory and the directory referenced in `local_filesystem_store_ref`'s
    /// `stores::FilesystemStore::content_path` must be on the same filesystem.
    /// Hardlinks will be used when placing files that are accessible to the jobs
    /// that are sourced from `local_filesystem_store_ref`'s `content_path`.
    // NOTE(review): this struct has no field named `local_filesystem_store_ref`;
    // presumably the text refers to the `fast` store of `cas_fast_slow_store`.
    // Confirm and update the wording.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
    pub work_directory: String,

    /// Properties of this worker. They are sent to the scheduler and used
    /// to tell the scheduler to restrict what should be executed on this
    /// worker.
    pub platform_properties: HashMap<String, WorkerProperty>,

    /// Optional mapping of environment variable names to set for the
    /// execution, in addition to those specified in the action itself. When
    /// set, each key becomes an environment variable before the job is
    /// executed; its value is either the value of the property of that name
    /// on the action being executed, or the fixed value.
    pub additional_environment: Option<HashMap<String, EnvironmentSource>>,
}
/// The kinds of worker that can be configured.
// Variant identifiers are intentionally lower-case: without a rename
// attribute serde uses them verbatim as the names accepted in the config.
#[allow(non_camel_case_types)]
#[derive(Debug, Deserialize)]
pub enum WorkerConfig {
    /// A worker that executes jobs locally on this machine.
    local(LocalWorkerConfig),
}
/// Global settings that apply to all modules of the process.
#[derive(Deserialize, Debug, Clone, Copy)]
#[serde(deny_unknown_fields)]
pub struct GlobalConfig {
    /// Maximum number of open files that can be opened at one time.
    /// This value is not strictly enforced, it is a best effort. Some internal libraries
    /// open files or read metadata from a files which do not obey this limit, however
    /// the vast majority of cases will have this limit be honored.
    /// As a rule of thumb this value should be less than half the value of `ulimit -n`.
    /// Any network open file descriptors is not counted in this limit, but is counted
    /// in the kernel limit. It is a good idea to set a very large `ulimit -n`.
    /// Note: This value must be greater than 10.
    ///
    /// Default: 512
    // `default` makes the field optional, matching the documented default and
    // every other defaulted field in this file. Previously a `global` section
    // that omitted this field was rejected with a "missing field" error.
    // An omitted field deserializes to 0.
    // NOTE(review): the code that turns 0 into the documented default of 512
    // is not in this file - confirm the consumer treats 0 as "use default".
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
    pub max_open_files: usize,

    /// If a file descriptor is idle for this many milliseconds, it will be closed.
    /// In the event a client or store takes a long time to send or receive data
    /// the file descriptor will be closed, and since `max_open_files` blocks new
    /// `open_file` requests until a slot opens up, it will allow new requests to be
    /// processed. If a read or write is attempted on a closed file descriptor, the
    /// file will be reopened and the operation will continue.
    ///
    /// On services where worker(s) and scheduler(s) live in the same process, this
    /// also prevents deadlocks if a file->file copy is happening, but cannot open
    /// a new file descriptor because the limit has been reached.
    ///
    /// Default: 1000 (1 second)
    // NOTE(review): the same `convert_duration_with_shellexpand` helper is used
    // elsewhere in this file for fields documented in seconds; confirm how a
    // human-readable duration string is scaled for this millisecond field.
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
    pub idle_file_descriptor_timeout_millis: u64,

    /// This flag can be used to prevent metrics from being collected at runtime.
    /// Metrics are still able to be collected, but this flag prevents metrics that
    /// are collected at runtime (performance metrics) from being tallied. The
    /// overhead of collecting metrics is very low, so this flag should only be
    /// used if there is a very good reason to disable metrics.
    /// This flag can be forcibly set using the `NATIVELINK_DISABLE_METRICS` variable.
    /// If the variable is set it will always disable metrics regardless of what
    /// this flag is set to.
    ///
    /// Default: <true (disabled) if no prometheus service enabled, false otherwise>
    #[serde(default)]
    pub disable_metrics: bool,

    /// Default hash function to use while uploading blobs to the CAS when not set
    /// by client.
    ///
    /// Default: `ConfigDigestHashFunction::sha256`
    pub default_digest_hash_function: Option<ConfigDigestHashFunction>,

    /// Default digest size to use for health check when running
    /// diagnostics checks. Health checks are expected to use this
    /// size for filling a buffer that is used for creation of
    /// digest.
    ///
    /// Default: 1024*1024 (1MiB)
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
    pub default_digest_size_health_check: usize,
}
/// Top-level configuration: stores, workers, schedulers, servers and global
/// settings.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CasConfig {
    /// Stores available to this config. The keys can be used by other
    /// config entries that need to reference a store.
    pub stores: HashMap<StoreRefName, StoreSpec>,

    /// Configurations of the workers used to execute jobs.
    pub workers: Option<Vec<WorkerConfig>>,

    /// Schedulers available to this config. The keys can be used by other
    /// config entries that need to reference a scheduler.
    pub schedulers: Option<HashMap<SchedulerRefName, SchedulerSpec>>,

    /// Servers to set up for this process.
    pub servers: Vec<ServerConfig>,

    /// Experimental - Origin events configuration. This service collects
    /// nativelink events and publishes them to a store for processing by an
    /// external service.
    pub experimental_origin_events: Option<OriginEventsConfig>,

    /// Global settings that apply to all modules.
    pub global: Option<GlobalConfig>,
}