Merge branch 'main' into rename-turbocache
MarcusSorealheis committed Nov 17, 2023
2 parents 771a3bf + db724c0 commit d9c5b8e
Showing 112 changed files with 885 additions and 141 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -1,6 +1,6 @@
# Native Link

[![CI](https://github.com/trace_machina/native-link/workflows/CI/badge.svg)](https://github.com/trace_machina/native-link/actions/workflows/main.yml)
[![CI](https://github.com/tracemachina/native-link/workflows/CI/badge.svg)](https://github.com/tracemachina/native-link/actions/workflows/main.yml)

An extremely fast and efficient bazel cache service (CAS) written in rust.

@@ -45,13 +45,13 @@ bazel test //... \
--remote_executor=grpc://127.0.0.1:50051 \
--remote_default_exec_properties=cpu_count=1
```
This will cause bazel to run the commands through an all-in-one `CAS`, `scheduler` and `worker`. See [here](https://github.com/trace_machina/native-link/tree/master/config) for configuration documentation and [here](https://github.com/trace_machina/native-link/tree/main/deployment-examples/terraform) for an example of multi-node cloud deployment example.
This will cause bazel to run the commands through an all-in-one `CAS`, `scheduler` and `worker`. See [here](https://github.com/tracemachina/native-link/tree/master/config) for configuration documentation and [here](https://github.com/tracemachina/native-link/tree/main/deployment-examples/terraform) for an example of multi-node cloud deployment example.

## Example Deployments
We currently have a few example deployments in [deployment-examples directory](https://github.com/trace_machina/native-link/tree/master/deployment-examples).
We currently have a few example deployments in [deployment-examples directory](https://github.com/tracemachina/native-link/tree/master/deployment-examples).

### Terraform
The [terraform deployment](https://github.com/trace_machina/native-link/tree/master/deployment-examples/terraform) is the currently preferred method as it leverages a lot of cloud resources to make everything much more robust.
The [terraform deployment](https://github.com/tracemachina/native-link/tree/master/deployment-examples/terraform) is the currently preferred method as it leverages a lot of cloud resources to make everything much more robust.

The terraform deployment is very easy to setup and configure. This deployment will show off remote execution capabilities and cache capabilities.

@@ -111,7 +111,7 @@ cargo build --release
### Configure

Configuration is done via a JSON file that is passed in as the first parameter to the `cas` program. See [here](https://github.com/trace_machina/native-link/tree/master/config) for more details and examples.
Configuration is done via a JSON file that is passed in as the first parameter to the `cas` program. See [here](https://github.com/tracemachina/native-link/tree/master/config) for more details and examples.

## How to update internal or external rust deps

2 changes: 1 addition & 1 deletion cas/grpc_service/cas_server.rs
@@ -169,7 +169,7 @@ impl CasServer {
let digest_copy = DigestInfo::try_from(digest.clone())?;
let size_bytes = usize::try_from(digest_copy.size_bytes)
.err_tip(|| "Digest size_bytes was not convertible to usize")?;
// TODO(trace_machina) There is a security risk here of someone taking all the memory on the instance.
// TODO(allada) There is a security risk here of someone taking all the memory on the instance.
let result = store_pin
.get_part_unchunked(digest_copy, 0, None, Some(size_bytes))
.await
7 changes: 2 additions & 5 deletions cas/grpc_service/worker_api_server.rs
@@ -29,11 +29,8 @@ use common::DigestInfo;
use config::cas_server::WorkerApiConfig;
use error::{make_err, Code, Error, ResultExt};
use platform_property_manager::PlatformProperties;
<<<<<<< HEAD
use proto::com::github::allada::native_link::remote_execution::{
=======
use proto::com::github::trace_machina::turbo_cache::remote_execution::{
>>>>>>> upstream/main

use proto::com::github::trace_machina::native_link::remote_execution::{
execute_result, worker_api_server::WorkerApi, worker_api_server::WorkerApiServer as Server, ExecuteResult,
GoingAwayRequest, KeepAliveRequest, SupportedProperties, UpdateForWorker,
};
4 changes: 2 additions & 2 deletions cas/scheduler/action_messages.rs
@@ -608,7 +608,7 @@ impl Default for ActionResult {
}
}

// TODO(trace_machina) Remove the need for clippy argument by making the ActionResult and ProtoActionResult
// TODO(allada) Remove the need for clippy argument by making the ActionResult and ProtoActionResult
// a Box.
/// The execution status/stage. This should match `ExecutionStage::Value` in `remote_execution.proto`.
#[derive(PartialEq, Debug, Clone)]
@@ -620,7 +620,7 @@ pub enum ActionStage {
CacheCheck,
/// Action has been accepted and waiting for worker to take it.
Queued,
// TODO(trace_machina) We need a way to know if the job was sent to a worker, but hasn't begun
// TODO(allada) We need a way to know if the job was sent to a worker, but hasn't begun
// execution yet.
/// Worker is executing the action.
Executing,
2 changes: 1 addition & 1 deletion cas/scheduler/platform_property_manager.rs
@@ -72,7 +72,7 @@ impl From<ProtoPlatform> for PlatformProperties {
/// Priority - Means the worker is given this information, but does not restrict
/// what workers can take this value. However, the worker must have the
/// associated key present to be matched.
/// TODO(trace_machina) In the future this will be used by the scheduler and
/// TODO(allada) In the future this will be used by the scheduler and
/// worker to cause the scheduler to prefer certain workers over others,
/// but not restrict them based on these values.
#[derive(Eq, PartialEq, Hash, Clone, Ord, PartialOrd, Debug)]
2 changes: 1 addition & 1 deletion cas/store/fast_slow_store.rs
@@ -86,7 +86,7 @@ impl FastSlowStore {

/// Returns the range of bytes that should be sent given a slice bounds
/// offset so the output range maps the received_range.start to 0.
// TODO(trace_machina) This should be put into utils, as this logic is used
// TODO(allada) This should be put into utils, as this logic is used
// elsewhere in the code.
pub fn calculate_range(received_range: &Range<usize>, send_range: &Range<usize>) -> Option<Range<usize>> {
// Protect against subtraction overflow.
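
The `calculate_range` helper documented above maps a caller's requested byte range onto the slice of bytes that was actually received, shifted so that `received_range.start` becomes index 0. A minimal standalone sketch of that idea, inferred from the doc comment rather than taken from the crate's implementation:

```rust
use std::ops::Range;

/// Illustrative sketch: clamp `send_range` to the bytes that were actually
/// received, then shift it so the result indexes into the received buffer
/// (i.e. `received_range.start` maps to 0).
fn calculate_range(received_range: &Range<usize>, send_range: &Range<usize>) -> Option<Range<usize>> {
    let start = send_range.start.max(received_range.start);
    let end = send_range.end.min(received_range.end);
    if start >= end {
        return None; // The ranges do not overlap, so there is nothing to send.
    }
    // Subtraction cannot underflow here because `start >= received_range.start`.
    Some(start - received_range.start..end - received_range.start)
}

fn main() {
    // Received bytes 10..20 of a blob, caller asked for bytes 15..25:
    // only 15..20 is available, which is 5..10 within the received slice.
    assert_eq!(calculate_range(&(10..20), &(15..25)), Some(5..10));
    // Disjoint ranges yield None.
    assert_eq!(calculate_range(&(10..20), &(30..40)), None);
}
```
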
2 changes: 1 addition & 1 deletion cas/store/filesystem_store.rs
@@ -43,7 +43,7 @@ const DEFAULT_BUFF_SIZE: usize = 32 * 1024;
#[derive(Debug)]
pub struct SharedContext {
// Used in testing to know how many active drop() spawns are running.
// TODO(trace_machina) It is probably a good idea to use a spin lock during
// TODO(allada) It is probably a good idea to use a spin lock during
// destruction of the store to ensure that all files are actually
// deleted (similar to how it is done in tests).
pub active_drop_spawns: AtomicU64,
4 changes: 2 additions & 2 deletions cas/store/grpc_store.rs
@@ -86,7 +86,7 @@ impl GrpcStore {
error_if!(config.endpoints.is_empty(), "Expected at least 1 endpoint in GrpcStore");
let mut endpoints = Vec::with_capacity(config.endpoints.len());
for endpoint in &config.endpoints {
// TODO(trace_machina) This should be moved to be done in utils/serde_utils.rs like the others.
// TODO(allada) This should be moved to be done in utils/serde_utils.rs like the others.
// We currently don't have a way to handle those helpers with vectors.
let endpoint = shellexpand::env(&endpoint)
.map_err(|e| make_input_err!("{}", e))
@@ -318,7 +318,7 @@ impl GrpcStore {
}
return None;
}
// TODO(trace_machina) I'm sure there's a way to do this without a mutex, but rust can be super
// TODO(allada) I'm sure there's a way to do this without a mutex, but rust can be super
// picky with borrowing through a stream await.
*local_state.error.lock() = Some(maybe_message.unwrap_err());
None
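
The loop above expands environment variables in each configured endpoint string with `shellexpand::env` before the gRPC channel is built. A small sketch of that expansion step, assuming the `shellexpand` crate and a placeholder endpoint rather than values from the example configs:

```rust
/// Expand environment variables in a configured endpoint string.
/// `shellexpand::env` returns an error if a referenced variable is unset,
/// which surfaces configuration mistakes before any connection is attempted.
fn expand_endpoint(endpoint: &str) -> Result<String, String> {
    shellexpand::env(endpoint)
        .map(|expanded| expanded.into_owned())
        .map_err(|e| format!("bad endpoint '{endpoint}': {e}"))
}

fn main() {
    // A literal endpoint passes through unchanged...
    assert_eq!(
        expand_endpoint("grpc://127.0.0.1:50051").as_deref(),
        Ok("grpc://127.0.0.1:50051")
    );
    // ...while something like "grpc://${SCHEDULER_HOST}:50061" would pick up
    // SCHEDULER_HOST from the environment, or fail if it is not set.
}
```
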
6 changes: 1 addition & 5 deletions cas/worker/local_worker.rs
@@ -32,11 +32,7 @@ use config::cas_server::LocalWorkerConfig;
use error::{make_err, make_input_err, Code, Error, ResultExt};
use fast_slow_store::FastSlowStore;
use metrics_utils::{AsyncCounterWrapper, Collector, CollectorState, CounterWithTime, MetricsComponent, Registry};
<<<<<<< HEAD
use proto::com::github::trace_machina::native_link::remote_execution::{
=======
use proto::com::github::trace_machina::turbo_cache::remote_execution::{
>>>>>>> upstream/main
execute_result, update_for_worker::Update, worker_api_client::WorkerApiClient, ExecuteResult, KeepAliveRequest,
UpdateForWorker,
};
@@ -196,7 +192,7 @@ impl<'a, T: WorkerApiClientTrait, U: RunningActionsManager> LocalWorkerImpl<'a,
"Got ConnectionResult in LocalWorker::run which should never happen"
));
}
// TODO(trace_machina) We should possibly do something with this notification.
// TODO(allada) We should possibly do something with this notification.
Update::Disconnect(()) => {
self.metrics.disconnects_received.inc();
}
12 changes: 6 additions & 6 deletions cas/worker/running_actions_manager.rs
@@ -500,7 +500,7 @@ struct RunningActionImplExecutionResult {

struct RunningActionImplState {
command_proto: Option<ProtoCommand>,
// TODO(trace_machina) Kill is not implemented yet, but is instrumented.
// TODO(allada) Kill is not implemented yet, but is instrumented.
// However, it is used if the worker disconnects to destroy current jobs.
kill_channel_tx: Option<oneshot::Sender<()>>,
kill_channel_rx: Option<oneshot::Receiver<()>>,
@@ -800,10 +800,10 @@ impl RunningActionImpl {
// Defuse our guard so it does not try to cleanup and make nessless logs.
drop(ScopeGuard::<_, _>::into_inner(child_process_guard));
let exit_status = maybe_exit_status.err_tip(|| "Failed to collect exit code of process")?;
// TODO(trace_machina) We should implement stderr/stdout streaming to client here.
// TODO(allada) We should implement stderr/stdout streaming to client here.
// If we get killed before the stream is started, then these will lock up.
// TODO(trace_machina) There is a significant bug here. If we kill the action and the action creates
// child processes, it can create zombies. See: https://github.com/trace_machina/native-link/issues/225
// TODO(allada) There is a significant bug here. If we kill the action and the action creates
// child processes, it can create zombies. See: https://github.com/tracemachina/native-link/issues/225
let (stdout, stderr) = if killed_action {
drop(timer);
(Bytes::new(), Bytes::new())
@@ -1078,7 +1078,7 @@ impl RunningActionImpl {
stdout_digest,
stderr_digest,
execution_metadata,
server_logs: HashMap::default(), // TODO(trace_machina) Not implemented.
server_logs: HashMap::default(), // TODO(allada) Not implemented.
error: state.error.clone(),
message: String::new(), // Will be filled in on cache_action_result if needed.
});
@@ -1284,7 +1284,7 @@ impl UploadActionResults {
action_digest_info: DigestInfo,
maybe_historical_digest_info: Option<DigestInfo>,
) -> Result<String, Error> {
// TODO(trace_machina) Currently only sha256 is supported, but soon will be dynamic.
// TODO(allada) Currently only sha256 is supported, but soon will be dynamic.
template_str.replace("digest_function", digest_function::Value::Sha256.as_str_name());
template_str.replace("action_digest_hash", action_digest_info.hash_str());
template_str.replace("action_digest_size", action_digest_info.size_bytes);
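
The `UploadActionResults` code above fills a URL template by substituting placeholder tokens such as `digest_function`, `action_digest_hash`, and `action_digest_size`. A minimal sketch of that substitution pattern, using a hypothetical template and values; note that `str::replace` returns a new `String`, so the calls are chained rather than applied in place:

```rust
/// Substitute placeholder tokens in a (hypothetical) result-URL template.
fn fill_template(template: &str, digest_hash: &str, digest_size: i64) -> String {
    template
        .replace("digest_function", "SHA256")
        .replace("action_digest_hash", digest_hash)
        .replace("action_digest_size", &digest_size.to_string())
}

fn main() {
    let url = fill_template(
        "https://example.com/blobs/digest_function/action_digest_hash-action_digest_size",
        "0123456789abcdef",
        142,
    );
    assert_eq!(url, "https://example.com/blobs/SHA256/0123456789abcdef-142");
}
```
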
4 changes: 0 additions & 4 deletions cas/worker/worker_api_client_wrapper.rs
@@ -13,11 +13,7 @@
// limitations under the License.

use async_trait::async_trait;
<<<<<<< HEAD
use proto::com::github::trace_machina::native_link::remote_execution::{
=======
use proto::com::github::trace_machina::turbo_cache::remote_execution::{
>>>>>>> upstream/main
worker_api_client::WorkerApiClient, ExecuteResult, GoingAwayRequest, KeepAliveRequest, SupportedProperties,
UpdateForWorker,
};
4 changes: 2 additions & 2 deletions config/README.md
@@ -2,13 +2,13 @@

This service uses a JSON file as the configuration format. The JSON format is compatible with how protobuf's JSON format is structed.

To view the available fields please refer to [stores.rs](https://github.com/trace_machina/native-link/blob/master/config/stores.rs) and [cas_server](https://github.com/trace_machina/native-link/blob/master/config/cas_server.rs).
To view the available fields please refer to [stores.rs](https://github.com/tracemachina/native-link/blob/master/config/stores.rs) and [cas_server](https://github.com/tracemachina/native-link/blob/master/config/cas_server.rs).

These two files should have enough documentation in them on what each field does and where each field goes.

## Examples

The [examples directory](https://github.com/trace_machina/native-link/tree/master/config/examples) contains a few examples of configuration files.
The [examples directory](https://github.com/tracemachina/native-link/tree/master/config/examples) contains a few examples of configuration files.

A very basic configuration that is a pure in-memory store is:
```js
4 changes: 2 additions & 2 deletions config/cas_server.rs
@@ -53,7 +53,7 @@ pub struct CompressionConfig {
/// responses to clients. Enabling this will likely save a lot of
/// data transfer, but will consume a lot of CPU and add a lot of
/// latency.
/// see: https://github.com/trace_machina/native-link/issues/109
/// see: https://github.com/tracemachina/native-link/issues/109
///
/// Default: CompressionAlgorithm::None
pub send_compression_algorithm: Option<CompressionAlgorithm>,
@@ -63,7 +63,7 @@ pub struct CompressionConfig {
/// clients and the client will choose which compression algorithm to
/// use. Enabling this will likely save a lot of data transfer, but
/// will consume a lot of CPU and add a lot of latency.
/// see: https://github.com/trace_machina/native-link/issues/109
/// see: https://github.com/tracemachina/native-link/issues/109
///
/// Defaults: <no supported compression>
pub accepted_compression_algorithms: Vec<CompressionAlgorithm>,
2 changes: 1 addition & 1 deletion config/schedulers.rs
@@ -45,7 +45,7 @@ pub enum PropertyType {

/// Does not restrict on this value and instead will be passed to the worker
/// as an informational piece.
/// TODO(trace_machina) In the future this will be used by the scheduler and worker
/// TODO(allada) In the future this will be used by the scheduler and worker
/// to cause the scheduler to prefer certain workers over others, but not
/// restrict them based on these values.
Priority,
2 changes: 1 addition & 1 deletion deployment-examples/terraform/AWS/auto_scaling_groups.tf
@@ -158,7 +158,7 @@ resource "aws_launch_template" "scheduler_launch_template" {

resource "aws_autoscaling_group" "scheduler_autoscaling_group" {
name = "native_link_scheduler_autoscaling_group"
# TODO(trace_machina) We currently only support 1 scheduler at a time. Trying to support
# TODO(allada) We currently only support 1 scheduler at a time. Trying to support
# more than 1 scheduler at a time is undefined behavior and might end up with
# very strange routing.
max_size = 1
2 changes: 1 addition & 1 deletion deployment-examples/terraform/AWS/load_balancers.tf
@@ -53,7 +53,7 @@ resource "aws_lb_listener" "cas_load_balancer_listener" {
resource "aws_lb" "scheduler_load_balancer" {
name = "native-link-scheduler-lb"
internal = false
# TODO(trace_machina) This really should be a TCP based load balancer, but due to it being
# TODO(allada) This really should be a TCP based load balancer, but due to it being
# GRPC and not supporting HTTP1.x causes the health checker to always fail.
load_balancer_type = "application"
security_groups = [aws_security_group.schedulers_load_balancer_sg.id]
6 changes: 3 additions & 3 deletions deployment-examples/terraform/AWS/scripts/cas.json
@@ -15,7 +15,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.
@@ -49,7 +49,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.
@@ -85,7 +85,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.
6 changes: 3 additions & 3 deletions deployment-examples/terraform/AWS/scripts/scheduler.json
@@ -15,7 +15,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.
@@ -47,7 +47,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.
@@ -83,7 +83,7 @@
}
},
"slow": {
// TODO(trace_machina) This needs to be some kind of sharding store, because s3 has
// TODO(allada) This needs to be some kind of sharding store, because s3 has
// a 5k requests/s per path limit. To get around this we just need to create
// enough shards and point them to the same bucket/path but with some key
// to distinguish each shard based on the key.