Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tools/ci/canary-runner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ publish = false
[dependencies]
anyhow = "1"
aws-config = "0.3"
aws-sdk-cloudwatch = "0.3"
aws-sdk-lambda = "0.3"
aws-sdk-s3 = "0.3"
base64 = "0.13"
Expand Down
67 changes: 62 additions & 5 deletions tools/ci/canary-runner/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
// when running this locally.

use anyhow::{bail, Context, Result};
use aws_sdk_cloudwatch as cloudwatch;
use aws_sdk_lambda as lambda;
use aws_sdk_s3 as s3;
use cloudwatch::model::StandardUnit;
use s3::ByteStream;
use std::path::PathBuf;
use std::time::Duration;
use std::time::{Duration, SystemTime};
use std::{env, path::Path};
use structopt::StructOpt;
use tokio::process::Command;
Expand Down Expand Up @@ -44,6 +46,60 @@ pub struct RunOpt {
}

/// Runs the canary and reports the outcome to CloudWatch.
///
/// Loads the AWS configuration from the environment, delegates the actual
/// canary work to `run_canary`, then publishes metrics to the
/// `aws-sdk-rust-canary` namespace: `canary-success`, `canary-failure`,
/// `canary-total-time`, and (only when the canary succeeded)
/// `canary-invoke-time`.
///
/// # Errors
///
/// Returns an error with the context "failed to emit metrics" if the
/// `put_metric_data` request fails. Otherwise returns the result of
/// `run_canary` with its `Duration` payload discarded.
///
/// # Panics
///
/// Panics with "time in range" if `SystemTime::elapsed` reports an error.
pub async fn run(opt: RunOpt) -> Result<()> {
// Wall-clock start, used below to compute `canary-total-time`.
let start_time = SystemTime::now();
// The same config is shared by `run_canary` and the CloudWatch client below.
let config = aws_config::load_from_env().await;
// Deliberately not propagated with `?` here: metrics must be emitted for
// both the success and the failure case.
let result = run_canary(opt, &config).await;

// Each entry is (metric name, value, unit).
let mut metrics = vec![
(
"canary-success",
if result.is_ok() { 1.0 } else { 0.0 },
StandardUnit::Count,
),
(
"canary-failure",
if result.is_ok() { 0.0 } else { 1.0 },
StandardUnit::Count,
),
(
"canary-total-time",
// NOTE(review): `SystemTime` is not monotonic; `elapsed()` returns
// `Err` if the system clock moved backwards, which would make this
// `expect` panic before any metric is sent. `Instant` may be a
// better fit for the duration measurement — confirm.
start_time.elapsed().expect("time in range").as_secs_f64(),
StandardUnit::Seconds,
),
];
// The invoke time only exists when the canary succeeded. `Duration` is
// `Copy`, so this pattern does not consume `result`; it is still returned
// at the end of the function.
if let Ok(invoke_time) = result {
metrics.push((
"canary-invoke-time",
invoke_time.as_secs_f64(),
StandardUnit::Seconds,
));
}

// Accumulate every metric into a single `put_metric_data` request.
let cloudwatch_client = cloudwatch::Client::new(&config);
let mut request_builder = cloudwatch_client
.put_metric_data()
.namespace("aws-sdk-rust-canary");
for metric in metrics {
request_builder = request_builder.metric_data(
cloudwatch::model::MetricDatum::builder()
.metric_name(metric.0)
.value(metric.1)
// Every datum is stamped with the time it is built, not the canary start time.
.timestamp(SystemTime::now().into())
.unit(metric.2)
.build(),
);
}

info!("Emitting metrics...");
// NOTE(review): the `?` below returns early when the request fails, so an
// error held in `result` is dropped in that case and only the
// "failed to emit metrics" error reaches the caller.
request_builder
.send()
.await
.context("failed to emit metrics")?;
Comment on lines +94 to +97
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there some sort of fallback where this can fail even if we somehow bricked the cloudwatch client?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thought was that once we have the canary running on a regular cadence, we can set up an alarm that goes off if it hasn't logged a metric in the last 25 or so hours.


// Surface the canary outcome to the caller, dropping the invoke duration.
result.map(|_| ())
}

async fn run_canary(opt: RunOpt, config: &aws_config::Config) -> Result<Duration> {
let repo_root = git_root().await?;
env::set_current_dir(repo_root.join("smithy-rs/tools/ci-cdk/canary-lambda"))
.context("failed to change working directory")?;
Expand All @@ -56,9 +112,8 @@ pub async fn run(opt: RunOpt) -> Result<()> {
let bundle_file_name = bundle_path.file_name().unwrap().to_str().unwrap();
let bundle_name = bundle_path.file_stem().unwrap().to_str().unwrap();

let config = aws_config::load_from_env().await;
let s3_client = s3::Client::new(&config);
let lambda_client = lambda::Client::new(&config);
let s3_client = s3::Client::new(config);
let lambda_client = lambda::Client::new(config);

info!("Uploading Lambda code bundle to S3...");
upload_bundle(
Expand All @@ -84,12 +139,14 @@ pub async fn run(opt: RunOpt) -> Result<()> {
.await?;

info!("Invoking the canary Lambda...");
let invoke_start_time = SystemTime::now();
let invoke_result = invoke_lambda(lambda_client.clone(), bundle_name).await;
let invoke_time = invoke_start_time.elapsed().expect("time in range");

info!("Deleting the canary Lambda...");
delete_lambda_fn(lambda_client, bundle_name).await?;

invoke_result
invoke_result.map(|_| invoke_time)
}

async fn generate_cargo_toml(sdk_version: &str) -> Result<()> {
Expand Down