Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add disk metrics endpoint #1348

Merged
merged 21 commits into from
Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 81 additions & 8 deletions nexus/src/app/oximeter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::authz;
use crate::context::OpContext;
use crate::db;
use crate::db::identity::Asset;
use crate::external_api::params::ResourceMetrics;
use crate::external_api::params::ResourceMetricsPagination;
use crate::internal_api::params::OximeterInfo;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
Expand All @@ -16,6 +18,8 @@ use omicron_common::api::external::PaginationOrder;
use omicron_common::api::internal::nexus;
use omicron_common::backoff;
use oximeter_client::Client as OximeterClient;
use oximeter_db::query::Timestamp;
use oximeter_db::Measurement;
use oximeter_db::TimeseriesSchema;
use oximeter_db::TimeseriesSchemaPaginationParams;
use oximeter_producer::register;
Expand Down Expand Up @@ -162,14 +166,74 @@ impl super::Nexus {
self.timeseries_client
.timeseries_schema_list(&pag_params.page, limit)
.await
.map_err(|e| match e {
oximeter_db::Error::DatabaseUnavailable(_) => {
Error::ServiceUnavailable {
internal_message: e.to_string(),
}
}
_ => Error::InternalError { internal_message: e.to_string() },
})
.map_err(map_oximeter_err)
}

pub async fn select_timeseries(
&self,
timeseries_name: &str,
criteria: &[&str],
interval: Duration,
smklein marked this conversation as resolved.
Show resolved Hide resolved
query_params: ResourceMetricsPagination,
bnaecker marked this conversation as resolved.
Show resolved Hide resolved
limit: NonZeroU32,
) -> Result<dropshot::ResultsPage<Measurement>, Error> {
#[inline]
fn no_results() -> dropshot::ResultsPage<Measurement> {
dropshot::ResultsPage { next_page: None, items: Vec::new() }
}

let query = match query_params.page {
dropshot::WhichPage::First(query) => query,
dropshot::WhichPage::Next(query) => query,
};
let start_time = query.start_time;
if start_time >= query.end_time {
return Ok(no_results());
}
let max_timeframe = chrono::Duration::from_std(interval * limit.get())
.map_err(|e| Error::internal_error(&e.to_string()))?;
let end_time = query.end_time.min(start_time + max_timeframe);

let timeseries_list = self
.timeseries_client
.select_timeseries_with(
timeseries_name,
criteria,
Some(Timestamp::Inclusive(start_time)),
Some(Timestamp::Exclusive(end_time)),
)
.await
.map_err(map_oximeter_err)?;

if timeseries_list.len() > 1 {
return Err(Error::internal_error(&format!(
"expected 1 timeseries but got {} ({:?} {:?})",
timeseries_list.len(),
timeseries_name,
criteria
)));
}

Ok(if let Some(timeseries) = timeseries_list.into_iter().next() {
let next_start_time = end_time;

dropshot::ResultsPage {
next_page: if next_start_time >= query.end_time {
None
} else {
Some(base64::encode_config(
serde_json::to_string(&ResourceMetrics {
start_time: next_start_time,
end_time: query.end_time,
})?,
base64::URL_SAFE,
))
},
items: timeseries.measurements,
}
} else {
no_results()
})
}

// Internal helper to build an Oximeter client from its ID and address (common data between
Expand Down Expand Up @@ -209,3 +273,12 @@ impl super::Nexus {
Ok((self.build_oximeter_client(&id, address), id))
}
}

fn map_oximeter_err(error: oximeter_db::Error) -> Error {
match error {
oximeter_db::Error::DatabaseUnavailable(_) => {
Error::ServiceUnavailable { internal_message: error.to_string() }
}
_ => Error::InternalError { internal_message: error.to_string() },
}
}
79 changes: 76 additions & 3 deletions nexus/src/external_api/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ use super::views::IpPoolRange;
use super::{
console_api, device_auth, params, views,
views::{
GlobalImage, IdentityProvider, Image, Organization, Project, Rack,
Role, Silo, Sled, Snapshot, SshKey, User, UserBuiltin, Vpc, VpcRouter,
VpcSubnet,
GlobalImage, IdentityProvider, Image, Measurement, Organization,
Project, Rack, Role, Silo, Sled, Snapshot, SshKey, User, UserBuiltin,
Vpc, VpcRouter, VpcSubnet,
},
};
use crate::authz;
Expand Down Expand Up @@ -67,11 +67,13 @@ use omicron_common::api::external::VpcFirewallRules;
use omicron_common::{
api::external::http_pagination::data_page_params_for, bail_unless,
};
use parse_display::Display;
use ref_cast::RefCast;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;

type NexusApiDescription = ApiDescription<Arc<ServerContext>>;
Expand Down Expand Up @@ -123,6 +125,7 @@ pub fn external_api() -> NexusApiDescription {
api.register(project_disks_post)?;
api.register(project_disks_get_disk)?;
api.register(project_disks_delete_disk)?;
api.register(project_disks_get_metrics)?;

api.register(project_instances_get)?;
api.register(project_instances_post)?;
Expand Down Expand Up @@ -1420,6 +1423,67 @@ async fn project_disks_delete_disk(
apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

#[derive(Display, Deserialize, JsonSchema)]
#[display(style = "snake_case")]
#[serde(rename_all = "snake_case")]
pub enum DiskMetricName {
smklein marked this conversation as resolved.
Show resolved Hide resolved
Activated,
Flush,
Read,
ReadBytes,
Write,
WriteBytes,
}

/// Fetch metrics for a disk.
#[endpoint {
method = GET,
path = "/organizations/{organization_name}/projects/{project_name}/disks/{disk_name}/metrics/{metric_name}",
tags = ["disks"],
}]
async fn project_disks_get_metrics(
rqctx: Arc<RequestContext<Arc<ServerContext>>>,
path_params: Path<MetricsPathParam<DiskPathParam, DiskMetricName>>,
query_params: Query<params::ResourceMetricsPagination>,
) -> Result<HttpResponseOk<ResultsPage<Measurement>>, HttpError> {
let apictx = rqctx.context();
let nexus = &apictx.nexus;

let path = path_params.into_inner();
let organization_name = &path.inner.organization_name;
let project_name = &path.inner.project_name;
let disk_name = &path.inner.disk_name;
let metric_name = path.metric_name;

let query = query_params.into_inner();
let limit = rqctx.page_limit(&query)?;

let handler = async {
let opctx = OpContext::for_external_api(&rqctx).await?;

// this ensures the user is authorized on Action::Read for this disk
nexus
.disk_fetch(&opctx, organization_name, project_name, disk_name)
.await?;

// FIXME fill this in with the code that gets the upstairs UUID from the disk UUID
let upstairs_uuid = uuid::uuid!("4fe353bf-c3a6-421f-a51d-c833091637fa");

Ok(HttpResponseOk(
nexus
.select_timeseries(
&format!("crucible_upstairs:{}", metric_name),
&[&format!("upstairs_uuid=={}", upstairs_uuid)],
Duration::from_secs(10),
query,
limit,
)
.await?,
))
};
apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

// Instances

/// List instances in a project.
Expand Down Expand Up @@ -3753,6 +3817,15 @@ async fn sshkeys_delete_key(
apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

/// Path parameters for metrics requests where `/metrics/{metric_name}` is
/// appended to an existing path parameter type
#[derive(Deserialize, JsonSchema)]
struct MetricsPathParam<T, M> {
#[serde(flatten)]
inner: T,
metric_name: M,
}

#[cfg(test)]
mod test {
use super::external_api;
Expand Down
14 changes: 14 additions & 0 deletions nexus/src/external_api/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! Params define the request bodies of API endpoints for creating or updating resources.

use crate::external_api::shared;
use chrono::{DateTime, Utc};
use dropshot::PaginationParams;
use omicron_common::api::external::{
ByteCount, IdentityMetadataCreateParams, IdentityMetadataUpdateParams,
InstanceCpuCount, Ipv4Net, Ipv6Net, Name,
Expand Down Expand Up @@ -779,6 +781,18 @@ pub struct SshKeyCreate {
pub public_key: String,
}

// METRICS

/// Query parameters common to resource metrics endpoints.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct ResourceMetrics {
smklein marked this conversation as resolved.
Show resolved Hide resolved
pub start_time: DateTime<Utc>,
pub end_time: DateTime<Utc>,
}

pub type ResourceMetricsPagination =
PaginationParams<ResourceMetrics, ResourceMetrics>;

#[cfg(test)]
mod test {
use super::*;
Expand Down
2 changes: 2 additions & 0 deletions nexus/src/external_api/views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use serde::{Deserialize, Serialize};
use std::net::SocketAddrV6;
use uuid::Uuid;

pub use oximeter_db::Measurement;

// IDENTITY METADATA

/// Identity-related metadata that's included in "asset" public API objects
Expand Down
16 changes: 16 additions & 0 deletions nexus/tests/integration_tests/endpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//! THERE ARE NO TESTS IN THIS FILE.

use crate::integration_tests::unauthorized::HTTP_SERVER;
use chrono::Utc;
use http::method::Method;
use lazy_static::lazy_static;
use nexus_test_utils::RACK_UUID;
Expand Down Expand Up @@ -177,6 +178,13 @@ lazy_static! {
disk_source: params::DiskSource::Blank { block_size: params::BlockSize::try_from(4096).unwrap() },
size: ByteCount::from_gibibytes_u32(16),
};
pub static ref DEMO_DISK_METRICS_URL: String =
format!(
"{}/metrics/activated?start_time={:?}&end_time={:?}",
*DEMO_DISK_URL,
Utc::now(),
Utc::now(),
);

// Instance used for testing
pub static ref DEMO_INSTANCE_NAME: Name = "demo-instance".parse().unwrap();
Expand Down Expand Up @@ -829,6 +837,14 @@ lazy_static! {
],
},

VerifyEndpoint {
url: &*DEMO_DISK_METRICS_URL,
visibility: Visibility::Protected,
allowed_methods: vec![
AllowedMethod::Get,
],
},

VerifyEndpoint {
url: &*DEMO_INSTANCE_DISKS_URL,
visibility: Visibility::Protected,
Expand Down
1 change: 1 addition & 0 deletions nexus/tests/output/nexus_tags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ OPERATION ID URL PATH
project_disks_delete_disk /organizations/{organization_name}/projects/{project_name}/disks/{disk_name}
project_disks_get /organizations/{organization_name}/projects/{project_name}/disks
project_disks_get_disk /organizations/{organization_name}/projects/{project_name}/disks/{disk_name}
project_disks_get_metrics /organizations/{organization_name}/projects/{project_name}/disks/{disk_name}/metrics/{metric_name}
project_disks_post /organizations/{organization_name}/projects/{project_name}/disks

API operations found with tag "firewall"
Expand Down
Loading