diff --git a/Cargo.lock b/Cargo.lock index 40bd736257..c5c941e101 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5951,6 +5951,7 @@ name = "nexus-external-api" version = "0.1.0" dependencies = [ "anyhow", + "chrono", "dropshot 0.13.0", "http", "hyper", diff --git a/common/src/api/external/http_pagination.rs b/common/src/api/external/http_pagination.rs index 65237f73c6..3c5b265e3f 100644 --- a/common/src/api/external/http_pagination.rs +++ b/common/src/api/external/http_pagination.rs @@ -45,6 +45,8 @@ use crate::api::external::Name; use crate::api::external::NameOrId; use crate::api::external::ObjectIdentity; use crate::api::external::PaginationOrder; +use chrono::DateTime; +use chrono::Utc; use dropshot::HttpError; use dropshot::PaginationParams; use dropshot::RequestContext; @@ -409,6 +411,55 @@ impl< } } +// TODO: timestamp is not unique. does that mean we need to paginate by (timestamp, id)? + +/// Query parameters for pagination by timestamp +pub type PaginatedByTimestamp<Selector> = PaginationParams< + ScanByTimestamp<Selector>, + PageSelectorByTimestamp<Selector>, +>; +/// Page selector for pagination by timestamp +pub type PageSelectorByTimestamp<Selector> = + PageSelector<ScanByTimestamp<Selector>, DateTime<Utc>>; +/// Scan parameters for resources that support scanning by timestamp only +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct ScanByTimestamp<Selector> { + #[serde(default = "default_ts_sort_mode")] + sort_by: TimestampSortMode, + + #[serde(flatten)] + pub selector: Selector, +} +/// Supported set of sort modes for scanning by timestamp only +/// +/// Currently, we only support scanning in ascending order. +#[derive(Copy, Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum TimestampSortMode { + /// sort in increasing order of "timestamp" + Ascending, +} + +fn default_ts_sort_mode() -> TimestampSortMode { + TimestampSortMode::Ascending +} + +impl< + T: Clone + Debug + DeserializeOwned + JsonSchema + PartialEq + Serialize, + > ScanParams for ScanByTimestamp<T> +{ + type MarkerValue = DateTime<Utc>; + fn direction(&self) -> PaginationOrder { + PaginationOrder::Ascending + } + fn from_query(p: &PaginatedByTimestamp<T>) -> Result<&Self, HttpError> { + Ok(match p.page { + WhichPage::First(ref scan_params) => scan_params, + WhichPage::Next(PageSelector { ref scan, .. }) => scan, + }) + } +} + #[cfg(test)] mod test { use super::data_page_params_with_limit; diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 4c8f032fcb..874eef3a08 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -954,6 +954,7 @@ pub enum ResourceType { AddressLot, AddressLotBlock, AllowList, + AuditLogEntry, BackgroundTask, BgpConfig, BgpAnnounceSet, diff --git a/nexus/auth/src/authz/api_resources.rs b/nexus/auth/src/authz/api_resources.rs index bfde3d3b97..cce825a4df 100644 --- a/nexus/auth/src/authz/api_resources.rs +++ b/nexus/auth/src/authz/api_resources.rs @@ -407,8 +407,66 @@ impl AuthorizedResource for IpPoolList { roleset: &'fut mut RoleSet, ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on the IpPoolList, only permissions. But we still - // need to load the Fleet-related roles to verify that the actor has the - // "admin" role on the Fleet (possibly conferred from a Silo role). + // need to load the Fleet-related roles to verify the actor's role + // on the Fleet (possibly conferred from a Silo role).
+ load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() + } + + fn on_unauthorized( + &self, + _: &Authz, + error: Error, + _: AnyActor, + _: Action, + ) -> Error { + error + } + + fn polar_class(&self) -> oso::Class { + Self::get_polar_class() + } +} + +// Similar to IpPoolList, the audit log is a collection that doesn't exist in +// the database as an entity distinct from its children (IP pools, or in this +// case, audit log entries). We need a dummy resource here because we need +// something to hang permissions off of. We need to be able to create audit log +// children (entries) for login attempts, when there is no authenticated user, +// as well as for normal requests with an authenticated user. For retrieval, we +// want (to start out) to allow only fleet viewers to list children. + +#[derive(Clone, Copy, Debug)] +pub struct AuditLog; + +/// Singleton representing the [`AuditLog`] for authz purposes +pub const AUDIT_LOG: AuditLog = AuditLog; + +impl Eq for AuditLog {} + +impl PartialEq for AuditLog { + fn eq(&self, _: &Self) -> bool { + true + } +} + +impl oso::PolarClass for AuditLog { + fn get_polar_class_builder() -> oso::ClassBuilder { + oso::Class::builder() + .with_equality_check() + .add_attribute_getter("fleet", |_: &AuditLog| FLEET) + } +} + +impl AuthorizedResource for AuditLog { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { + // There are no roles on the AuditLog, only permissions. But we still + // need to load the Fleet-related roles to verify that the actor's role + // on the Fleet (possibly conferred from a Silo role). load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } diff --git a/nexus/auth/src/authz/omicron.polar b/nexus/auth/src/authz/omicron.polar index f9382401fd..7a5d0e0d0e 100644 --- a/nexus/auth/src/authz/omicron.polar +++ b/nexus/auth/src/authz/omicron.polar @@ -417,6 +417,25 @@ has_relation(fleet: Fleet, "parent_fleet", ip_pool_list: IpPoolList) has_permission(actor: AuthenticatedActor, "create_child", ip_pool: IpPool) if silo in actor.silo and silo.fleet = ip_pool.fleet; +# Describes the policy for reading and writing the audit log +resource AuditLog { + permissions = [ + "list_children", # retrieve audit log + "create_child", # create audit log entry + ]; + + relations = { parent_fleet: Fleet }; + + # Fleet viewers can read the audit log + "list_children" if "viewer" on "parent_fleet"; +} +# TODO: is this right? any op context should be able to write to the audit log? +# feels weird though +has_permission(_actor: AuthenticatedActor, "create_child", _audit_log: AuditLog); + +has_relation(fleet: Fleet, "parent_fleet", audit_log: AuditLog) + if audit_log.fleet = fleet; + # Describes the policy for creating and managing web console sessions. 
resource ConsoleSessionList { permissions = [ "create_child" ]; diff --git a/nexus/auth/src/authz/oso_generic.rs b/nexus/auth/src/authz/oso_generic.rs index acd74b2167..dff6ba859c 100644 --- a/nexus/auth/src/authz/oso_generic.rs +++ b/nexus/auth/src/authz/oso_generic.rs @@ -101,6 +101,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { let classes = [ // Hand-written classes Action::get_polar_class(), + AuditLog::get_polar_class(), AnyActor::get_polar_class(), AuthenticatedActor::get_polar_class(), BlueprintConfig::get_polar_class(), diff --git a/nexus/db-model/src/audit_log.rs b/nexus/db-model/src/audit_log.rs new file mode 100644 index 0000000000..8ae2e84e84 --- /dev/null +++ b/nexus/db-model/src/audit_log.rs @@ -0,0 +1,128 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +use crate::schema::audit_log; +use chrono::{DateTime, TimeDelta, Utc}; +use diesel::prelude::*; +use nexus_types::external_api::views; +use uuid::Uuid; + +#[derive(Queryable, Insertable, Selectable, Clone, Debug)] +#[diesel(table_name = audit_log)] +pub struct AuditLogEntry { + pub id: Uuid, + pub timestamp: DateTime<Utc>, + pub request_id: String, + // TODO: this isn't in the RFD but it seems nice to have + pub request_uri: String, + pub operation_id: String, + pub source_ip: String, + pub resource_type: String, + + // TODO: we probably want a dedicated enum for these columns and for that + // we need a fancier set of columns. For example, we may want to initialize + // the row with a _potential_ actor (probably a different field), like the + // username or whatever is being used for login. This should probably be + // preserved even after authentication determines an actual actor ID. See + // the Actor struct in nexus/auth/src/authn/mod.rs + + // These are optional because of requests like login attempts, where there + // is no actor until after the operation. + pub actor_id: Option<Uuid>, + pub actor_silo_id: Option<Uuid>, + + /// The specific action that was attempted (create, delete, update, etc) + pub action: String, // TODO: enum type? + + // TODO: we will need to add headers in the client to get this info + // How the actor authenticated (api_key, console, etc) + // pub access_method: String, + + // TODO: RFD 523 says: "Additionally, the response (or error) data should be + // included in the same log entry as the original request data. Separating + // the response from the request into two different log entries is extremely + // expensive for customers to identify which requests correspond to which + // responses." I guess the typical thing is to include a duration of the + // request rather than a second timestamp.
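As a sketch (not part of this change): the TODOs above gesture at modeling the post-completion data as an enum rather than loose nullable columns. One hypothetical shape, which would live in the Rust model and be flattened into the nullable columns at update time (names are illustrative only):

pub enum AuditLogCompletion {
    // operation succeeded; record what it ultimately acted on
    Success { resource_id: Option<Uuid> },
    // operation failed; keep the external error code and message
    Error { error_code: Option<String>, error_message: String },
}

The patch itself sticks with flat Option columns, which keeps the table and the Diesel model simple.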
+ + // Seems like it has to be optional because at the beginning of the + // operation, we have not yet resolved the resource selector to an ID + pub resource_id: Option<Uuid>, + + // Fields that are optional because they get filled in after the action completes + /// Time between receiving the request and responding + pub duration: Option<TimeDelta>, + + // Error information if the action failed + pub error_code: Option<String>, + pub error_message: Option<String>, + // TODO: including a real response complicates things + // Response data on success (if applicable) + // pub success_response: Option, +} + +impl AuditLogEntry { + pub fn new( + request_id: String, + operation_id: String, + request_uri: String, + actor_id: Option<Uuid>, + actor_silo_id: Option<Uuid>, + ) -> Self { + Self { + id: Uuid::new_v4(), + timestamp: Utc::now(), + request_id, + request_uri, + operation_id, + actor_id, + actor_silo_id, + + // TODO: actually get all these values + source_ip: String::new(), + resource_type: String::new(), + action: String::new(), + + // fields that can only be filled in after the operation + resource_id: None, + duration: None, + error_code: None, + error_message: None, + } + } +} + +// TODO: Add a struct representing only the fields set at log entry init time, +// use as an arg to the datastore init function to make misuse harder + +// TODO: AuditLogActor +// pub enum AuditLogActor { +// UserBuiltin { user_builtin_id: Uuid }, +// TODO: include info about computed roles at runtime? +// SiloUser { silo_user_id: Uuid, silo_id: Uuid }, +// Unauthenticated, +// } + +impl From<AuditLogEntry> for views::AuditLogEntry { + fn from(entry: AuditLogEntry) -> Self { + Self { + id: entry.id, + timestamp: entry.timestamp, + request_id: entry.request_id, + request_uri: entry.request_uri, + operation_id: entry.operation_id, + source_ip: entry.source_ip, + resource_type: entry.resource_type, + resource_id: entry.resource_id, + actor_id: entry.actor_id, + actor_silo_id: entry.actor_silo_id, + action: entry.action, + duration_ms: entry.duration.map(|d| d.num_milliseconds()), + error_code: entry.error_code, + error_message: entry.error_message, + } + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index b6aea15c89..a2d7e188de 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -11,6 +11,7 @@ extern crate newtype_derive; mod address_lot; mod allow_list; +mod audit_log; mod bfd; mod bgp; mod block_size; @@ -130,6 +131,7 @@ pub use self::macaddr::*; pub use self::unsigned::*; pub use address_lot::*; pub use allow_list::*; +pub use audit_log::*; pub use bfd::*; pub use bgp::*; pub use block_size::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index a8e1141db6..cd9cd3bb64 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -2124,3 +2124,22 @@ table! { region_snapshot_snapshot_id -> Nullable<Uuid>, } } + +table!
{ + audit_log (id) { + id -> Uuid, + timestamp -> Timestamptz, + request_id -> Text, + request_uri -> Text, + operation_id -> Text, + source_ip -> Text, + resource_type -> Text, + actor_id -> Nullable<Uuid>, + actor_silo_id -> Nullable<Uuid>, + action -> Text, + resource_id -> Nullable<Uuid>, + duration -> Nullable<Interval>, + error_code -> Nullable<Text>, + error_message -> Nullable<Text> + } +} diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 4542283aac..d4795b3cb0 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(118, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(119, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(119, "audit-log"), KnownVersion::new(118, "support-bundles"), KnownVersion::new(117, "add-completing-and-new-region-volume"), KnownVersion::new(116, "bp-physical-disk-disposition"), diff --git a/nexus/db-queries/src/db/datastore/audit_log.rs b/nexus/db-queries/src/db/datastore/audit_log.rs new file mode 100644 index 0000000000..73e22625bb --- /dev/null +++ b/nexus/db-queries/src/db/datastore/audit_log.rs @@ -0,0 +1,244 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::DataStore; +use crate::authz; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::model::AuditLogEntry; +use crate::db::pagination::paginated; +use crate::{context::OpContext, db::error::ErrorHandler}; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::{DateTime, Utc}; +use diesel::prelude::*; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::ResourceType; +use omicron_common::api::external::{CreateResult, UpdateResult}; + +impl DataStore { + pub async fn audit_log_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, DateTime<Utc>>, + start_time: DateTime<Utc>, + end_time: Option<DateTime<Utc>>, + ) -> ListResultVec<AuditLogEntry> { + opctx.authorize(authz::Action::ListChildren, &authz::AUDIT_LOG).await?; + + use db::schema::audit_log; + // TODO: this is sorting by timestamp, but in order to get stable sort + // with duplicate timestamps, we need to also sort by ID + let query = + paginated(audit_log::table, audit_log::timestamp, pagparams) + .filter(audit_log::timestamp.ge(start_time)); + // TODO: confirm and document exclusive/inclusive behavior + let query = if let Some(end) = end_time { + query.filter(audit_log::timestamp.lt(end)) + } else { + query + }; + query + .select(AuditLogEntry::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?)
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn audit_log_entry_init( + &self, + opctx: &OpContext, + entry: AuditLogEntry, + ) -> CreateResult { + use db::schema::audit_log; + opctx.authorize(authz::Action::CreateChild, &authz::AUDIT_LOG).await?; + + let entry_id = entry.id.to_string(); + + diesel::insert_into(audit_log::table) + .values(entry) + .returning(AuditLogEntry::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::AuditLogEntry, + &entry_id, + ), + ) + }) + } + + // set duration and result on an existing entry + pub async fn audit_log_entry_complete( + /* id, duration, result */ + &self, + _opctx: &OpContext, + ) -> UpdateResult<()> { + // TODO: obviously + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::pub_test_utils::TestDatabase; + use assert_matches::assert_matches; + use omicron_common::api::external::Error; + use omicron_test_utils::dev; + use std::num::NonZeroU32; + + #[tokio::test] + async fn test_audit_log_basic() { + let logctx = dev::test_setup_log("test_audit_log"); + let log = &logctx.log; + let db = TestDatabase::new_with_datastore(log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let pagparams = DataPageParams { + marker: None, + limit: NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + let t0: DateTime = Utc::now(); + let t_future: DateTime = "2099-01-01T00:00:00Z".parse().unwrap(); + + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t0, None) + .await + .expect("retrieve empty audit log"); + assert_eq!(audit_log.len(), 0); + + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t_future, None) + .await + .expect("retrieve empty audit log"); + assert_eq!(audit_log.len(), 0); + + let entry1 = AuditLogEntry::new( + "req-1".to_string(), + "project_create".to_string(), + "https://omicron.com/projects".to_string(), + None, + None, + ); + datastore + .audit_log_entry_init(opctx, entry1.clone()) + .await + .expect("init audit log entry"); + + // inserting the same entry again blows up + let conflict = datastore + .audit_log_entry_init(opctx, entry1) + .await + .expect_err("inserting same entry again should error"); + assert_matches!(conflict, Error::ObjectAlreadyExists { .. 
}); + + let t1 = Utc::now(); + + let entry2 = AuditLogEntry::new( + "req-2".to_string(), + "project_delete".to_string(), + "https://omicron.com/projects/123".to_string(), + None, + None, + ); + datastore + .audit_log_entry_init(opctx, entry2.clone()) + .await + .expect("init second audit log entry"); + + // get both entries + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t0, None) + .await + .expect("retrieve audit log"); + assert_eq!(audit_log.len(), 2); + assert_eq!(audit_log[0].request_id, "req-1"); + assert_eq!(audit_log[1].request_id, "req-2"); + + // Only get first entry + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t0, Some(t1)) + .await + .expect("retrieve first audit log entry"); + assert_eq!(audit_log.len(), 1); + assert_eq!(audit_log[0].request_id, "req-1"); + + // Only get second entry + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t1, None) + .await + .expect("retrieve second audit log entry"); + assert_eq!(audit_log.len(), 1); + assert_eq!(audit_log[0].request_id, "req-2"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_audit_log_non_unique_timestamps() { + let logctx = + dev::test_setup_log("test_audit_log_non_unique_timestamps"); + let log = &logctx.log; + let db = TestDatabase::new_with_datastore(log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let t0 = Utc::now(); + + let base = AuditLogEntry::new( + "req-1".to_string(), + "project_create".to_string(), + "https://omicron.com/projects".to_string(), + None, + None, + ); + + let entry1 = AuditLogEntry { + id: "d0d59e4f-4c98-4df5-b3c5-39fc0c2ac547".parse().unwrap(), + ..base.clone() + }; + datastore + .audit_log_entry_init(opctx, entry1.clone()) + .await + .expect("init entry1"); + + // same timestamp, different ID and req ID. 
the point is that the ID is + // before the other one, so this one should sort first + let entry2 = AuditLogEntry { + id: "a156ad37-047e-4028-88bd-8034906d5a27".parse().unwrap(), + request_id: "req-2".to_string(), + ..base + }; + datastore + .audit_log_entry_init(opctx, entry2.clone()) + .await + .expect("init entry1"); + + let pagparams = DataPageParams { + marker: None, + limit: NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + + // retrieve both and check the order -- the one with the lower ID + // should always be first + let audit_log = datastore + .audit_log_list(opctx, &pagparams, t0, None) + .await + .expect("retrieve audit log"); + assert_eq!(audit_log.len(), 2); + dbg!(audit_log); + // TODO: uncomment these once the sorting is sorted + // assert_eq!(audit_log[0].request_id, "req-1"); + // assert_eq!(audit_log[1].request_id, "req-2"); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 3b19677fe8..a752d5f9d3 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -50,6 +50,7 @@ use std::sync::Arc; mod address_lot; mod allow_list; +mod audit_log; mod auth; mod bfd; mod bgp; diff --git a/nexus/external-api/Cargo.toml b/nexus/external-api/Cargo.toml index 0875e1f574..d702d1f049 100644 --- a/nexus/external-api/Cargo.toml +++ b/nexus/external-api/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] anyhow.workspace = true +chrono.workspace = true dropshot.workspace = true http.workspace = true hyper.workspace = true diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 4fc92b18d8..fab7bddafc 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -135,6 +135,10 @@ snapshot_delete DELETE /v1/snapshots/{snapshot} snapshot_list GET /v1/snapshots snapshot_view GET /v1/snapshots/{snapshot} +API operations found with tag "system/audit-log" +OPERATION ID METHOD URL PATH +audit_log_list GET /v1/system/audit-log + API operations found with tag "system/hardware" OPERATION ID METHOD URL PATH networking_switch_port_apply_settings POST /v1/system/hardware/switch-port/{port}/settings diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 54ba3ab34b..f75be67ce4 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use anyhow::anyhow; +use chrono::{DateTime, Utc}; use dropshot::Body; use dropshot::{ EmptyScanParams, EndpointTagPolicy, HttpError, HttpResponseAccepted, @@ -11,6 +12,7 @@ use dropshot::{ WebsocketChannelResult, WebsocketConnection, }; use http::Response; +use http_pagination::{PageSelector, PaginatedByTimestamp}; use ipnetwork::IpNetwork; use nexus_types::{ authn::cookies::Cookies, @@ -151,6 +153,12 @@ pub const API_VERSION: &str = "20241204.0.0"; url = "http://docs.oxide.computer/api/vpcs" } }, + "system/audit-log" = { + description = "These endpoints relate to audit logs.", + external_docs = { + url = "http://docs.oxide.computer/api/system-audit-log" + } + }, "system/probes" = { description = "Probes for testing network connectivity", external_docs = { @@ -2932,6 +2940,19 @@ pub trait NexusExternalApi { path_params: Path, ) -> Result; + // Audit logging + + /// View audit log + #[endpoint { + method = GET, + path = "/v1/system/audit-log", + tags = ["system/audit-log"], + }] + async fn 
audit_log_list( + rqctx: RequestContext<Self::Context>, + query_params: Query<PaginatedByTimestamp<params::AuditLog>>, + ) -> Result<HttpResponseOk<ResultsPage<views::AuditLogEntry>>, HttpError>; + + // Console API: logins /// SAML login console page (just a link to the IdP) @@ -3299,3 +3320,8 @@ pub type IpPoolRangePaginationParams = /// Type used to paginate request to list timeseries schema pub type TimeseriesSchemaPaginationParams = PaginationParams; + +pub type AuditLogPaginationParams = PaginationParams< + params::AuditLog, + PageSelector<params::AuditLog, DateTime<Utc>>, +>; diff --git a/nexus/src/app/audit_log.rs b/nexus/src/app/audit_log.rs new file mode 100644 index 0000000000..dd5ab82f9a --- /dev/null +++ b/nexus/src/app/audit_log.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use chrono::{DateTime, Utc}; +use dropshot::RequestContext; +use nexus_db_model::AuditLogEntry; +use nexus_db_queries::context::OpContext; +use omicron_common::api::external::{ + CreateResult, DataPageParams, ListResultVec, UpdateResult, +}; + +use crate::context::ApiContext; + +impl super::Nexus { + // Currently this pulls from CRDB only, but the idea is that we are + // only storing recent entries in CRDB and moving the data in batches + // to ClickHouse in a job. In that case we would need to look at both + // ClickHouse and CRDB. We could potentially skip the CRDB part if we're + // confident the range excludes CRDB data, but it's probably simpler to + // always check CRDB. + + pub(crate) async fn audit_log_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, DateTime<Utc>>, + start_time: DateTime<Utc>, + end_time: Option<DateTime<Utc>>, + ) -> ListResultVec<AuditLogEntry> { + self.db_datastore + .audit_log_list(opctx, pagparams, start_time, end_time) + .await + } + + pub(crate) async fn audit_log_entry_init( + &self, + opctx: &OpContext, + // TODO: not sure we want the app layer to be aware of RequestContext. + // It might be better to extract the relevant fields at the call site; + // we'd still want a helper to avoid duplication. + rqctx: &RequestContext<ApiContext>, + ) -> CreateResult<AuditLogEntry> { + let actor = opctx.authn.actor(); + let entry = AuditLogEntry::new( + rqctx.request_id.clone(), + rqctx.operation_id.clone(), + rqctx.request.uri().to_string(), + actor.map(|a| a.actor_id()), + actor.map(|a| a.silo_id()).flatten(), + ); + self.db_datastore.audit_log_entry_init(opctx, entry).await + } + + // set duration and result on an existing entry + pub(crate) async fn audit_log_entry_complete( + /* id, duration, result */ + &self, + opctx: &OpContext, + ) -> UpdateResult<()> { + self.db_datastore.audit_log_entry_complete(opctx).await + } +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 435ca2a56d..ff5f1ce860 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -46,6 +46,7 @@ use uuid::Uuid; // by resource. mod address_lot; mod allow_list; +mod audit_log; pub(crate) mod background; mod bfd; mod bgp; diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index fd22c22d0e..cffcc5c9e3 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -278,6 +278,7 @@ pub(crate) async fn login_saml( let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; + let path_params = path_params.into_inner(); // By definition, this request is not authenticated. These operations @@ -285,6 +286,13 @@ // keep specifically for this purpose.
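An aside on the TODO just below, which worries that any `?` will return from the handler before the completion call runs: since audit_log_entry_init already returns the created AuditLogEntry, one possible shape (a sketch only; the extra arguments to audit_log_entry_complete are hypothetical) is to run the fallible part of the handler in an inner async block and complete the entry based on its result, the same pattern instrument_dropshot_handler already uses for latency tracking:

let entry = nexus.audit_log_entry_init(opctx, &rqctx).await?;
let result = async {
    // ... existing fallible body of the handler, unchanged ...
    Ok::<_, HttpError>(())
}
.await;
// record duration/outcome whether or not the body succeeded
let _ = nexus.audit_log_entry_complete(opctx /* , entry.id, &result */).await;
result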
let opctx = nexus.opctx_external_authn(); + let _ = nexus.audit_log_entry_init(opctx, &rqctx).await; + + // TODO: where are we supposed to put this? any ? will exit the function + // early! external_latences.instrument_dropshot_handler looks very + // appealing.... + let _ = nexus.audit_log_entry_complete(opctx).await; + let (authz_silo, db_silo, identity_provider) = nexus .datastore() .identity_provider_lookup( diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index cfc9f99851..99e175b1a8 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -48,8 +48,6 @@ use nexus_types::{ shared::{BfdStatus, ProbeInfo}, }, }; -use omicron_common::api::external::http_pagination::data_page_params_for; -use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -60,6 +58,12 @@ use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanByName; use omicron_common::api::external::http_pagination::ScanByNameOrId; use omicron_common::api::external::http_pagination::ScanParams; +use omicron_common::api::external::http_pagination::{ + data_page_params_for, PaginatedByTimestamp, +}; +use omicron_common::api::external::http_pagination::{ + marker_for_name, ScanByTimestamp, +}; use omicron_common::api::external::AddressLot; use omicron_common::api::external::AddressLotBlock; use omicron_common::api::external::AddressLotCreateResponse; @@ -851,8 +855,13 @@ impl NexusExternalApi for NexusExternalApiImpl { let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + + nexus.audit_log_entry_init(&opctx, &rqctx).await?; + let project = nexus.project_create(&opctx, &new_project.into_inner()).await?; + + let _ = nexus.audit_log_entry_complete(&opctx).await?; Ok(HttpResponseCreated(project.into())) }; apictx @@ -6352,6 +6361,46 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn audit_log_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let scan_params = ScanByTimestamp::from_query(&query)?; + let pag_params = data_page_params_for(&rqctx, &query)?; + + let log_entries = nexus + .audit_log_list( + &opctx, + &pag_params, + scan_params.selector.start_time, + scan_params.selector.end_time, + ) + .await? 
+ .into_iter() + .map(|entry| entry.into()) + .collect::>(); + Ok(HttpResponseOk(ScanByTimestamp::results_page( + &query, + log_entries, + &|_, entry: &views::AuditLogEntry| entry.timestamp, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn login_saml_begin( rqctx: RequestContext, path_params: Path, diff --git a/nexus/tests/integration_tests/audit_log.rs b/nexus/tests/integration_tests/audit_log.rs new file mode 100644 index 0000000000..74720437c0 --- /dev/null +++ b/nexus/tests/integration_tests/audit_log.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use chrono::{DateTime, Utc}; +use dropshot::{test_util::ClientTestContext, ResultsPage}; +use nexus_test_utils::resource_helpers::{ + create_project, objects_list_page_authz, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +fn to_q(d: DateTime) -> String { + d.to_rfc3339_opts(chrono::SecondsFormat::Micros, true) +} + +async fn fetch_log( + client: &ClientTestContext, + start: DateTime, + end: Option>, +) -> ResultsPage { + let mut qs = vec![format!("start_time={}", to_q(start))]; + if let Some(end) = end { + qs.push(format!("end_time={}", to_q(end))); + } + let url = format!("/v1/system/audit-log?{}", qs.join("&")); + objects_list_page_authz::(client, &url).await +} + +#[nexus_test] +async fn test_audit_log_list(ctx: &ControlPlaneTestContext) { + let client = &ctx.external_client; + + let t0: DateTime = "2024-01-01T00:00:00Z".parse().unwrap(); + // let t_future: DateTime = "2099-01-01T00:00:00Z".parse().unwrap(); + + let audit_log = fetch_log(client, t0, None).await; + assert_eq!(audit_log.items.len(), 0); + + let t1 = Utc::now(); // before log entry + + // this endpoint has audit log calls in it + create_project(client, "test-proj").await; + + let t2 = Utc::now(); // after log entry + + let audit_log = fetch_log(client, t0, None).await; + assert_eq!(audit_log.items.len(), 1); + + // this endpoint has audit log calls in it + create_project(client, "test-proj2").await; + + // let t3 = Utc::now(); // after second entry + + let audit_log = fetch_log(client, t1, None).await; + assert_eq!(audit_log.items.len(), 2); + + // we can exclude the entry by timestamp + let audit_log = fetch_log(client, t2, Some(t2)).await; + assert_eq!(audit_log.items.len(), 0); + + let audit_log = fetch_log(client, t2, None).await; + assert_eq!(audit_log.items.len(), 1); +} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index dfcea79607..c3233d1aaf 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -5,6 +5,7 @@ mod address_lots; mod allow_list; +mod audit_log; mod authn_http; mod authz; mod basic; diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 4e616e698f..7b4a75fcc1 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -2287,3 +2287,12 @@ pub struct DeviceAccessTokenRequest { pub device_code: String, pub client_id: Uuid, } + +// Audit log has its own pagination scheme because it paginates by timestamp. 
+#[derive(Deserialize, JsonSchema, Serialize, PartialEq, Debug, Clone)] +pub struct AuditLog { + /// Required, inclusive + pub start_time: DateTime, + /// Exclusive + pub end_time: Option>, +} diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 94b2279906..c779c559bb 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -1027,3 +1027,43 @@ pub struct OxqlQueryResult { /// Tables resulting from the query, each containing timeseries. pub tables: Vec, } + +/// Audit log entry +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct AuditLogEntry { + /// Unique identifier for the audit log entry + pub id: Uuid, + + /// When the request was received + pub timestamp: DateTime, + + /// Request ID for tracing requests through the system + pub request_id: String, + /// Full URL of the request + pub request_uri: String, + /// API endpoint ID + pub operation_id: String, + /// IP address that made the request + pub source_ip: String, + /// Resource type + pub resource_type: String, + + /// User ID of the actor who performed the action + pub actor_id: Option, + pub actor_silo_id: Option, + + /// The specific action that was attempted (create, delete, update, etc) + pub action: String, + + // Fields that are optional because they get filled in after the action completes + /// Resource identifier + pub resource_id: Option, + + // if this is a bigger number than JS can handle, something has gone badly wrong + /// Time in milliseconds between receiving request and responding + pub duration_ms: Option, + + /// Error information if the action failed + pub error_code: Option, + pub error_message: Option, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index bc043059dd..e88b296064 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -5036,6 +5036,86 @@ } } }, + "/v1/system/audit-log": { + "get": { + "tags": [ + "system/audit-log" + ], + "summary": "View audit log", + "operationId": "audit_log_list", + "parameters": [ + { + "in": "query", + "name": "end_time", + "description": "Exclusive", + "schema": { + "nullable": true, + "type": "string", + "format": "date-time" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/TimestampSortMode" + } + }, + { + "in": "query", + "name": "start_time", + "description": "Required, inclusive", + "schema": { + "type": "string", + "format": "date-time" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AuditLogEntryResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "start_time" + ] + } + } + }, "/v1/system/hardware/disks": { "get": { "tags": [ @@ -11082,6 +11162,115 @@ } ] }, + "AuditLogActor": { + "type": "object", + "properties": { + "user_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "user_id" + ] + }, + "AuditLogEntry": { + "description": 
"Audit log entry", + "type": "object", + "properties": { + "action": { + "description": "The specific action that was attempted (create, delete, update, etc)", + "type": "string" + }, + "actor": { + "description": "Username and ID of the actor who performed the action", + "allOf": [ + { + "$ref": "#/components/schemas/AuditLogActor" + } + ] + }, + "duration_ms": { + "nullable": true, + "description": "Time in milliseconds between receiving request and responding", + "type": "integer", + "format": "int64" + }, + "error_code": { + "nullable": true, + "description": "Error information if the action failed", + "type": "string" + }, + "error_message": { + "nullable": true, + "type": "string" + }, + "id": { + "description": "Unique identifier for the audit log entry", + "type": "string", + "format": "uuid" + }, + "request_id": { + "description": "Request ID for tracing requests through the system", + "type": "string", + "format": "uuid" + }, + "request_url": { + "description": "Full URL of the request", + "type": "string" + }, + "resource_id": { + "description": "Resource identifier", + "type": "string", + "format": "uuid" + }, + "resource_type": { + "description": "Resource type", + "type": "string" + }, + "source_ip": { + "description": "IP address that made the request, accounting for X-Forwarded-For", + "type": "string" + }, + "timestamp": { + "description": "When the request was received", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "action", + "actor", + "id", + "request_id", + "request_url", + "resource_id", + "resource_type", + "source_ip", + "timestamp" + ] + }, + "AuditLogEntryResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/AuditLogEntry" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "AuthzScope": { "description": "Authorization scope for a timeseries.\n\nThis describes the level at which a user must be authorized to read data from a timeseries. For example, fleet-scoping means the data is only visible to an operator or fleet reader. Project-scoped, on the other hand, indicates that a user will see data limited to the projects on which they have read permissions.", "oneOf": [ @@ -22956,6 +23145,18 @@ "ram_provisioned" ] }, + "TimestampSortMode": { + "description": "Supported set of sort modes for scanning by timestamp only\n\nCurrently, we only support scanning in ascending order.", + "oneOf": [ + { + "description": "sort in increasing order of \"name\"", + "type": "string", + "enum": [ + "ascending" + ] + } + ] + }, "NameSortMode": { "description": "Supported set of sort modes for scanning by name only\n\nCurrently, we only support scanning in ascending order.", "oneOf": [ @@ -23074,6 +23275,13 @@ "url": "http://docs.oxide.computer/api/snapshots" } }, + { + "name": "system/audit-log", + "description": "These endpoints relate to audit logs.", + "externalDocs": { + "url": "http://docs.oxide.computer/api/system-audit-log" + } + }, { "name": "system/hardware", "description": "These operations pertain to hardware inventory and management. Racks are the unit of expansion of an Oxide deployment. 
Racks are in turn composed of sleds, switches, power supplies, and a cabled backplane.", diff --git a/schema/crdb/audit-log/up01.sql b/schema/crdb/audit-log/up01.sql new file mode 100644 index 0000000000..c3d21a7061 --- /dev/null +++ b/schema/crdb/audit-log/up01.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS audit_log ( + id UUID NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + request_id STRING NOT NULL, + request_uri STRING NOT NULL, + operation_id STRING NOT NULL, + source_ip STRING NOT NULL, + resource_type STRING NOT NULL, + actor_id UUID NOT NULL, + -- actor_silo_id UUID NOT NULL, + action STRING NOT NULL, + -- response fields + resource_id UUID, + duration INTERVAL, + error_code STRING, + error_message STRING, + -- this stuff avoids table scans when filtering and sorting by timestamp + -- sequential field must go after the random field + -- https://www.cockroachlabs.com/docs/v22.1/performance-best-practices-overview#use-multi-column-primary-keys + -- https://www.cockroachlabs.com/docs/v22.1/hash-sharded-indexes#create-a-table-with-a-hash-sharded-secondary-index + PRIMARY KEY (id, timestamp), + INDEX (timestamp) USING HASH +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ce6764bd17..fda57310ed 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4746,6 +4746,33 @@ CREATE UNIQUE INDEX IF NOT EXISTS one_record_per_volume_resource_usage on omicro region_snapshot_snapshot_id ); +CREATE TABLE IF NOT EXISTS audit_log ( + id UUID NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + -- TODO: sizes on all strings + request_id STRING NOT NULL, + request_uri STRING NOT NULL, + operation_id STRING NOT NULL, + source_ip STRING NOT NULL, + resource_type STRING NOT NULL, + + actor_id UUID, + actor_silo_id UUID, + + action STRING NOT NULL, + -- fields we can only fill in after the operation + resource_id UUID, + duration INTERVAL, + error_code STRING, + error_message STRING, + -- this stuff avoids table scans when filtering and sorting by timestamp + -- sequential field must go after the random field + -- https://www.cockroachlabs.com/docs/v22.1/performance-best-practices-overview#use-multi-column-primary-keys + -- https://www.cockroachlabs.com/docs/v22.1/hash-sharded-indexes#create-a-table-with-a-hash-sharded-secondary-index + PRIMARY KEY (id, timestamp), + INDEX (timestamp) USING HASH +); + /* * Keep this at the end of file so that the database does not contain a version * until it is fully populated. @@ -4757,7 +4784,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '118.0.0', NULL) + (TRUE, NOW(), NOW(), '119.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;