Skip to content

Commit

Permalink
pageserver: simpler, stricter config error handling (#8177)
Browse files Browse the repository at this point in the history
## Problem

Tenant attachment has error paths for failures to write local
configuration, but these types of local storage I/O errors should be
considered fatal for the process. Related thread on an earlier PR that
touched this code:
#7947 (comment)

## Summary of changes

- Make errors writing tenant config fatal (abort process)
- When reading tenant config, make all I/O errors except ENOENT fatal
- Replace use of bare anyhow errors with `LoadConfigError`
  • Loading branch information
jcsp authored and VladLazar committed Jul 8, 2024
1 parent eefe278 commit d602666
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 145 deletions.
4 changes: 2 additions & 2 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ impl From<UpsertLocationError> for ApiError {
BadRequest(e) => ApiError::BadRequest(e),
Unavailable(_) => ApiError::ShuttingDown,
e @ InProgress => ApiError::Conflict(format!("{e}")),
Flush(e) | Other(e) => ApiError::InternalServerError(e),
Flush(e) | InternalError(e) => ApiError::InternalServerError(e),
}
}
}
Expand Down Expand Up @@ -1296,7 +1296,7 @@ async fn update_tenant_config_handler(

crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
.await
.map_err(ApiError::InternalServerError)?;
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
tenant.set_new_tenant_config(new_tenant_conf);

json_response(StatusCode::OK, ())
Expand Down
78 changes: 42 additions & 36 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,15 @@ impl From<PageReconstructError> for GcError {
}
}

#[derive(thiserror::Error, Debug)]
pub(crate) enum LoadConfigError {
#[error("TOML deserialization error: '{0}'")]
DeserializeToml(#[from] toml_edit::de::Error),

#[error("Config not found at {0}")]
NotFound(Utf8PathBuf),
}

impl Tenant {
/// Yet another helper for timeline initialization.
///
Expand Down Expand Up @@ -2563,44 +2572,43 @@ impl Tenant {
pub(super) fn load_tenant_config(
conf: &'static PageServerConf,
tenant_shard_id: &TenantShardId,
) -> anyhow::Result<LocationConf> {
) -> Result<LocationConf, LoadConfigError> {
let config_path = conf.tenant_location_config_path(tenant_shard_id);

if config_path.exists() {
// New-style config takes precedence
let deserialized = Self::read_config(&config_path)?;
Ok(toml_edit::de::from_document::<LocationConf>(deserialized)?)
} else {
// The config should almost always exist for a tenant directory:
// - When attaching a tenant, the config is the first thing we write
// - When detaching a tenant, we atomically move the directory to a tmp location
// before deleting contents.
//
// The very rare edge case that can result in a missing config is if we crash during attach
// between creating directory and writing config. Callers should handle that as if the
// directory didn't exist.
anyhow::bail!("tenant config not found in {}", config_path);
}
}

fn read_config(path: &Utf8Path) -> anyhow::Result<toml_edit::Document> {
info!("loading tenant configuration from {path}");
info!("loading tenant configuration from {config_path}");

// load and parse file
let config = fs::read_to_string(path)
.with_context(|| format!("Failed to load config from path '{path}'"))?;
let config = fs::read_to_string(&config_path).map_err(|e| {
match e.kind() {
std::io::ErrorKind::NotFound => {
// The config should almost always exist for a tenant directory:
// - When attaching a tenant, the config is the first thing we write
// - When detaching a tenant, we atomically move the directory to a tmp location
// before deleting contents.
//
// The very rare edge case that can result in a missing config is if we crash during attach
// between creating directory and writing config. Callers should handle that as if the
// directory didn't exist.

LoadConfigError::NotFound(config_path)
}
_ => {
// No IO errors except NotFound are acceptable here: other kinds of error indicate local storage or permissions issues
// that we cannot cleanly recover
crate::virtual_file::on_fatal_io_error(&e, "Reading tenant config file")
}
}
})?;

config
.parse::<toml_edit::Document>()
.with_context(|| format!("Failed to parse config from file '{path}' as toml file"))
Ok(toml_edit::de::from_str::<LocationConf>(&config)?)
}

#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
pub(super) async fn persist_tenant_config(
conf: &'static PageServerConf,
tenant_shard_id: &TenantShardId,
location_conf: &LocationConf,
) -> anyhow::Result<()> {
) -> std::io::Result<()> {
let config_path = conf.tenant_location_config_path(tenant_shard_id);

Self::persist_tenant_config_at(tenant_shard_id, &config_path, location_conf).await
Expand All @@ -2611,7 +2619,7 @@ impl Tenant {
tenant_shard_id: &TenantShardId,
config_path: &Utf8Path,
location_conf: &LocationConf,
) -> anyhow::Result<()> {
) -> std::io::Result<()> {
debug!("persisting tenantconf to {config_path}");

let mut conf_content = r#"# This file contains a specific per-tenant's config.
Expand All @@ -2620,22 +2628,20 @@ impl Tenant {
.to_string();

fail::fail_point!("tenant-config-before-write", |_| {
anyhow::bail!("tenant-config-before-write");
Err(std::io::Error::new(
std::io::ErrorKind::Other,
"tenant-config-before-write",
))
});

// Convert the config to a toml file.
conf_content += &toml_edit::ser::to_string_pretty(&location_conf)?;
conf_content +=
&toml_edit::ser::to_string_pretty(&location_conf).expect("Config serialization failed");

let temp_path = path_with_suffix_extension(config_path, TEMP_FILE_SUFFIX);

let tenant_shard_id = *tenant_shard_id;
let config_path = config_path.to_owned();
let conf_content = conf_content.into_bytes();
VirtualFile::crashsafe_overwrite(config_path.clone(), temp_path, conf_content)
.await
.with_context(|| format!("write tenant {tenant_shard_id} config to {config_path}"))?;

Ok(())
VirtualFile::crashsafe_overwrite(config_path.to_owned(), temp_path, conf_content).await
}

//
Expand Down
Loading

0 comments on commit d602666

Please sign in to comment.