Skip to content

Commit

Permalink
fix: avoid busy loop on replacement failure (#3613)
Browse files Browse the repository at this point in the history
Add an AtomicBool per RemoteLayer, use it to mark together with closed
semaphore that remotelayer is unusable until restart or ignore+load.

#3533 (comment)
  • Loading branch information
koivunej authored Feb 17, 2023
1 parent ae3eff1 commit 8e6b27b
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 5 deletions.
6 changes: 6 additions & 0 deletions pageserver/src/tenant/layer_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ where
expected: &Arc<L>,
new: Arc<L>,
) -> anyhow::Result<Replacement<Arc<L>>> {
fail::fail_point!("layermap-replace-notfound", |_| Ok(
// this is not what happens if an L0 layer was not found a anyhow error but perhaps
// that should be changed. this is good enough to show a replacement failure.
Replacement::NotFound
));

self.layer_map.replace_historic_noflush(expected, new)
}

Expand Down
13 changes: 13 additions & 0 deletions pageserver/src/tenant/storage_layer/remote_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ pub struct RemoteLayer {
access_stats: LayerAccessStats,

pub(crate) ongoing_download: Arc<tokio::sync::Semaphore>,

/// Has `LayerMap::replace` failed for this (true) or not (false).
///
/// Used together with [`ongoing_download`] semaphore in `Timeline::download_remote_layer`.
/// The field is used to mark a RemoteLayer permanently (until restart or ignore+load)
/// unprocessable, because a LayerMap::replace failed.
///
/// It is very unlikely to accumulate these in the Timeline's LayerMap, but having this avoids
/// a possible fast loop between `Timeline::get_reconstruct_data` and
/// `Timeline::download_remote_layer`, which also logs.
pub(crate) download_replacement_failure: std::sync::atomic::AtomicBool,
}

impl std::fmt::Debug for RemoteLayer {
Expand Down Expand Up @@ -207,6 +218,7 @@ impl RemoteLayer {
file_name: fname.to_owned().into(),
layer_metadata: layer_metadata.clone(),
ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
download_replacement_failure: std::sync::atomic::AtomicBool::default(),
access_stats,
}
}
Expand All @@ -228,6 +240,7 @@ impl RemoteLayer {
file_name: fname.to_owned().into(),
layer_metadata: layer_metadata.clone(),
ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
download_replacement_failure: std::sync::atomic::AtomicBool::default(),
access_stats,
}
}
Expand Down
43 changes: 38 additions & 5 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3624,14 +3624,26 @@ impl Timeline {
&self,
remote_layer: Arc<RemoteLayer>,
) -> anyhow::Result<()> {
use std::sync::atomic::Ordering::Relaxed;

let permit = match Arc::clone(&remote_layer.ongoing_download)
.acquire_owned()
.await
{
Ok(permit) => permit,
Err(_closed) => {
info!("download of layer has already finished");
return Ok(());
if remote_layer.download_replacement_failure.load(Relaxed) {
// this path will be hit often, in case there are upper retries. however
// hitting this error will prevent a busy loop between get_reconstruct_data and
// download, so an error is prefered.
//
// TODO: we really should poison the timeline, but panicking is not yet
// supported. Related: https://github.com/neondatabase/neon/issues/3621
anyhow::bail!("an earlier download succeeded but LayerMap::replace failed")
} else {
info!("download of layer has already finished");
return Ok(());
}
}
};

Expand Down Expand Up @@ -3667,15 +3679,16 @@ impl Timeline {
{
use crate::tenant::layer_map::Replacement;
let l: Arc<dyn PersistentLayer> = remote_layer.clone();
match updates.replace_historic(&l, new_layer) {
Ok(Replacement::Replaced { .. }) => { /* expected */ }
let failure = match updates.replace_historic(&l, new_layer) {
Ok(Replacement::Replaced { .. }) => false,
Ok(Replacement::NotFound) => {
// TODO: the downloaded file should probably be removed, otherwise
// it will be added to the layermap on next load? we should
// probably restart any get_reconstruct_data search as well.
//
// See: https://github.com/neondatabase/neon/issues/3533
error!("replacing downloaded layer into layermap failed because layer was not found");
true
}
Ok(Replacement::RemovalBuffered) => {
unreachable!("current implementation does not remove anything")
Expand All @@ -3694,12 +3707,32 @@ impl Timeline {
?other,
"replacing downloaded layer into layermap failed because another layer was found instead of expected"
);
true
}
Err(e) => {
// this is a precondition failure, the layer filename derived
// attributes didn't match up, which doesn't seem likely.
error!("replacing downloaded layer into layermap failed: {e:#?}")
error!("replacing downloaded layer into layermap failed: {e:#?}");
true
}
};

if failure {
// mark the remote layer permanently failed; the timeline is most
// likely unusable after this. sadly we cannot just poison the layermap
// lock with panic, because that would create an issue with shutdown.
//
// this does not change the retry semantics on failed downloads.
//
// use of Relaxed is valid because closing of the semaphore gives
// happens-before and wakes up any waiters; we write this value before
// and any waiters (or would be waiters) will load it after closing
// semaphore.
//
// See: https://github.com/neondatabase/neon/issues/3533
remote_layer
.download_replacement_failure
.store(true, Relaxed);
}
}
updates.flush();
Expand Down
2 changes: 2 additions & 0 deletions test_runner/fixtures/neon_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,7 @@ def timeline_detail(
timeline_id: TimelineId,
include_non_incremental_logical_size: bool = False,
include_timeline_dir_layer_file_size_sum: bool = False,
**kwargs,
) -> Dict[Any, Any]:
params = {}
if include_non_incremental_logical_size:
Expand All @@ -1298,6 +1299,7 @@ def timeline_detail(
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
params=params,
**kwargs,
)
self.verbose_error(res)
res_json = res.json()
Expand Down
62 changes: 62 additions & 0 deletions test_runner/regress/test_ondemand_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PageserverApiException,
RemoteStorageKind,
assert_tenant_status,
available_remote_storages,
Expand All @@ -18,6 +19,7 @@
wait_for_sk_commit_lsn_to_reach_remote_storage,
wait_for_upload,
wait_until,
wait_until_tenant_state,
)
from fixtures.types import Lsn
from fixtures.utils import query_scalar
Expand Down Expand Up @@ -623,3 +625,63 @@ def test_compaction_downloads_on_demand_with_image_creation(

def stringify(conf: Dict[str, Any]) -> Dict[str, str]:
return dict(map(lambda x: (x[0], str(x[1])), conf.items()))


@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_ondemand_download_failure_to_replace(
neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
"""
Make sure that we fail on being unable to replace a RemoteLayer instead of for example livelocking.
See: https://github.com/neondatabase/neon/issues/3533
"""

neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_ondemand_download_failure_to_replace",
)

# disable gc and compaction via default tenant config because config is lost while detaching
# so that compaction will not be the one to download the layer but the http handler is
neon_env_builder.pageserver_config_override = (
"""tenant_config={gc_period = "0s", compaction_period = "0s"}"""
)

env = neon_env_builder.init_start()

tenant_id, timeline_id = env.neon_cli.create_tenant()

env.initial_tenant = tenant_id
pageserver_http = env.pageserver.http_client()

lsn = Lsn(pageserver_http.timeline_detail(tenant_id, timeline_id)["last_record_lsn"])

wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn)

# remove layers so that they will be redownloaded
pageserver_http.tenant_detach(tenant_id)
pageserver_http.tenant_attach(tenant_id)

wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
pageserver_http.configure_failpoints(("layermap-replace-notfound", "return"))

# requesting details with non-incremental size should trigger a download of the only layer
# this will need to be adjusted if an index for logical sizes is ever implemented
with pytest.raises(PageserverApiException):
# error message is not useful
pageserver_http.timeline_detail(tenant_id, timeline_id, True, timeout=2)

actual_message = (
".* ERROR .*replacing downloaded layer into layermap failed because layer was not found"
)
assert env.pageserver.log_contains(actual_message) is not None
env.pageserver.allowed_errors.append(actual_message)

env.pageserver.allowed_errors.append(
".* ERROR .*Error processing HTTP request: InternalServerError\\(get local timeline info"
)
# this might get to run and attempt on-demand, but not always
env.pageserver.allowed_errors.append(".* ERROR .*Task 'initial size calculation'")

# if the above returned, then we didn't have a livelock, and all is well

0 comments on commit 8e6b27b

Please sign in to comment.