Skip to content

Commit dbc6e68

Browse files
committed
another test lap
1 parent 4123f50 commit dbc6e68

File tree

2 files changed

+145
-90
lines changed

2 files changed

+145
-90
lines changed

sled-agent/config-reconciler/src/dump_setup.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ impl DumpSetupWorker {
11561156
let mut rotated_log_files = Vec::new();
11571157
if include_live {
11581158
let pattern = logdir
1159-
.join("*.log")
1159+
.join("*.log*")
11601160
.to_str()
11611161
.ok_or_else(|| ArchiveLogsError::Utf8(zone_name.to_string()))?
11621162
.to_string();
@@ -1184,6 +1184,12 @@ impl DumpSetupWorker {
11841184
self.log,
11851185
"Archiving {count} log files from {zone_name} zone"
11861186
);
1187+
} else if include_live {
1188+
warn!(
1189+
self.log,
1190+
"Found no log files from {zone_name} zone, including live \
1191+
log files"
1192+
);
11871193
}
11881194
for entry in rotated_log_files {
11891195
let src_name = entry.file_name().unwrap();

sled-agent/config-reconciler/src/reconciler_task/external_disks.rs

Lines changed: 138 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use sled_storage::disk::Disk;
3131
use sled_storage::disk::DiskError;
3232
use sled_storage::disk::RawDisk;
3333
use slog::Logger;
34+
use slog::debug;
3435
use slog::error;
3536
use slog::info;
3637
use slog::warn;
@@ -47,6 +48,8 @@ use crate::disks_common::MaybeUpdatedDisk;
4748
use crate::disks_common::update_properties_from_raw_disk;
4849
use crate::dump_setup_task::FormerZoneRootArchiver;
4950
use crate::raw_disks::RawDiskWithId;
51+
use camino::Utf8PathBuf;
52+
use illumos_utils::zfs::Mountpoint;
5053

5154
/// Set of currently managed zpools.
5255
///
@@ -506,8 +509,13 @@ impl ExternalDisks {
506509
// that we find on them.
507510
let mut failed = Vec::new();
508511
for (disk_id, zpool_name) in newly_adopted {
509-
if let Err(error) = self
510-
.archive_and_destroy_former_zone_roots(&zpool_name, log)
512+
if let Err(error) = disk_adopter
513+
.archive_and_destroy_former_zone_roots(
514+
&zpool_name,
515+
&self.mount_config,
516+
&self.archiver,
517+
log,
518+
)
511519
.await
512520
{
513521
// This situation is really unfortunate. We adopted the disk,
@@ -660,14 +668,103 @@ impl ExternalDisks {
660668
}
661669
}
662670
}
671+
}
672+
673+
#[derive(Debug)]
674+
struct ExternalDiskState {
675+
config: OmicronPhysicalDiskConfig,
676+
state: DiskState,
677+
}
678+
679+
impl ExternalDiskState {
680+
fn managed(config: OmicronPhysicalDiskConfig, disk: Disk) -> Self {
681+
Self { config, state: DiskState::Managed(disk) }
682+
}
683+
684+
fn failed(
685+
config: OmicronPhysicalDiskConfig,
686+
err: DiskManagementError,
687+
) -> Self {
688+
Self { config, state: DiskState::FailedToManage(err) }
689+
}
690+
}
691+
692+
impl IdMappable for ExternalDiskState {
693+
type Id = PhysicalDiskUuid;
694+
695+
fn id(&self) -> Self::Id {
696+
self.config.id
697+
}
698+
}
699+
700+
#[derive(Debug)]
701+
enum DiskState {
702+
Managed(Disk),
703+
FailedToManage(DiskManagementError),
704+
}
705+
706+
/// Helper to allow unit tests to run without interacting with the real [`Disk`]
707+
/// implementation. In production, the only implementor of this trait is
708+
/// [`RealDiskAdopter`].
709+
trait DiskAdopter {
710+
fn adopt_disk(
711+
&self,
712+
raw_disk: RawDisk,
713+
mount_config: &MountConfig,
714+
pool_id: ZpoolUuid,
715+
log: &Logger,
716+
) -> impl Future<Output = Result<Disk, DiskManagementError>> + Send;
717+
718+
fn archive_and_destroy_former_zone_roots(
719+
&self,
720+
zpool_name: &ZpoolName,
721+
mount_config: &MountConfig,
722+
archiver: &FormerZoneRootArchiver,
723+
log: &Logger,
724+
) -> impl Future<Output = Result<(), anyhow::Error>> + Send;
725+
}
726+
727+
struct RealDiskAdopter<'a> {
728+
key_requester: &'a StorageKeyRequester,
729+
}
730+
731+
impl DiskAdopter for RealDiskAdopter<'_> {
732+
async fn adopt_disk(
733+
&self,
734+
raw_disk: RawDisk,
735+
mount_config: &MountConfig,
736+
pool_id: ZpoolUuid,
737+
log: &Logger,
738+
) -> Result<Disk, DiskManagementError> {
739+
Disk::new(
740+
log,
741+
mount_config,
742+
raw_disk,
743+
Some(pool_id),
744+
Some(self.key_requester),
745+
)
746+
.await
747+
.map_err(|err| {
748+
let err_string = InlineErrorChain::new(&err).to_string();
749+
match err {
750+
// We pick this error out and identify it separately because
751+
// it may be transient, and should sometimes be handled with
752+
// a retry.
753+
DiskError::Dataset(DatasetError::KeyManager(_)) => {
754+
DiskManagementError::KeyManager(err_string)
755+
}
756+
_ => DiskManagementError::Other(err_string),
757+
}
758+
})
759+
}
663760

664761
async fn archive_and_destroy_former_zone_roots(
665762
&self,
666763
zpool_name: &ZpoolName,
764+
mount_config: &MountConfig,
765+
archiver: &FormerZoneRootArchiver,
667766
log: &Logger,
668767
) -> Result<(), anyhow::Error> {
669-
let mount_config = &self.mount_config;
670-
671768
// Attempt to archive and then wipe the contents of the zones dataset.
672769
//
673770
// There's a chain of design goals and compromises here:
@@ -737,7 +834,7 @@ impl ExternalDisks {
737834
cleanup_former_zone_roots(
738835
log,
739836
mount_config,
740-
&self.archiver,
837+
archiver,
741838
&zpool_name,
742839
)
743840
.await?;
@@ -760,87 +857,6 @@ impl ExternalDisks {
760857
}
761858
}
762859

763-
#[derive(Debug)]
764-
struct ExternalDiskState {
765-
config: OmicronPhysicalDiskConfig,
766-
state: DiskState,
767-
}
768-
769-
impl ExternalDiskState {
770-
fn managed(config: OmicronPhysicalDiskConfig, disk: Disk) -> Self {
771-
Self { config, state: DiskState::Managed(disk) }
772-
}
773-
774-
fn failed(
775-
config: OmicronPhysicalDiskConfig,
776-
err: DiskManagementError,
777-
) -> Self {
778-
Self { config, state: DiskState::FailedToManage(err) }
779-
}
780-
}
781-
782-
impl IdMappable for ExternalDiskState {
783-
type Id = PhysicalDiskUuid;
784-
785-
fn id(&self) -> Self::Id {
786-
self.config.id
787-
}
788-
}
789-
790-
#[derive(Debug)]
791-
enum DiskState {
792-
Managed(Disk),
793-
FailedToManage(DiskManagementError),
794-
}
795-
796-
/// Helper to allow unit tests to run without interacting with the real [`Disk`]
797-
/// implementation. In production, the only implementor of this trait is
798-
/// [`RealDiskAdopter`].
799-
trait DiskAdopter {
800-
fn adopt_disk(
801-
&self,
802-
raw_disk: RawDisk,
803-
mount_config: &MountConfig,
804-
pool_id: ZpoolUuid,
805-
log: &Logger,
806-
) -> impl Future<Output = Result<Disk, DiskManagementError>> + Send;
807-
}
808-
809-
struct RealDiskAdopter<'a> {
810-
key_requester: &'a StorageKeyRequester,
811-
}
812-
813-
impl DiskAdopter for RealDiskAdopter<'_> {
814-
async fn adopt_disk(
815-
&self,
816-
raw_disk: RawDisk,
817-
mount_config: &MountConfig,
818-
pool_id: ZpoolUuid,
819-
log: &Logger,
820-
) -> Result<Disk, DiskManagementError> {
821-
Disk::new(
822-
log,
823-
mount_config,
824-
raw_disk,
825-
Some(pool_id),
826-
Some(self.key_requester),
827-
)
828-
.await
829-
.map_err(|err| {
830-
let err_string = InlineErrorChain::new(&err).to_string();
831-
match err {
832-
// We pick this error out and identify it separately because
833-
// it may be transient, and should sometimes be handled with
834-
// a retry.
835-
DiskError::Dataset(DatasetError::KeyManager(_)) => {
836-
DiskManagementError::KeyManager(err_string)
837-
}
838-
_ => DiskManagementError::Other(err_string),
839-
}
840-
})
841-
}
842-
}
843-
844860
/// Given a pool name, find any zone root filesystems, attempt to archive their
845861
/// log files, and destroy them.
846862
async fn cleanup_former_zone_roots(
@@ -876,7 +892,30 @@ async fn cleanup_former_zone_roots(
876892
&child_dataset_relative_to_pool,
877893
);
878894

879-
// Attempt to archive this zone as though it's a zone name.
895+
// We need this dataset to be mounted in order to archive its logs.
896+
// On initial sled boot, it won't be mounted yet. In other cases (e.g.,
897+
// sled-agent restart), it may already be.
898+
let child_dataset_name =
899+
format!("{}/{}", parent_dataset_name, child_name);
900+
debug!(
901+
log,
902+
"ensuring dataset mounted to archive former zone root";
903+
"path" => %mountpoint,
904+
"dataset" => &child_dataset_name,
905+
);
906+
Zfs::ensure_dataset_mounted_and_exists(
907+
&child_dataset_name,
908+
&Mountpoint(Utf8PathBuf::from(&mountpoint)),
909+
)
910+
.await
911+
.with_context(|| format!("mounting {:?}", &child_dataset_name))
912+
.map_err(|error| {
913+
DiskManagementError::Other(
914+
InlineErrorChain::new(&*error).to_string(),
915+
)
916+
})?;
917+
918+
// Attempt to archive this dataset as though it's a former zone root.
880919
// This is best-effort.
881920
info!(
882921
log,
@@ -885,8 +924,8 @@ async fn cleanup_former_zone_roots(
885924
);
886925
archiver.archive_former_zone_root(mountpoint).await;
887926

888-
let child_dataset_name =
889-
format!("{}/{}", parent_dataset_name, child_name);
927+
// Finally, destroy it. This preserves historical behavior of wiping
928+
// these datasets when we adopt disks.
890929
info!(
891930
log,
892931
"destroying former zone root";
@@ -949,6 +988,16 @@ mod tests {
949988
self.requests.lock().unwrap().push(raw_disk);
950989
Ok(disk)
951990
}
991+
992+
async fn archive_and_destroy_former_zone_roots(
993+
&self,
994+
_zpool_name: &ZpoolName,
995+
_mount_config: &MountConfig,
996+
_archiver: &FormerZoneRootArchiver,
997+
_log: &Logger,
998+
) -> Result<(), anyhow::Error> {
999+
Ok(())
1000+
}
9521001
}
9531002

9541003
// All our tests operate on fake in-memory disks, so the mount config

0 commit comments

Comments
 (0)