@@ -31,6 +31,7 @@ use sled_storage::disk::Disk;
3131use sled_storage:: disk:: DiskError ;
3232use sled_storage:: disk:: RawDisk ;
3333use slog:: Logger ;
34+ use slog:: debug;
3435use slog:: error;
3536use slog:: info;
3637use slog:: warn;
@@ -47,6 +48,8 @@ use crate::disks_common::MaybeUpdatedDisk;
4748use crate :: disks_common:: update_properties_from_raw_disk;
4849use crate :: dump_setup_task:: FormerZoneRootArchiver ;
4950use crate :: raw_disks:: RawDiskWithId ;
51+ use camino:: Utf8PathBuf ;
52+ use illumos_utils:: zfs:: Mountpoint ;
5053
5154/// Set of currently managed zpools.
5255///
@@ -506,8 +509,13 @@ impl ExternalDisks {
506509 // that we find on them.
507510 let mut failed = Vec :: new ( ) ;
508511 for ( disk_id, zpool_name) in newly_adopted {
509- if let Err ( error) = self
510- . archive_and_destroy_former_zone_roots ( & zpool_name, log)
512+ if let Err ( error) = disk_adopter
513+ . archive_and_destroy_former_zone_roots (
514+ & zpool_name,
515+ & self . mount_config ,
516+ & self . archiver ,
517+ log,
518+ )
511519 . await
512520 {
513521 // This situation is really unfortunate. We adopted the disk,
@@ -660,14 +668,103 @@ impl ExternalDisks {
660668 }
661669 }
662670 }
671+ }
672+
673+ #[ derive( Debug ) ]
674+ struct ExternalDiskState {
675+ config : OmicronPhysicalDiskConfig ,
676+ state : DiskState ,
677+ }
678+
679+ impl ExternalDiskState {
680+ fn managed ( config : OmicronPhysicalDiskConfig , disk : Disk ) -> Self {
681+ Self { config, state : DiskState :: Managed ( disk) }
682+ }
683+
684+ fn failed (
685+ config : OmicronPhysicalDiskConfig ,
686+ err : DiskManagementError ,
687+ ) -> Self {
688+ Self { config, state : DiskState :: FailedToManage ( err) }
689+ }
690+ }
691+
692+ impl IdMappable for ExternalDiskState {
693+ type Id = PhysicalDiskUuid ;
694+
695+ fn id ( & self ) -> Self :: Id {
696+ self . config . id
697+ }
698+ }
699+
700+ #[ derive( Debug ) ]
701+ enum DiskState {
702+ Managed ( Disk ) ,
703+ FailedToManage ( DiskManagementError ) ,
704+ }
705+
706+ /// Helper to allow unit tests to run without interacting with the real [`Disk`]
707+ /// implementation. In production, the only implementor of this trait is
708+ /// [`RealDiskAdopter`].
709+ trait DiskAdopter {
710+ fn adopt_disk (
711+ & self ,
712+ raw_disk : RawDisk ,
713+ mount_config : & MountConfig ,
714+ pool_id : ZpoolUuid ,
715+ log : & Logger ,
716+ ) -> impl Future < Output = Result < Disk , DiskManagementError > > + Send ;
717+
718+ fn archive_and_destroy_former_zone_roots (
719+ & self ,
720+ zpool_name : & ZpoolName ,
721+ mount_config : & MountConfig ,
722+ archiver : & FormerZoneRootArchiver ,
723+ log : & Logger ,
724+ ) -> impl Future < Output = Result < ( ) , anyhow:: Error > > + Send ;
725+ }
726+
727+ struct RealDiskAdopter < ' a > {
728+ key_requester : & ' a StorageKeyRequester ,
729+ }
730+
731+ impl DiskAdopter for RealDiskAdopter < ' _ > {
732+ async fn adopt_disk (
733+ & self ,
734+ raw_disk : RawDisk ,
735+ mount_config : & MountConfig ,
736+ pool_id : ZpoolUuid ,
737+ log : & Logger ,
738+ ) -> Result < Disk , DiskManagementError > {
739+ Disk :: new (
740+ log,
741+ mount_config,
742+ raw_disk,
743+ Some ( pool_id) ,
744+ Some ( self . key_requester ) ,
745+ )
746+ . await
747+ . map_err ( |err| {
748+ let err_string = InlineErrorChain :: new ( & err) . to_string ( ) ;
749+ match err {
750+ // We pick this error out and identify it separately because
751+ // it may be transient, and should sometimes be handled with
752+ // a retry.
753+ DiskError :: Dataset ( DatasetError :: KeyManager ( _) ) => {
754+ DiskManagementError :: KeyManager ( err_string)
755+ }
756+ _ => DiskManagementError :: Other ( err_string) ,
757+ }
758+ } )
759+ }
663760
664761 async fn archive_and_destroy_former_zone_roots (
665762 & self ,
666763 zpool_name : & ZpoolName ,
764+ mount_config : & MountConfig ,
765+ archiver : & FormerZoneRootArchiver ,
667766 log : & Logger ,
668767 ) -> Result < ( ) , anyhow:: Error > {
669- let mount_config = & self . mount_config ;
670-
671768 // Attempt to archive and then wipe the contents of the zones dataset.
672769 //
673770 // There's a chain of design goals and compromises here:
@@ -737,7 +834,7 @@ impl ExternalDisks {
737834 cleanup_former_zone_roots (
738835 log,
739836 mount_config,
740- & self . archiver ,
837+ archiver,
741838 & zpool_name,
742839 )
743840 . await ?;
@@ -760,87 +857,6 @@ impl ExternalDisks {
760857 }
761858}
762859
763- #[ derive( Debug ) ]
764- struct ExternalDiskState {
765- config : OmicronPhysicalDiskConfig ,
766- state : DiskState ,
767- }
768-
769- impl ExternalDiskState {
770- fn managed ( config : OmicronPhysicalDiskConfig , disk : Disk ) -> Self {
771- Self { config, state : DiskState :: Managed ( disk) }
772- }
773-
774- fn failed (
775- config : OmicronPhysicalDiskConfig ,
776- err : DiskManagementError ,
777- ) -> Self {
778- Self { config, state : DiskState :: FailedToManage ( err) }
779- }
780- }
781-
782- impl IdMappable for ExternalDiskState {
783- type Id = PhysicalDiskUuid ;
784-
785- fn id ( & self ) -> Self :: Id {
786- self . config . id
787- }
788- }
789-
790- #[ derive( Debug ) ]
791- enum DiskState {
792- Managed ( Disk ) ,
793- FailedToManage ( DiskManagementError ) ,
794- }
795-
796- /// Helper to allow unit tests to run without interacting with the real [`Disk`]
797- /// implementation. In production, the only implementor of this trait is
798- /// [`RealDiskAdopter`].
799- trait DiskAdopter {
800- fn adopt_disk (
801- & self ,
802- raw_disk : RawDisk ,
803- mount_config : & MountConfig ,
804- pool_id : ZpoolUuid ,
805- log : & Logger ,
806- ) -> impl Future < Output = Result < Disk , DiskManagementError > > + Send ;
807- }
808-
809- struct RealDiskAdopter < ' a > {
810- key_requester : & ' a StorageKeyRequester ,
811- }
812-
813- impl DiskAdopter for RealDiskAdopter < ' _ > {
814- async fn adopt_disk (
815- & self ,
816- raw_disk : RawDisk ,
817- mount_config : & MountConfig ,
818- pool_id : ZpoolUuid ,
819- log : & Logger ,
820- ) -> Result < Disk , DiskManagementError > {
821- Disk :: new (
822- log,
823- mount_config,
824- raw_disk,
825- Some ( pool_id) ,
826- Some ( self . key_requester ) ,
827- )
828- . await
829- . map_err ( |err| {
830- let err_string = InlineErrorChain :: new ( & err) . to_string ( ) ;
831- match err {
832- // We pick this error out and identify it separately because
833- // it may be transient, and should sometimes be handled with
834- // a retry.
835- DiskError :: Dataset ( DatasetError :: KeyManager ( _) ) => {
836- DiskManagementError :: KeyManager ( err_string)
837- }
838- _ => DiskManagementError :: Other ( err_string) ,
839- }
840- } )
841- }
842- }
843-
844860/// Given a pool name, find any zone root filesystems, attempt to archive their
845861/// log files, and destroy them.
846862async fn cleanup_former_zone_roots (
@@ -876,7 +892,30 @@ async fn cleanup_former_zone_roots(
876892 & child_dataset_relative_to_pool,
877893 ) ;
878894
879- // Attempt to archive this zone as though it's a zone name.
895+ // We need this dataset to be mounted in order to archive its logs.
896+ // On initial sled boot, it won't be mounted yet. In other cases (e.g.,
897+ // sled-agent restart), it may already be.
898+ let child_dataset_name =
899+ format ! ( "{}/{}" , parent_dataset_name, child_name) ;
900+ debug ! (
901+ log,
902+ "ensuring dataset mounted to archive former zone root" ;
903+ "path" => %mountpoint,
904+ "dataset" => & child_dataset_name,
905+ ) ;
906+ Zfs :: ensure_dataset_mounted_and_exists (
907+ & child_dataset_name,
908+ & Mountpoint ( Utf8PathBuf :: from ( & mountpoint) ) ,
909+ )
910+ . await
911+ . with_context ( || format ! ( "mounting {:?}" , & child_dataset_name) )
912+ . map_err ( |error| {
913+ DiskManagementError :: Other (
914+ InlineErrorChain :: new ( & * error) . to_string ( ) ,
915+ )
916+ } ) ?;
917+
918+ // Attempt to archive this dataset as though it's a former zone root.
880919 // This is best-effort.
881920 info ! (
882921 log,
@@ -885,8 +924,8 @@ async fn cleanup_former_zone_roots(
885924 ) ;
886925 archiver. archive_former_zone_root ( mountpoint) . await ;
887926
888- let child_dataset_name =
889- format ! ( "{}/{}" , parent_dataset_name , child_name ) ;
927+ // Finally, destroy it. This preserves historical behavior of wiping
928+ // these datasets when we adopt disks.
890929 info ! (
891930 log,
892931 "destroying former zone root" ;
@@ -949,6 +988,16 @@ mod tests {
949988 self . requests . lock ( ) . unwrap ( ) . push ( raw_disk) ;
950989 Ok ( disk)
951990 }
991+
992+ async fn archive_and_destroy_former_zone_roots (
993+ & self ,
994+ _zpool_name : & ZpoolName ,
995+ _mount_config : & MountConfig ,
996+ _archiver : & FormerZoneRootArchiver ,
997+ _log : & Logger ,
998+ ) -> Result < ( ) , anyhow:: Error > {
999+ Ok ( ( ) )
1000+ }
9521001 }
9531002
9541003 // All our tests operate on fake in-memory disks, so the mount config
0 commit comments