@@ -297,7 +297,7 @@ func (m *manager) Start() error {
297297 return err
298298 }
299299 klog .V (2 ).Infof ("Starting recovery of all containers" )
300- err = m .detectSubcontainers ("/" )
300+ err = m .detectSubContainers ("/" )
301301 if err != nil {
302302 return err
303303 }
@@ -385,7 +385,7 @@ func (m *manager) globalHousekeeping(quit chan error) {
385385 start := time .Now ()
386386
387387 // Check for new containers.
388- err := m .detectSubcontainers ("/" )
388+ err := m .detectSubContainers ("/" )
389389 if err != nil {
390390 klog .Errorf ("Failed to detect containers: %s" , err )
391391 }
@@ -1009,6 +1009,10 @@ func (m *manager) destroyContainerLocked(containerName string) error {
10091009 // Already destroyed, done.
10101010 return nil
10111011 }
1012+ // If the container is OOM-killed, keep monitoring metrics for the container.
1013+ if cont .oomEvents != 0 {
1014+ return nil
1015+ }
10121016
10131017 // Tell the container to stop.
10141018 err := cont .Stop ()
@@ -1045,7 +1049,7 @@ func (m *manager) destroyContainerLocked(containerName string) error {
10451049
10461050// Detect all containers that have been added or deleted from the specified container.
10471051func (m * manager ) getContainersDiff (containerName string ) (added []info.ContainerReference , removed []info.ContainerReference , err error ) {
1048- // Get all subcontainers recursively.
1052+ // Get all subcontainers recursively.
10491053 m .containersLock .RLock ()
10501054 cont , ok := m .containers [namespacedContainerName {
10511055 Name : containerName ,
@@ -1067,8 +1071,8 @@ func (m *manager) getContainersDiff(containerName string) (added []info.Containe
10671071 // Determine which were added and which were removed.
10681072 allContainersSet := make (map [string ]* containerData )
10691073 for name , d := range m .containers {
1070- // Only add the canonical name.
1071- if d .info .Name == name .Name {
1074+ // Only add the canonical name. Preserve containers that were deleted because they were OOM-killed.
1075+ if d .info .Name == name .Name && d . oomEvents != 0 {
10721076 allContainersSet [name .Name ] = d
10731077 }
10741078 }
@@ -1092,8 +1096,8 @@ func (m *manager) getContainersDiff(containerName string) (added []info.Containe
10921096 return
10931097}
10941098
1095- // Detect the existing subcontainers and reflect the setup here.
1096- func (m * manager ) detectSubcontainers (containerName string ) error {
1099+ // Detect the existing subcontainers and reflect the setup here.
1100+ func (m * manager ) detectSubContainers (containerName string ) error {
10971101 added , removed , err := m .getContainersDiff (containerName )
10981102 if err != nil {
10991103 return err
@@ -1136,7 +1140,7 @@ func (m *manager) watchForNewContainers(quit chan error) error {
11361140 }
11371141
11381142 // There is a race between starting the watch and new container creation so we do a detection before we read new containers.
1139- err := m .detectSubcontainers ("/" )
1143+ err := m .detectSubContainers ("/" )
11401144 if err != nil {
11411145 return err
11421146 }
0 commit comments