@@ -46,6 +46,15 @@ const (
4646	// Important: The conmon attach socket uses an extra byte at the beginning of each 
4747	// message to specify the STREAM so we have to increase the buffer size by one 
4848	bufferSize  =  conmonConfig .BufSize  +  1 
49+ 
50+ 	// Healthcheck message type from conmon (using negative to avoid PID conflicts) 
51+ 	HealthCheckMsgStatusUpdate  =  - 100 
52+ 
53+ 	// Healthcheck status values sent by conmon (added to base message type -100) 
54+ 	HealthCheckStatusNone       =  0 
55+ 	HealthCheckStatusStarting   =  1 
56+ 	HealthCheckStatusHealthy    =  2 
57+ 	HealthCheckStatusUnhealthy  =  3 
4958)
5059
5160// ConmonOCIRuntime is an OCI runtime managed by Conmon. 
@@ -981,7 +990,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
981990	if  err  !=  nil  {
982991		return  0 , fmt .Errorf ("creating socket pair: %w" , err )
983992	}
984- 	defer  errorhandling .CloseQuiet (parentSyncPipe )
985993
986994	childStartPipe , parentStartPipe , err  :=  newPipe ()
987995	if  err  !=  nil  {
@@ -1038,6 +1046,9 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
10381046		args  =  append (args , "--conmon-pidfile" , ctr .config .ConmonPidFile )
10391047	}
10401048
1049+ 	// Add healthcheck-related arguments (build-conditional) 
1050+ 	args  =  r .addHealthCheckArgs (ctr , args )
1051+ 
10411052	if  r .noPivot  {
10421053		args  =  append (args , "--no-pivot" )
10431054	}
@@ -1199,6 +1210,8 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
11991210	// regardless of whether we errored or not, we no longer need the children pipes 
12001211	childSyncPipe .Close ()
12011212	childStartPipe .Close ()
1213+ 
1214+ 	// Note: parentSyncPipe is NOT closed here because it's used for continuous healthcheck monitoring 
12021215	if  err  !=  nil  {
12031216		return  0 , err 
12041217	}
@@ -1219,7 +1232,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
12191232		return  0 , fmt .Errorf ("conmon failed: %w" , err )
12201233	}
12211234
1222- 	pid , err  :=  readConmonPipeData (r .name , parentSyncPipe , ociLog )
1235+ 	pid , err  :=  readConmonPipeData (r .name , parentSyncPipe , ociLog ,  ctr )
12231236	if  err  !=  nil  {
12241237		if  err2  :=  r .DeleteContainer (ctr ); err2  !=  nil  {
12251238			logrus .Errorf ("Removing container %s from runtime after creation failed" , ctr .ID ())
@@ -1322,7 +1335,6 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
13221335		logDriverArg  =  define .NoLogging 
13231336	case  define .PassthroughLogging , define .PassthroughTTYLogging :
13241337		logDriverArg  =  define .PassthroughLogging 
1325- 	//lint:ignore ST1015 the default case has to be here 
13261338	default : //nolint:gocritic 
13271339		// No case here should happen except JSONLogging, but keep this here in case the options are extended 
13281340		logrus .Errorf ("%s logging specified but not supported. Choosing k8s-file logging instead" , ctr .LogDriver ())
@@ -1390,13 +1402,15 @@ func readConmonPidFile(pidFile string) (int, error) {
13901402	return  0 , nil 
13911403}
13921404
1405+ // syncInfo is one JSON message decoded from the conmon sync pipe; it is shared by readConmonPipeData and readConmonHealthCheckPipeData.
1406+ type  syncInfo  struct  {
1407+ 	Data     int     `json:"data"` // numeric payload: a positive value from the first message is treated as the container PID; -100 through -97 are healthcheck status updates
1408+ 	Message  string  `json:"message,omitempty"` // optional text accompanying Data; omitted from the JSON when empty (presumably an error description — confirm against conmon)
1409+ }
1410+ 
13931411// readConmonPipeData attempts to read a syncInfo struct from the pipe 
1394- func  readConmonPipeData (runtimeName  string , pipe  * os.File , ociLog  string ) (int , error ) {
1395- 	// syncInfo is used to return data from monitor process to daemon 
1396- 	type  syncInfo  struct  {
1397- 		Data     int     `json:"data"` 
1398- 		Message  string  `json:"message,omitempty"` 
1399- 	}
1412+ // If a non-nil ctr is passed and conmon reports a positive PID, it also starts a goroutine that keeps reading healthcheck status updates from the same pipe 
1413+ func  readConmonPipeData (runtimeName  string , pipe  * os.File , ociLog  string , ctr  ... * Container ) (int , error ) {
14001414
14011415	// Wait to get container pid from conmon 
14021416	type  syncStruct  struct  {
@@ -1408,15 +1422,24 @@ func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int,
14081422		var  si  * syncInfo 
14091423		rdr  :=  bufio .NewReader (pipe )
14101424		b , err  :=  rdr .ReadBytes ('\n' )
1425+ 
1426+ 		// Log the raw JSON string received from conmon 
1427+ 		logrus .Debugf ("HEALTHCHECK: Raw JSON received from conmon: %q" , string (b ))
1428+ 		logrus .Debugf ("HEALTHCHECK: JSON length: %d bytes" , len (b ))
1429+ 
14111430		// ignore EOF here, error is returned even when data was read 
14121431		// if it is no valid json unmarshal will fail below 
14131432		if  err  !=  nil  &&  ! errors .Is (err , io .EOF ) {
1433+ 			logrus .Debugf ("HEALTHCHECK: Error reading from conmon pipe: %v" , err )
14141434			ch  <-  syncStruct {err : err }
1435+ 			return 
14151436		}
14161437		if  err  :=  json .Unmarshal (b , & si ); err  !=  nil  {
1438+ 			logrus .Debugf ("HEALTHCHECK: Failed to unmarshal JSON from conmon: %v" , err )
14171439			ch  <-  syncStruct {err : fmt .Errorf ("conmon bytes %q: %w" , string (b ), err )}
14181440			return 
14191441		}
1442+ 		logrus .Debugf ("HEALTHCHECK: Successfully parsed JSON from conmon: Data=%d, Message=%q" , si .Data , si .Message )
14201443		ch  <-  syncStruct {si : si }
14211444	}()
14221445
@@ -1436,6 +1459,13 @@ func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int,
14361459			return  - 1 , fmt .Errorf ("container create failed (no logs from conmon): %w" , ss .err )
14371460		}
14381461		logrus .Debugf ("Received: %d" , ss .si .Data )
1462+ 
1463+ 		// Start continuous healthcheck monitoring if container is provided and PID is valid 
1464+ 		if  len (ctr ) >  0  &&  ctr [0 ] !=  nil  &&  ss .si .Data  >  0  {
1465+ 			logrus .Debugf ("HEALTHCHECK: Starting continuous healthcheck monitoring for container %s (PID: %d)" , ctr [0 ].ID (), ss .si .Data )
1466+ 			go  readConmonHealthCheckPipeData (ctr [0 ], pipe )
1467+ 		}
1468+ 
14391469		if  ss .si .Data  <  0  {
14401470			if  ociLog  !=  ""  {
14411471				ociLogData , err  :=  os .ReadFile (ociLog )
@@ -1459,6 +1489,79 @@ func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int,
14591489	return  data , nil 
14601490}
14611491
1492+ // readConmonHealthCheckPipeData continuously reads healthcheck status updates from conmon 
1493+ func  readConmonHealthCheckPipeData (ctr  * Container , pipe  * os.File ) {
1494+ 	logrus .Debugf ("HEALTHCHECK: Starting continuous healthcheck monitoring for container %s" , ctr .ID ())
1495+ 
1496+ 	rdr  :=  bufio .NewReader (pipe )
1497+ 	for  {
1498+ 		// Read one line from the pipe 
1499+ 		b , err  :=  rdr .ReadBytes ('\n' )
1500+ 		if  err  !=  nil  {
1501+ 			if  err  ==  io .EOF  {
1502+ 				logrus .Debugf ("HEALTHCHECK: Pipe closed for container %s, stopping monitoring" , ctr .ID ())
1503+ 				return 
1504+ 			}
1505+ 			logrus .Errorf ("HEALTHCHECK: Error reading from pipe for container %s: %v" , ctr .ID (), err )
1506+ 			return 
1507+ 		}
1508+ 
1509+ 		// Log the raw JSON string received from conmon 
1510+ 		logrus .Debugf ("HEALTHCHECK: Raw JSON received from conmon for container %s: %q" , ctr .ID (), string (b ))
1511+ 		logrus .Debugf ("HEALTHCHECK: JSON length: %d bytes" , len (b ))
1512+ 
1513+ 		// Parse the JSON 
1514+ 		var  si  syncInfo 
1515+ 		if  err  :=  json .Unmarshal (b , & si ); err  !=  nil  {
1516+ 			logrus .Errorf ("HEALTHCHECK: Failed to parse JSON from conmon for container %s: %v" , ctr .ID (), err )
1517+ 			continue 
1518+ 		}
1519+ 
1520+ 		logrus .Debugf ("HEALTHCHECK: Parsed sync info for container %s: Data=%d, Message=%q" , ctr .ID (), si .Data , si .Message )
1521+ 
1522+ 		// Handle healthcheck status updates based on your new encoding scheme 
1523+ 		// Base message type is -100, status values are added to it: 
1524+ 		// -100 + 0 (none) = -100 
1525+ 		// -100 + 1 (starting) = -99 
1526+ 		// -100 + 2 (healthy) = -98 
1527+ 		// -100 + 3 (unhealthy) = -97 
1528+ 		if  si .Data  >=  HealthCheckMsgStatusUpdate  &&  si .Data  <=  HealthCheckMsgStatusUpdate + HealthCheckStatusUnhealthy  {
1529+ 			statusValue  :=  si .Data  -  HealthCheckMsgStatusUpdate  // Convert back to status value 
1530+ 			var  status  string 
1531+ 
1532+ 			switch  statusValue  {
1533+ 			case  HealthCheckStatusNone :
1534+ 				status  =  define .HealthCheckReset  // "reset" or "none" 
1535+ 			case  HealthCheckStatusStarting :
1536+ 				status  =  define .HealthCheckStarting  // "starting" 
1537+ 			case  HealthCheckStatusHealthy :
1538+ 				status  =  define .HealthCheckHealthy  // "healthy" 
1539+ 			case  HealthCheckStatusUnhealthy :
1540+ 				status  =  define .HealthCheckUnhealthy  // "unhealthy" 
1541+ 			default :
1542+ 				logrus .Errorf ("HEALTHCHECK: Unknown status value %d for container %s" , statusValue , ctr .ID ())
1543+ 				continue 
1544+ 			}
1545+ 
1546+ 			logrus .Infof ("HEALTHCHECK: Received healthcheck status update for container %s: %s (message type: %d, status value: %d)" ,
1547+ 				ctr .ID (), status , si .Data , statusValue )
1548+ 
1549+ 			// Update the container's healthcheck status 
1550+ 			if  err  :=  ctr .updateHealthStatus (status ); err  !=  nil  {
1551+ 				logrus .Errorf ("HEALTHCHECK: Failed to update healthcheck status for container %s: %v" , ctr .ID (), err )
1552+ 			} else  {
1553+ 				logrus .Infof ("HEALTHCHECK: Successfully updated healthcheck status for container %s to %s" , ctr .ID (), status )
1554+ 			}
1555+ 		} else  if  si .Data  <  0  {
1556+ 			// Other negative message types - might be healthcheck related but not recognized 
1557+ 			logrus .Debugf ("HEALTHCHECK: Received unrecognized negative message type %d for container %s - might be healthcheck related" , si .Data , ctr .ID ())
1558+ 		} else  if  si .Data  >  0  {
1559+ 			// Positive message types - not healthcheck related 
1560+ 			logrus .Debugf ("HEALTHCHECK: Received positive message type %d for container %s - not healthcheck related" , si .Data , ctr .ID ())
1561+ 		}
1562+ 	}
1563+ }
1564+ 
14621565// writeConmonPipeData writes nonce data to a pipe 
14631566func  writeConmonPipeData (pipe  * os.File ) error  {
14641567	someData  :=  []byte {0 }
0 commit comments