Skip to content

Commit

Permalink
[healthd] Use unix_socket_path instead of loopback ip (#14843)
Browse files Browse the repository at this point in the history
- Why I did it

interfaces-config service restarts networking service, which in-turn results in loopback interface address is being removed and reassigned back

If the system-health happens to start during that instance expections and logs like this are seen:

Apr 15 18:14:49.357869 r-panther-20 ERR healthd: update system status exception:Unable to connect to redis: Cannot assign requested address
Apr 15 18:14:49.429778 r-panther-20 ERR healthd: subscribe_statedb exited- Unable to connect to redis: Cannot assign requested address
Apr 15 18:14:52.218594 r-panther-20 ERR healthd: system_service_Map_base::at
Apr 15 18:14:52.219714 r-panther-20 ERR healthd: system_service_Map_base::at
Apr 15 18:14:55.218636 r-panther-20 ERR healthd: system_service_Map_base::at
Apr 15 18:14:55.218722 r-panther-20 ERR healthd: system_service_Map_base::at

- How I did it
use unix socket path

Signed-off-by: Vivek Reddy Karri <vkarri@nvidia.com>
  • Loading branch information
vivekrnv authored May 26, 2023
1 parent f3ce9eb commit bc9c054
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/system-health/health_checker/hardware_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class HardwareChecker(HealthChecker):

def __init__(self):
HealthChecker.__init__(self)
self._db = SonicV2Connector(host="127.0.0.1")
self._db = SonicV2Connector(use_unix_socket_path=True)
self._db.connect(self._db.STATE_DB)

def get_category(self):
Expand Down
2 changes: 1 addition & 1 deletion src/system-health/health_checker/service_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def check_services(self, config):
config (config.Config): Health checker configuration.
"""
if not self.config_db:
self.config_db = swsscommon.ConfigDBConnector()
self.config_db = swsscommon.ConfigDBConnector(use_unix_socket_path=True)
self.config_db.connect()
feature_table = self.config_db.get_table("FEATURE")
expected_running_containers, self.container_feature_dict = self.get_expected_running_containers(feature_table)
Expand Down
16 changes: 8 additions & 8 deletions src/system-health/health_checker/sysmonitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self,myQ):
self.task_queue = myQ

def subscribe_statedb(self):
state_db = swsscommon.DBConnector("STATE_DB", REDIS_TIMEOUT_MS, True)
state_db = swsscommon.DBConnector("STATE_DB", REDIS_TIMEOUT_MS, False)
sel = swsscommon.Select()
cst = swsscommon.SubscriberStateTable(state_db, "FEATURE")
sel.addSelectable(cst)
Expand Down Expand Up @@ -122,7 +122,7 @@ def __init__(self):
def post_system_status(self, state):
try:
if not self.state_db:
self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
self.state_db.connect(self.state_db.STATE_DB)

self.state_db.set(self.state_db.STATE_DB, "SYSTEM_READY|SYSTEM_STATE", "Status", state)
Expand All @@ -135,7 +135,7 @@ def post_system_status(self, state):
def get_all_service_list(self):

if not self.config_db:
self.config_db = swsscommon.ConfigDBConnector()
self.config_db = swsscommon.ConfigDBConnector(use_unix_socket_path=True)
self.config_db.connect()

dir_list = []
Expand Down Expand Up @@ -197,10 +197,10 @@ def get_service_from_feature_table(self, dir_list):
#else, just return Up
def get_app_ready_status(self, service):
if not self.state_db:
self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
self.state_db.connect(self.state_db.STATE_DB)
if not self.config_db:
self.config_db = swsscommon.ConfigDBConnector()
self.config_db = swsscommon.ConfigDBConnector(use_unix_socket_path=True)
self.config_db.connect()

fail_reason = ""
Expand Down Expand Up @@ -248,7 +248,7 @@ def run_systemctl_show(self, service):
#Sets the service status to state db
def post_unit_status(self, srv_name, srv_status, app_status, fail_reason, update_time):
if not self.state_db:
self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
self.state_db.connect(self.state_db.STATE_DB)

key = 'ALL_SERVICE_STATUS|{}'.format(srv_name)
Expand Down Expand Up @@ -378,7 +378,7 @@ def update_system_status(self):
def check_unit_status(self, event):
#global dnsrvs_name
if not self.state_db:
self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
self.state_db.connect(self.state_db.STATE_DB)
astate = "DOWN"

Expand Down Expand Up @@ -419,7 +419,7 @@ def check_unit_status(self, event):

def system_service(self):
if not self.state_db:
self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True)
self.state_db.connect(self.state_db.STATE_DB)

mpmgr = multiprocessing.Manager()
Expand Down
2 changes: 1 addition & 1 deletion src/system-health/scripts/healthd
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class HealthDaemon(DaemonBase):
Constructor of HealthDaemon.
"""
DaemonBase.__init__(self, SYSLOG_IDENTIFIER)
self._db = SonicV2Connector(host="127.0.0.1")
self._db = SonicV2Connector(use_unix_socket_path=True)
self._db.connect(self._db.STATE_DB)
self.stop_event = threading.Event()

Expand Down
2 changes: 1 addition & 1 deletion src/system-health/tests/mock_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ class MockConnector(object):
STATE_DB = None
data = {}

def __init__(self, host):
def __init__(self, use_unix_socket_path):
pass

def connect(self, db_id):
Expand Down

0 comments on commit bc9c054

Please sign in to comment.