Skip to content

Commit

Permalink
[system-health] When disabling a feature the SYSTEM_READY|SYSTEM_STAT…
Browse files Browse the repository at this point in the history
…E was not updated (sonic-net#14823)

- Why I did it
If you enable feature and then disable it, System Ready status change to Not Ready

A disabled feature should not affect the system ready status.

- How I did it
During the disable flow of dhcp_relay, it entered the dnsrvs_name list, which caused the SYSTEM_STATE key to be set to DOWN. Right after that, the dhcp_relay service was removed from the full service list, however, but, when it was removed from the dnsrvs_name, there was no flow to reset the system state back to UP even though there was no more services in down state.

- How to verify it
root@qa-eth-vt01-2-3700v:/home/admin# config feature state dhcp_relay enabled 
root@qa-eth-vt01-2-3700v:/home/admin# show system-health sysready-status 

root@qa-eth-vt01-2-3700v:/home/admin# config feature state dhcp_relay disabled
root@qa-eth-vt01-2-3700v:/home/admin# show system-health sysready-status 

Should see
System is ready
  • Loading branch information
DavidZagury authored May 30, 2023
1 parent 6852fcd commit e830491
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/system-health/health_checker/sysmonitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,12 @@ def check_unit_status(self, event):
if event in self.dnsrvs_name:
self.dnsrvs_name.remove(event)

if len(self.dnsrvs_name) == 0:
astate = "UP"
else:
astate = "DOWN"
self.publish_system_status(astate)

srv_name,last = event.split('.')
# stop on service maybe propagated to timers and in that case,
# the state_db entry for the service should not be deleted
Expand Down
3 changes: 3 additions & 0 deletions src/system-health/tests/mock_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def keys(self, db_id, pattern):
def get_all(self, db_id, key):
return MockConnector.data[key]

def exists(self, db_id, key):
return key in MockConnector.data

def set(self, db_id, key, field, value):
self.data[key] = {}
self.data[key][field] = value
Expand Down
25 changes: 25 additions & 0 deletions src/system-health/tests/test_system_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,31 @@ def test_check_unit_status():
assert 'mock_bgp.service' in sysmon.dnsrvs_name


@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(side_effect=[
['mock_snmp.service', 'mock_bgp.service', 'mock_ns.service'],
['mock_snmp.service', 'mock_ns.service']
]))
@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_bgp.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Down','-','-')))
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
@patch('health_checker.sysmonitor.Sysmonitor.print_console_message', MagicMock())
def test_system_status_up_after_service_removed():
sysmon = Sysmonitor()
sysmon.publish_system_status('UP')

sysmon.check_unit_status('mock_bgp.service')
assert 'mock_bgp.service' in sysmon.dnsrvs_name
result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status')
print("system status result before service was removed from system: {}".format(result))
assert result == "DOWN"

sysmon.check_unit_status('mock_bgp.service')
assert 'mock_bgp.service' not in sysmon.dnsrvs_name
result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status')
print("system status result after service was removed from system: {}".format(result))
assert result == "UP"


@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service']))
def test_check_unit_status_timer():
sysmon = Sysmonitor()
Expand Down

0 comments on commit e830491

Please sign in to comment.