Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GCU] Complete RDMA Platform Validation Checks #2791

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 112 additions & 5 deletions generic_config_updater/field_operation_validators.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,117 @@
from sonic_py_common import device_info
import os
import re
import json
import jsonpointer
import subprocess
from sonic_py_common import device_info
from .gu_common import GenericConfigUpdaterError

def rdma_config_update_validator():
version_info = device_info.get_sonic_version_info()
asic_type = version_info.get('asic_type')

if (asic_type != 'mellanox' and asic_type != 'broadcom' and asic_type != 'cisco-8000'):
# Directory that holds this module; the validators config file is looked up inside it.
SCRIPT_DIR = os.path.split(os.path.realpath(__file__))[0]
# Path of the JSON config read by the functions in this module.
GCU_TABLE_MOD_CONF_FILE = SCRIPT_DIR + "/gcu_field_operation_validators.conf.json"

def get_asic_name():
asic = "unknown"

if os.path.exists(GCU_TABLE_MOD_CONF_FILE):
with open(GCU_TABLE_MOD_CONF_FILE, "r") as s:
gcu_field_operation_conf = json.load(s)
else:
raise GenericConfigUpdaterError("GCU table modification validators config file not found")

asic_mapping = gcu_field_operation_conf["helper_data"]["rdma_config_update_validator"]

if device_info.get_sonic_version_info()['asic_type'] == 'cisco-8000':
asic = "cisco-8000"
elif device_info.get_sonic_version_info()['asic_type'] == 'mellanox':
GET_HWSKU_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.hwsku"
spc1_hwskus = asic_mapping["mellanox_asics"]["spc1"]
proc = subprocess.Popen(GET_HWSKU_CMD, shell=True, universal_newlines=True, stdout=subprocess.PIPE)
output, err = proc.communicate()
hwsku = output.rstrip('\n')
if hwsku.lower() in [spc1_hwsku.lower() for spc1_hwsku in spc1_hwskus]:
asic = "spc1"
elif device_info.get_sonic_version_info()['asic_type'] == 'broadcom':
command = ["sudo", "lspci"]
Copy link
Contributor

@qiluo-msft qiluo-msft Apr 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

command

The runtime check is heavy. We could build the mapping offline and move the hardware SKUs into gcu_field_operation_validators.conf.json.

Then, at runtime, you just check the hwsku. #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved mapping to gcu_field_operation_validators.conf.json.

proc = subprocess.Popen(command, universal_newlines=True, stdout=subprocess.PIPE)
output, err = proc.communicate()
broadcom_asics = asic_mapping["broadcom_asics"]
for asic_shorthand, asic_descriptions in broadcom_asics.items():
if asic != "unknown":
break
for asic_description in asic_descriptions:
if asic_description in output:
asic = asic_shorthand
break

return asic


def rdma_config_update_validator(patch_element):
asic = get_asic_name()
if asic == "unknown":
Copy link
Contributor

@qiluo-msft qiluo-msft Apr 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

==

You can compare and return earlier. #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved the comparison to the start of the validator function.

return False
version_info = device_info.get_sonic_version_info()
build_version = version_info.get('build_version')
version_substrings = build_version.split('.')
branch_version = None

for substring in version_substrings:
if substring.isdigit() and re.match(r'^\d{8}$', substring):
branch_version = substring

path = patch_element["path"]
table = jsonpointer.JsonPointer(path).parts[0]

def _get_fields_in_patch():
    """Return the cleaned field name(s) addressed by the enclosing patch element.

    Reads ``path`` and ``patch_element`` from the enclosing scope.

    The table name (first path part) is dropped, and so is every remaining
    path part that contains a digit. For a path like
    /PFC_WD/Ethernet112/action this removes Ethernet112, so the relevant
    field is clearly ``action`` rather than ``Ethernet112``. The remaining
    parts are joined with '/' and lower-cased.

    When the patch value is a dict, one entry is returned per key of that
    dict, each prefixed with the cleaned path. Otherwise a single-element
    list holding the cleaned path is returned.

    Returns:
        list of str: the cleaned field names.
    """
    field_elements = jsonpointer.JsonPointer(path).parts[1:]
    # Any element containing a digit is treated as an instance name
    # (e.g. Ethernet112) and is not part of the field name.
    cleaned_field_elements = [
        elem for elem in field_elements
        if not any(char.isdigit() for char in elem)
    ]
    cleaned_field = '/'.join(cleaned_field_elements).lower()

    value = patch_element.get('value')
    if not isinstance(value, dict):
        return [cleaned_field]

    # If the path named only the table key (e.g. /PFC_WD/Ethernet112), the
    # cleaned prefix is empty. Return the bare key in that case; prefixing
    # it with '/' would yield '/action', which never matches a configured
    # field such as 'action'.
    return [
        cleaned_field + '/' + key if cleaned_field else key
        for key in value
    ]

if os.path.exists(GCU_TABLE_MOD_CONF_FILE):
with open(GCU_TABLE_MOD_CONF_FILE, "r") as s:
gcu_field_operation_conf = json.load(s)
else:
raise GenericConfigUpdaterError("GCU table modification validators config file not found")

tables = gcu_field_operation_conf["tables"]
scenarios = tables[table]["validator_data"]["rdma_config_update_validator"]

cleaned_fields = _get_fields_in_patch()
for cleaned_field in cleaned_fields:
scenario = None
for key in scenarios.keys():
if cleaned_field in scenarios[key]["fields"]:
scenario = scenarios[key]
break

if scenario is None:
return False

if scenario["platforms"][asic] == "":
return False

if patch_element['op'] not in scenario["operations"]:
return False

if branch_version is not None:
if asic in scenario["platforms"]:
if branch_version < scenario["platforms"][asic]:
return False
else:
return False

return True
119 changes: 118 additions & 1 deletion generic_config_updater/gcu_field_operation_validators.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,128 @@
"e.g. 'show.acl.test_acl'",
"",
"field_operation_validators for a given table defines a list of validators that all must pass for modification to the specified field and table to be allowed",
"",
"validator_data provides data relevant to each validator",
""
],
"helper_data": {
"rdma_config_update_validator": {
"mellanox_asics": {
"spc1": [ "ACS-MSN2700", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010", "Mellanox-SN2700", "Mellanox-SN2700-D48C8" ]
},
"broadcom_asics": {
"th": ["Broadcom Limited Device b960", "Broadcom Limited Broadcom BCM56960"],
"th2": ["Broadcom Limited Device b971"],
"td2": ["Broadcom Limited Device b850", "Broadcom Limited Broadcom BCM56850"],
"td3": ["Broadcom Limited Device b870", "Broadcom Inc. and subsidiaries Device b870"]
}
}
},
"tables": {
"PFC_WD": {
"field_operation_validators": [ "generic_config_updater.field_operation_validators.rdma_config_update_validator" ]
"field_operation_validators": [ "generic_config_updater.field_operation_validators.rdma_config_update_validator" ],
"validator_data": {
"rdma_config_update_validator": {
"PFCWD enable/disable": {
"fields": [
"restoration_time",
"detection_time",
"action",
"global/poll_interval"
],
"operations": ["remove", "add", "replace"],
"platforms": {
"spc1": "20181100",
"td2": "20181100",
"th": "20181100",
"th2": "20181100",
"td3": "20181100",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized there is an error in my doc: this should be 202012. td3 is supported only from that release onward.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

"cisco-8000": "20201200"
}
}
}
}
},
"BUFFER_POOL": {
"field_operation_validators": [ "generic_config_updater.field_operation_validators.rdma_config_update_validator" ],
"validator_data": {
"rdma_config_update_validator": {
"Shared/headroom pool size changes": {
"fields": [
"ingress_lossless_pool/xoff",
"ingress_lossless_pool/size",
"egress_lossy_pool/size"
],
"operations": ["replace"],
"platforms": {
"spc1": "20191100",
"td2": "",
"th": "20221100",
"th2": "20221100",
"td3": "20221100",
"cisco-8000": ""
}
}
}
}
},
"BUFFER_PROFILE": {
"field_operation_validators": [ "generic_config_updater.field_operation_validators.rdma_config_update_validator" ],
"validator_data": {
"rdma_config_update_validator": {
"Dynamic threshold tuning": {
"fields": [
"dynamic_th"
],
"operations": ["replace"],
"platforms": {
"spc1": "20181100",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need to check whether the build number is less than 20220531, given that GCU support was introduced in 202205?
It satisfies this rule by default.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but I have this information in the conf file for completion and clarity.

I left the comparison in field_operation_validators.py as-is because this version comparison is always going to be necessary for cases only supported in 202211+. Do you think it's better to not define lower versions in the conf file, and only run this version comparison check if the higher version is defined in the conf file? I figure for this implementation, I would have to add code just to check if the version is defined, which may be an extra unnecessary step.

"td2": "20181100",
"th": "20181100",
"th2": "20181100",
"td3": "20201200",
"cisco-8000": ""
}
},
"PG headroom modification": {
"fields": [
"xoff"
],
"operations": ["replace"],
"platforms": {
"spc1": "20191100",
"td2": "",
isabelmsft marked this conversation as resolved.
Show resolved Hide resolved
"th": "20221100",
"th2": "20221100",
"td3": "20221100",
"cisco-8000": ""
}
}
}
}
},
"WRED_PROFILE": {
"field_operation_validators": [ "generic_config_updater.field_operation_validators.rdma_config_update_validator" ],
"validator_data": {
"rdma_config_update_validator": {
"ECN tuning": {
"fields": [
"azure_lossless/green_min_threshold",
"azure_lossless/green_max_threshold",
"azure_lossless/green_drop_probability"
],
"operations": ["replace"],
"platforms": {
"spc1": "20181100",
"td2": "20181100",
"th": "20181100",
"th2": "20181100",
"td3": "20201200",
"cisco-8000": ""
}
}
}
}
}
}
}
6 changes: 3 additions & 3 deletions generic_config_updater/gu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,15 @@ def validate_field_operation(self, old_config, target_config):
if any(op['op'] == operation and field == op['path'] for op in patch):
raise IllegalPatchOperationError("Given patch operation is invalid. Operation: {} is illegal on field: {}".format(operation, field))

def _invoke_validating_function(cmd):
def _invoke_validating_function(cmd, jsonpatch_element):
# cmd is in the format as <package/module name>.<method name>
method_name = cmd.split(".")[-1]
module_name = ".".join(cmd.split(".")[0:-1])
if module_name != "generic_config_updater.field_operation_validators" or "validator" not in method_name:
raise GenericConfigUpdaterError("Attempting to call invalid method {} in module {}. Module must be generic_config_updater.field_operation_validators, and method must be a defined validator".format(method_name, module_name))
module = importlib.import_module(module_name, package=None)
method_to_call = getattr(module, method_name)
return method_to_call()
return method_to_call(jsonpatch_element)

if os.path.exists(GCU_FIELD_OP_CONF_FILE):
with open(GCU_FIELD_OP_CONF_FILE, "r") as s:
Expand All @@ -194,7 +194,7 @@ def _invoke_validating_function(cmd):
validating_functions.update(tables.get(table, {}).get("field_operation_validators", []))

for function in validating_functions:
if not _invoke_validating_function(function):
if not _invoke_validating_function(function, element):
raise IllegalPatchOperationError("Modification of {} table is illegal- validating function {} returned False".format(table, function))


Expand Down
Loading