Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mellanox] Use the explicit mst device of SPC during fw-upgrade #98

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -1049,8 +1049,8 @@ sudo rm -rf $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
sudo cp platform/mellanox/nv-syncd-shared/nv-syncd-shared.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable nv-syncd-shared

# Install minicom package
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install minicom
# Install packages required specifically by the Mellanox platform
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install minicom xmlstarlet

sudo LANG=C chroot $FILESYSTEM_ROOT systemctl disable rshim.service
{% endif %}
Expand Down
56 changes: 36 additions & 20 deletions platform/mellanox/mlnx-fw-upgrade.j2
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
{#-
Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES.
Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down Expand Up @@ -32,6 +35,7 @@ declare -r EXIT_SUCCESS="0"
declare -r EXIT_FAILURE="1"
declare -r FW_ALREADY_UPDATED_FAILURE="2"

declare -r QUERY_XML="mlxfwmanager --query-format XML"
declare -r QUERY_CMD="mlxfwmanager --query"
declare -r LIST_CONTENT_CMD="mlxfwmanager --list-content"
declare -r BURN_CMD="mlxfwmanager -u -f -y"
Expand Down Expand Up @@ -163,21 +167,22 @@ function WaitForDevice() {
local -i QUERY_RETRY_COUNT_MAX="10"
local -i QUERY_RETRY_COUNT="0"

${QUERY_CMD} > /dev/null
local SPC_MST_DEV=$(GetSPCMstDevice)

while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("$?" -ne "${EXIT_SUCCESS}") ]]; do
while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${SPC_MST_DEV}" == "${UNKN_MST}") ]]; do
sleep 1s
((QUERY_RETRY_COUNT++))
output=$(eval ${MFT_DIAGNOSIS_FLAGS} ${QUERY_CMD}) > /dev/null
SPC_MST_DEV=$(GetSPCMstDevice)
done

ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
# Exit failure and print the detailed information
echo "$output"
if [[ "${SPC_MST_DEV}" == "${UNKN_MST}" ]]; then
# Couldn't Detect the Spectrum ASIC. Exit failure and print the detailed information
output=$(${QUERY_CMD})
failure_msg="${output#*Fail : }"
ExitFailure "FW Query command: ${QUERY_CMD} failed to wait for device with error: ${failure_msg}"
ExitFailure "FW Query command: ${QUERY_CMD} failed to detect spectrum device with error: ${failure_msg}"
fi

LogInfo "Spectrum ASIC successfully detected at ${SPC_MST_DEV}"
}

function GetAsicType() {
Expand Down Expand Up @@ -206,8 +211,8 @@ function GetAsicType() {
exit "${EXIT_FAILURE}"
}

function GetMstDevice() {
local _MST_DEVICE="$(ls /dev/mst/*_pci_cr0 2>&1)"
function GetSPCMstDevice() {
local _MST_DEVICE=$(${QUERY_XML} | xmlstarlet sel -t -m "//Device[contains(@type,'Spectrum')]" -v @pciName | head -n 1)

if [[ ! -c "${_MST_DEVICE}" ]]; then
echo "${UNKN_MST}"
Expand All @@ -218,6 +223,19 @@ function GetMstDevice() {
exit "${EXIT_SUCCESS}"
}

# Evaluate an XPath expression against an XML file with xmlstarlet.
# Arguments:
#   $1 - XPath expression to select
#   $2 - path to the XML file to query
# Outputs:
#   the selected value on stdout (printed verbatim, without word splitting
#   or glob expansion)
# On failure:
#   calls ExitFailure with the XPath and the contents of the XML file when
#   xmlstarlet returns a status other than EXIT_SUCCESS.
function GetXPathXML() {
    local xpath="$1"
    local xml_file="$2"
    local val
    # Kept local (as RunCmd does) so the caller's ERROR_CODE is not clobbered.
    local ERROR_CODE="${EXIT_SUCCESS}"

    # Assignment is separate from 'local' so xmlstarlet's exit status is not
    # masked; the file path is quoted so paths with whitespace stay one argument.
    val=$(xmlstarlet sel -t -v "${xpath}" "${xml_file}")
    ERROR_CODE="$?"
    if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
        ExitFailure "XML Fetch failed for path: ${xpath}, file: $(cat "${xml_file}")"
    fi

    # printf instead of an unquoted echo: no glob expansion of the value and
    # no special treatment of values that look like echo options.
    printf '%s\n' "${val}"
}

function RunCmd() {
local ERROR_CODE="${EXIT_SUCCESS}"

Expand Down Expand Up @@ -247,7 +265,7 @@ function RunFwUpdateCmd() {

if [[ "${ERROR_CODE}" == "${FW_ALREADY_UPDATED_FAILURE}" ]]; then
LogInfo "FW reactivation is required. Reactivating and updating FW ..."
local -r _MST_DEVICE="$(GetMstDevice)"
local -r _MST_DEVICE="$(GetSPCMstDevice)"
local -r _CMD="flint -d ${_MST_DEVICE} ir"
output=$(eval "${_CMD}")

Expand Down Expand Up @@ -285,15 +303,14 @@ function UpgradeFW() {
ExitFailure "no such file: ${_FW_FILE}"
fi

RunCmd "${QUERY_CMD} -o ${QUERY_FILE}"
local -r _FW_CURRENT_INFO="$(grep FW ${QUERY_FILE})"
local -r _FW_CURRENT="$(echo ${_FW_CURRENT_INFO} | cut -f2 -d' ')"
local -r _PSID_INFO="$(grep PSID ${QUERY_FILE})"
local -r _PSID="$(echo ${_PSID_INFO} | cut -f2 -d' ')"
local -r _MST_DEVICE=$(GetSPCMstDevice)
RunCmd "${QUERY_XML} -d ${_MST_DEVICE} -o ${QUERY_FILE}"
local -r _FW_CURRENT=$(GetXPathXML "//Device/Versions/FW/@current" ${QUERY_FILE})
local -r _PSID=$(GetXPathXML "//Device/@psid" ${QUERY_FILE})

RunCmd "${LIST_CONTENT_CMD} -i ${_FW_FILE} -o ${LIST_CONTENT_FILE}"
RunCmd "${LIST_CONTENT_CMD} -i ${_FW_FILE} -d ${_MST_DEVICE} -o ${LIST_CONTENT_FILE}"
local -r _FW_AVAILABLE_INFO="$(grep ${_PSID} ${LIST_CONTENT_FILE})"
local -r _FW_AVAILABLE="$(echo ${_FW_AVAILABLE_INFO} | cut -f4 -d' ')"
local -r _FW_AVAILABLE="$(echo ${_FW_AVAILABLE_INFO} | awk '{print $4}')"

if [[ -z "${_FW_CURRENT}" ]]; then
ExitFailure "could not retreive current FW version"
Expand All @@ -307,7 +324,6 @@ function UpgradeFW() {
ExitSuccess "firmware is up to date"
else
LogNotice "firmware upgrade is required. Installing compatible version..."
local -r _MST_DEVICE="$(GetMstDevice)"
if [[ "${_MST_DEVICE}" = "${UNKN_MST}" ]]; then
LogWarning "could not find fastest mst device, using default device"
RunFwUpdateCmd "-i ${_FW_FILE}"
Expand Down