From ff302afd5a19b2641fcd4bdf86561aa4f1155e62 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Tue, 10 Jan 2023 13:14:37 -0600 Subject: [PATCH 1/4] feat(ti): set L2 cache data ram latency on A72 cores to 4 cycles The Cortex-A72 based cores on K3 platforms can be clocked fast enough that an extra latency cycle is needed to ensure correct L2 access. Add a K3_DATA_RAM_4_LATENCY build option that selects the 4-cycle setting and enable it in the new J784s4 board makefile; builds that leave the option unset keep the existing 3-cycle latency. Signed-off-by: Andrew Davis Change-Id: I639091dd0d2de09572bf0f73ac404e306e336883 --- include/lib/cpus/aarch32/cortex_a72.h | 1 + include/lib/cpus/aarch64/cortex_a72.h | 1 + plat/ti/k3/board/j784s4/board.mk | 28 +++++++++++++++++++++++++++ plat/ti/k3/common/k3_helpers.S | 10 +++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 plat/ti/k3/board/j784s4/board.mk diff --git a/include/lib/cpus/aarch32/cortex_a72.h b/include/lib/cpus/aarch32/cortex_a72.h index 4b1af61ca3..c77484026f 100644 --- a/include/lib/cpus/aarch32/cortex_a72.h +++ b/include/lib/cpus/aarch32/cortex_a72.h @@ -47,6 +47,7 @@ #define CORTEX_A72_L2CTLR_TAG_RAM_LATENCY_SHIFT U(6) #define CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES U(0x2) +#define CORTEX_A72_L2_DATA_RAM_LATENCY_4_CYCLES U(0x3) #define CORTEX_A72_L2_TAG_RAM_LATENCY_2_CYCLES U(0x1) #define CORTEX_A72_L2_TAG_RAM_LATENCY_3_CYCLES U(0x2) diff --git a/include/lib/cpus/aarch64/cortex_a72.h b/include/lib/cpus/aarch64/cortex_a72.h index 28b440e19d..b6ed820393 100644 --- a/include/lib/cpus/aarch64/cortex_a72.h +++ b/include/lib/cpus/aarch64/cortex_a72.h @@ -65,6 +65,7 @@ #define CORTEX_A72_L2_DATA_RAM_LATENCY_MASK U(0x7) #define CORTEX_A72_L2_TAG_RAM_LATENCY_MASK U(0x7) #define CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES U(0x2) +#define CORTEX_A72_L2_DATA_RAM_LATENCY_4_CYCLES U(0x3) #define CORTEX_A72_L2_TAG_RAM_LATENCY_2_CYCLES U(0x1) #define CORTEX_A72_L2_TAG_RAM_LATENCY_3_CYCLES U(0x2) diff --git a/plat/ti/k3/board/j784s4/board.mk b/plat/ti/k3/board/j784s4/board.mk new file mode 100644 index 0000000000..c7fcb00160 --- /dev/null +++ 
b/plat/ti/k3/board/j784s4/board.mk @@ -0,0 +1,28 @@ +# +# Copyright (c) 2022, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +BL32_BASE ?= 0x9e800000 +$(eval $(call add_define,BL32_BASE)) + +PRELOADED_BL33_BASE ?= 0x80080000 +$(eval $(call add_define,PRELOADED_BL33_BASE)) + +K3_HW_CONFIG_BASE ?= 0x82000000 +$(eval $(call add_define,K3_HW_CONFIG_BASE)) + +# Define sec_proxy usage as the full prioritized communication scheme +K3_SEC_PROXY_LITE := 0 +$(eval $(call add_define,K3_SEC_PROXY_LITE)) + +# Use a 4 cycle data RAM latency for J784s4 +K3_DATA_RAM_4_LATENCY := 1 +$(eval $(call add_define,K3_DATA_RAM_4_LATENCY)) + +# System coherency is managed in hardware +USE_COHERENT_MEM := 1 + +PLAT_INCLUDES += \ + -Iplat/ti/k3/board/j784s4/include \ diff --git a/plat/ti/k3/common/k3_helpers.S b/plat/ti/k3/common/k3_helpers.S index f4f7d18eac..cc9934c4e9 100644 --- a/plat/ti/k3/common/k3_helpers.S +++ b/plat/ti/k3/common/k3_helpers.S @@ -105,7 +105,15 @@ func plat_reset_handler /* Cortex-A72 specific settings */ a72: mrs x0, CORTEX_A72_L2CTLR_EL1 - orr x0, x0, #(CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES << CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT) +#if K3_DATA_RAM_4_LATENCY + /* Set L2 cache data RAM latency to 4 cycles */ + orr x0, x0, #(CORTEX_A72_L2_DATA_RAM_LATENCY_4_CYCLES << \ + CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT) +#else + /* Set L2 cache data RAM latency to 3 cycles */ + orr x0, x0, #(CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES << \ + CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT) +#endif msr CORTEX_A72_L2CTLR_EL1, x0 isb ret From 24d8e6d97d8128c0c6bb2d900bbd378b85757ad8 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Tue, 10 Jan 2023 13:25:42 -0600 Subject: [PATCH 2/4] feat(ti): set L2 cache ECC and parity on A72 cores The Cortex-A72 based cores on K3 platforms have cache ECC and parity protection, enable these. 
Signed-off-by: Andrew Davis Change-Id: Icd00bc4aa9c1c48f0fb2a10ea66e75e0b146ef3c --- include/lib/cpus/aarch32/cortex_a72.h | 3 +++ include/lib/cpus/aarch64/cortex_a72.h | 3 +++ plat/ti/k3/common/k3_helpers.S | 3 +++ 3 files changed, 9 insertions(+) diff --git a/include/lib/cpus/aarch32/cortex_a72.h b/include/lib/cpus/aarch32/cortex_a72.h index c77484026f..3fbc465316 100644 --- a/include/lib/cpus/aarch32/cortex_a72.h +++ b/include/lib/cpus/aarch32/cortex_a72.h @@ -43,6 +43,9 @@ ******************************************************************************/ #define CORTEX_A72_L2CTLR p15, 1, c9, c0, 2 +#define CORTEX_A72_L2CTLR_EL1_ECC_AND_PARITY_ENABLE (ULL(1) << 21) +#define CORTEX_A72_L2CTLR_EL1_DATA_INLINE_ECC_ENABLE (ULL(1) << 20) + #define CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT U(0) #define CORTEX_A72_L2CTLR_TAG_RAM_LATENCY_SHIFT U(6) diff --git a/include/lib/cpus/aarch64/cortex_a72.h b/include/lib/cpus/aarch64/cortex_a72.h index b6ed820393..5d1d055d7e 100644 --- a/include/lib/cpus/aarch64/cortex_a72.h +++ b/include/lib/cpus/aarch64/cortex_a72.h @@ -57,6 +57,9 @@ ******************************************************************************/ #define CORTEX_A72_L2CTLR_EL1 S3_1_C11_C0_2 +#define CORTEX_A72_L2CTLR_EL1_ECC_AND_PARITY_ENABLE (ULL(1) << 21) +#define CORTEX_A72_L2CTLR_EL1_DATA_INLINE_ECC_ENABLE (ULL(1) << 20) + #define CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT U(0) #define CORTEX_A72_L2CTLR_DATA_RAM_SETUP_SHIFT U(5) #define CORTEX_A72_L2CTLR_TAG_RAM_LATENCY_SHIFT U(6) diff --git a/plat/ti/k3/common/k3_helpers.S b/plat/ti/k3/common/k3_helpers.S index cc9934c4e9..6742e74f1e 100644 --- a/plat/ti/k3/common/k3_helpers.S +++ b/plat/ti/k3/common/k3_helpers.S @@ -114,6 +114,9 @@ a72: orr x0, x0, #(CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES << \ CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT) #endif + /* Enable L2 ECC and parity with inline data */ + orr x0, x0, #CORTEX_A72_L2CTLR_EL1_ECC_AND_PARITY_ENABLE + orr x0, x0, #CORTEX_A72_L2CTLR_EL1_DATA_INLINE_ECC_ENABLE msr 
CORTEX_A72_L2CTLR_EL1, x0 isb ret From a28799c696cc4073b55719de6ed09298b7b13604 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Thu, 12 Jan 2023 09:32:33 -0600 Subject: [PATCH 3/4] feat(ti): set snoop-delayed exclusive handling on A72 cores Snoop requests should not be responded to during atomic operations. This can be handled by the interconnect using its global monitor or by the core's SCU delaying to check for the corresponding atomic monitor state. TI SoCs take the second approach. Set the snoop-delayed exclusive handling bit to inform the core it needs to delay responses to perform this check. As J784s4 is currently the only SoC with multiple A72 clusters, limit this delay to only that device. This change also enables L2 UniqueClean evictions with data in L2ACTLR_EL1; that write is unconditional and is not gated by K3_EXCLUSIVE_SNOOP_DELAY. Signed-off-by: Andrew Davis Change-Id: I875f64e4f53d47a9a0ccbf3415edc565be7f84d9 --- include/lib/cpus/aarch32/cortex_a72.h | 1 + include/lib/cpus/aarch64/cortex_a72.h | 1 + plat/ti/k3/board/j784s4/board.mk | 4 ++++ plat/ti/k3/common/k3_helpers.S | 13 +++++++++++++ 4 files changed, 19 insertions(+) diff --git a/include/lib/cpus/aarch32/cortex_a72.h b/include/lib/cpus/aarch32/cortex_a72.h index 3fbc465316..954f7361f9 100644 --- a/include/lib/cpus/aarch32/cortex_a72.h +++ b/include/lib/cpus/aarch32/cortex_a72.h @@ -37,6 +37,7 @@ #define CORTEX_A72_CPUACTLR_NO_ALLOC_WBWA (ULL(1) << 49) #define CORTEX_A72_CPUACTLR_DCC_AS_DCCI (ULL(1) << 44) #define CORTEX_A72_CPUACTLR_DIS_INSTR_PREFETCH (ULL(1) << 32) +#define CORTEX_A72_CPUACTLR_DELAY_EXCLUSIVE_SNOOP (ULL(1) << 31) /******************************************************************************* * L2 Control register specific definitions. 
diff --git a/include/lib/cpus/aarch64/cortex_a72.h b/include/lib/cpus/aarch64/cortex_a72.h index 5d1d055d7e..dfbb593482 100644 --- a/include/lib/cpus/aarch64/cortex_a72.h +++ b/include/lib/cpus/aarch64/cortex_a72.h @@ -37,6 +37,7 @@ #define CORTEX_A72_CPUACTLR_EL1_NO_ALLOC_WBWA (ULL(1) << 49) #define CORTEX_A72_CPUACTLR_EL1_DCC_AS_DCCI (ULL(1) << 44) #define CORTEX_A72_CPUACTLR_EL1_DIS_INSTR_PREFETCH (ULL(1) << 32) +#define CORTEX_A72_CPUACTLR_EL1_DELAY_EXCLUSIVE_SNOOP (ULL(1) << 31) /******************************************************************************* * L2 Auxiliary Control register specific definitions. diff --git a/plat/ti/k3/board/j784s4/board.mk b/plat/ti/k3/board/j784s4/board.mk index c7fcb00160..68ba1b527e 100644 --- a/plat/ti/k3/board/j784s4/board.mk +++ b/plat/ti/k3/board/j784s4/board.mk @@ -21,6 +21,10 @@ $(eval $(call add_define,K3_SEC_PROXY_LITE)) K3_DATA_RAM_4_LATENCY := 1 $(eval $(call add_define,K3_DATA_RAM_4_LATENCY)) +# Delay snoop exclusive handling for J784s4 +K3_EXCLUSIVE_SNOOP_DELAY := 1 +$(eval $(call add_define,K3_EXCLUSIVE_SNOOP_DELAY)) + # System coherency is managed in hardware USE_COHERENT_MEM := 1 diff --git a/plat/ti/k3/common/k3_helpers.S b/plat/ti/k3/common/k3_helpers.S index 6742e74f1e..f997b46783 100644 --- a/plat/ti/k3/common/k3_helpers.S +++ b/plat/ti/k3/common/k3_helpers.S @@ -118,6 +118,19 @@ a72: orr x0, x0, #CORTEX_A72_L2CTLR_EL1_ECC_AND_PARITY_ENABLE orr x0, x0, #CORTEX_A72_L2CTLR_EL1_DATA_INLINE_ECC_ENABLE msr CORTEX_A72_L2CTLR_EL1, x0 + + mrs x0, CORTEX_A72_L2ACTLR_EL1 + /* Enable L2 UniqueClean evictions with data */ + orr x0, x0, #CORTEX_A72_L2ACTLR_ENABLE_UNIQUE_CLEAN + msr CORTEX_A72_L2ACTLR_EL1, x0 + +#if K3_EXCLUSIVE_SNOOP_DELAY + mrs x0, CORTEX_A72_CPUACTLR_EL1 + /* Set Snoop-delayed exclusive handling */ + orr x0, x0, #CORTEX_A72_CPUACTLR_EL1_DELAY_EXCLUSIVE_SNOOP + msr CORTEX_A72_CPUACTLR_EL1, x0 +#endif + isb ret endfunc plat_reset_handler From 22cd7f4724c30a8d82630bb0db5832f1239889ad Mon Sep 17 
00:00:00 2001 From: Andrew Yong Date: Wed, 9 Oct 2024 23:09:03 +0800 Subject: [PATCH 4/4] plat/nxp: set snoop-delayed exclusive handling on A72 cores Signed-off-by: Andrew Yong --- plat/nxp/common/aarch64/ls_helpers.S | 22 ++++++++++++++++++- .../nxp/soc-lx2160a/aarch64/lx2160a_helpers.S | 1 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/plat/nxp/common/aarch64/ls_helpers.S b/plat/nxp/common/aarch64/ls_helpers.S index 19ea9e5d4b..1d9829675d 100644 --- a/plat/nxp/common/aarch64/ls_helpers.S +++ b/plat/nxp/common/aarch64/ls_helpers.S @@ -6,8 +6,8 @@ */ #include +#include #include -#include #include @@ -23,6 +23,7 @@ .globl _disable_ldstr_pfetch_A53 .globl _disable_ldstr_pfetch_A72 .global _set_smmu_pagesz_64 + .global _enable_delay_exclusive_snoop_A72 /* int plat_crash_console_init(void) * Function to initialize the crash console @@ -192,3 +193,22 @@ func _set_smmu_pagesz_64 ret endfunc _set_smmu_pagesz_64 + +/* + * Function enables snoop-delayed exclusive handling on A72 cores (sets the DELAY_EXCLUSIVE_SNOOP bit in CPUACTLR_EL1 unless it is already set; takes no arguments; clobbers x0 and the condition flags) + */ +func _enable_delay_exclusive_snoop_A72 + mrs x0, CORTEX_A72_CPUACTLR_EL1 /* x0 = current CPUACTLR_EL1 value */ + tst x0, #CORTEX_A72_CPUACTLR_EL1_DELAY_EXCLUSIVE_SNOOP /* Z is set when the bit is still clear */ + b.eq 1f /* bit clear: go and set it */ + b 2f /* bit already set: return without writing the register */ +.align 6 /* NOTE(review): 2^6 = 64-byte alignment under GNU as for AArch64; presumably keeps the update sequence in one aligned block - confirm */ +1: + dsb sy + isb /* barriers issued ahead of the CPUACTLR_EL1 update */ + orr x0, x0, #CORTEX_A72_CPUACTLR_EL1_DELAY_EXCLUSIVE_SNOOP /* set the snoop-delayed exclusive handling bit */ + msr CORTEX_A72_CPUACTLR_EL1, x0 + isb /* synchronize the system register write before returning */ +2: + ret +endfunc _enable_delay_exclusive_snoop_A72 diff --git a/plat/nxp/soc-lx2160a/aarch64/lx2160a_helpers.S b/plat/nxp/soc-lx2160a/aarch64/lx2160a_helpers.S index c364decd17..935309bf41 100644 --- a/plat/nxp/soc-lx2160a/aarch64/lx2160a_helpers.S +++ b/plat/nxp/soc-lx2160a/aarch64/lx2160a_helpers.S @@ -37,6 +37,7 @@ func plat_reset_handler bl apply_platform_errata #if defined(IMAGE_BL31) + bl _enable_delay_exclusive_snoop_A72 ldr x0, =POLICY_SMMU_PAGESZ_64K cbz x0, 1f /* Set the SMMU page size in the sACR register */