xtensa: use generic cache API #50136

Merged · 7 commits · Apr 26, 2023
2 changes: 1 addition & 1 deletion arch/xtensa/core/xtensa-asm2.c
@@ -88,7 +88,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
#ifdef CONFIG_KERNEL_COHERENCE
__ASSERT((((size_t)stack) % XCHAL_DCACHE_LINESIZE) == 0, "");
__ASSERT((((size_t)stack_ptr) % XCHAL_DCACHE_LINESIZE) == 0, "");
-z_xtensa_cache_flush_inv(stack, (char *)stack_ptr - (char *)stack);
+sys_cache_data_flush_and_invd_range(stack, (char *)stack_ptr - (char *)stack);
#endif
}
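
For context: the hunk above is the shape of the entire PR — each private z_xtensa_cache_* call becomes the equivalent sys_cache_data_* call from <zephyr/cache.h>. A minimal sketch of the flush-and-invalidate pattern, assuming only the generic header (the buffer name and use case are illustrative, not from this PR):

#include <zephyr/cache.h>

/* Hypothetical buffer shared with another CPU or a DMA agent. */
static uint8_t shared_buf[256];

void publish_shared_buf(void)
{
	/* Write dirty lines back to RAM and drop them from the L1 dcache,
	 * so the other agent sees current data and this CPU re-fetches
	 * fresh contents on its next access.
	 */
	sys_cache_data_flush_and_invd_range(shared_buf, sizeof(shared_buf));
}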

14 changes: 7 additions & 7 deletions arch/xtensa/include/kernel_arch_func.h
@@ -13,7 +13,7 @@
#ifndef _ASMLANGUAGE
#include <kernel_internal.h>
#include <string.h>
-#include <zephyr/arch/xtensa/cache.h>
+#include <zephyr/cache.h>
#include <zsr.h>

#ifdef __cplusplus
@@ -33,7 +33,7 @@ static ALWAYS_INLINE void arch_kernel_init(void)
/* Make sure we don't have live data for unexpected cached
* regions due to boot firmware
*/
-z_xtensa_cache_flush_inv_all();
+sys_cache_data_flush_and_invd_all();

/* Our cache top stash location might have junk in it from a
* pre-boot environment. Must be zero or valid!
@@ -115,7 +115,7 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
* automatically overwritten as needed.
*/
if (curr_cpu != new_thread->arch.last_cpu) {
-z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
+sys_cache_data_invd_range((void *)nsp, (nstack + nsz) - nsp);
}
old_thread->arch.last_cpu = curr_cpu;

@@ -143,8 +143,8 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
* to the stack top stashed in a special register.
*/
if (old_switch_handle != NULL) {
-z_xtensa_cache_flush((void *)osp, (ostack + osz) - osp);
-z_xtensa_cache_inv((void *)ostack, osp - ostack);
+sys_cache_data_flush_range((void *)osp, (ostack + osz) - osp);
+sys_cache_data_invd_range((void *)ostack, osp - ostack);
} else {
/* When in a switch, our current stack is the outbound
* stack. Flush the single line containing the stack
@@ -155,8 +155,8 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
*/
__asm__ volatile("mov %0, a1" : "=r"(osp));
osp -= 16;
-z_xtensa_cache_flush((void *)osp, 1);
-z_xtensa_cache_inv((void *)ostack, osp - ostack);
+sys_cache_data_flush_range((void *)osp, 1);
+sys_cache_data_invd_range((void *)ostack, osp - ostack);

uint32_t end = ostack + osz;
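
The flush/invalidate split in arch_cohere_stacks above follows one rule: flush the live part of the outbound stack so its bytes reach RAM, and merely invalidate the dead part below the stack pointer, whose stale lines must never be written back over someone else's stores. A hedged sketch of that rule, with illustrative names rather than actual kernel code:

#include <stdint.h>
#include <zephyr/cache.h>

/* 'stack' is the base, 'sz' the size, 'sp' the current stack pointer
 * inside [stack, stack + sz). Illustrative only.
 */
static void cohere_outbound_stack(uintptr_t stack, size_t sz, uintptr_t sp)
{
	/* Live region [sp, stack + sz): write it back so another CPU
	 * reading through an uncached mapping sees current data.
	 */
	sys_cache_data_flush_range((void *)sp, (stack + sz) - sp);

	/* Dead region [stack, sp): discard any cached lines; their
	 * contents are garbage and must not be evicted to RAM later.
	 */
	sys_cache_data_invd_range((void *)stack, sp - stack);
}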

20 changes: 11 additions & 9 deletions drivers/mm/mm_drv_intel_adsp_mtl_tlb.c
@@ -21,10 +21,11 @@
*/

#include "mm_drv_intel_adsp.h"

#include <soc_util.h>
#include <zephyr/drivers/mm/mm_drv_intel_adsp_mtl_tlb.h>
#include <zephyr/drivers/mm/mm_drv_bank.h>
#include <zephyr/debug/sparse.h>
+#include <zephyr/cache.h>

static struct k_spinlock tlb_lock;
extern struct k_spinlock sys_mm_drv_common_lock;
@@ -269,7 +270,7 @@ int sys_mm_drv_map_page(void *virt, uintptr_t phys, uint32_t flags)
* Invalid the cache of the newly mapped virtual page to
* avoid stale data.
*/
-z_xtensa_cache_inv(virt, CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_invd_range(virt, CONFIG_MM_DRV_PAGE_SIZE);

k_spin_unlock(&tlb_lock, key);

@@ -356,7 +357,7 @@ int sys_mm_drv_unmap_page(void *virt)
* Flush the cache to make sure the backing physical page
* has the latest data.
*/
-z_xtensa_cache_flush(virt, CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_flush_range(virt, CONFIG_MM_DRV_PAGE_SIZE);

entry_idx = get_tlb_entry_idx(va);

@@ -581,8 +582,8 @@ int sys_mm_drv_move_region(void *virt_old, size_t size, void *virt_new,
* flush the cache to make sure the backing physical
* pages have the new data.
*/
-z_xtensa_cache_flush(virt_new, size);
-z_xtensa_cache_flush_inv(virt_old, size);
+sys_cache_data_flush_range(virt_new, size);
+sys_cache_data_flush_and_invd_range(virt_old, size);

return ret;
}
@@ -603,7 +604,7 @@ int sys_mm_drv_move_array(void *virt_old, size_t size, void *virt_new,
* flush the cache to make sure the backing physical
* pages have the new data.
*/
-z_xtensa_cache_flush(va_new, size);
+sys_cache_data_flush_range(va_new, size);

return ret;
}
@@ -722,7 +723,8 @@ static void adsp_mm_save_context(void *storage_buffer)
* all cache data has been flushed before
* do this for pages to remap only
*/
-z_xtensa_cache_inv(UINT_TO_POINTER(phys_addr), CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_invd_range(UINT_TO_POINTER(phys_addr),
+CONFIG_MM_DRV_PAGE_SIZE);

/* Enable the translation in the TLB entry */
entry |= TLB_ENABLE_BIT;
@@ -746,7 +748,7 @@ static void adsp_mm_save_context(void *storage_buffer)
*((uint32_t *) location) = 0;
location += sizeof(uint32_t);

-z_xtensa_cache_flush(
+sys_cache_data_flush_range(
storage_buffer,
(uint32_t)location - (uint32_t)storage_buffer);

@@ -788,7 +790,7 @@ __imr void adsp_mm_restore_context(void *storage_buffer)
bmemcpy(UINT_TO_POINTER(phys_addr_uncached),
location,
CONFIG_MM_DRV_PAGE_SIZE);
-z_xtensa_cache_inv(UINT_TO_POINTER(phys_addr), CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_invd_range(UINT_TO_POINTER(phys_addr), CONFIG_MM_DRV_PAGE_SIZE);

location += CONFIG_MM_DRV_PAGE_SIZE;
phys_addr = *((uint32_t *) location);
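
Both TLB drivers in this PR apply the same discipline: invalidate a virtual range right after mapping it (any cached lines for those addresses are stale) and flush it right before unmapping it (so the backing physical page keeps the latest data). A compact sketch of the two hooks, with illustrative helper names:

#include <zephyr/cache.h>

/* Illustrative: 'virt' is a page of 'page_size' bytes that was just
 * mapped, or is about to be unmapped.
 */
static void on_page_mapped(void *virt, size_t page_size)
{
	/* Drop stale lines so the first read hits the new backing page. */
	sys_cache_data_invd_range(virt, page_size);
}

static void before_page_unmapped(void *virt, size_t page_size)
{
	/* Write dirty lines back so the backing page keeps the data. */
	sys_cache_data_flush_range(virt, page_size);
}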
9 changes: 5 additions & 4 deletions drivers/mm/mm_drv_intel_adsp_tlb.c
@@ -29,6 +29,7 @@
#include <zephyr/sys/check.h>
#include <zephyr/sys/mem_manage.h>
#include <zephyr/sys/util.h>
+#include <zephyr/cache.h>

#include <soc.h>
#include <adsp_memory.h>
@@ -132,7 +133,7 @@ int sys_mm_drv_map_page(void *virt, uintptr_t phys, uint32_t flags)
* Invalid the cache of the newly mapped virtual page to
* avoid stale data.
*/
-z_xtensa_cache_inv(virt, CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_invd_range(virt, CONFIG_MM_DRV_PAGE_SIZE);

k_spin_unlock(&tlb_lock, key);

@@ -185,7 +186,7 @@ int sys_mm_drv_unmap_page(void *virt)
* Flush the cache to make sure the backing physical page
* has the latest data.
*/
-z_xtensa_cache_flush(virt, CONFIG_MM_DRV_PAGE_SIZE);
+sys_cache_data_flush_range(virt, CONFIG_MM_DRV_PAGE_SIZE);

entry_idx = get_tlb_entry_idx(va);

@@ -302,7 +303,7 @@ int sys_mm_drv_move_region(void *virt_old, size_t size, void *virt_new,
* flush the cache to make sure the backing physical
* pages have the new data.
*/
-z_xtensa_cache_flush(va_new, size);
+sys_cache_data_flush_range(va_new, size);

return ret;
}
@@ -323,7 +324,7 @@ int sys_mm_drv_move_array(void *virt_old, size_t size, void *virt_new,
* flush the cache to make sure the backing physical
* pages have the new data.
*/
-z_xtensa_cache_flush(va_new, size);
+sys_cache_data_flush_range(va_new, size);

return ret;
}
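
One behavioral difference worth noting: the retired z_xtensa_cache_* helpers returned void, while the generic sys_cache_data_*_range functions return an int (current Zephyr can report -ENOTSUP where data-cache maintenance is unavailable — worth verifying against the tree in use; the converted call sites here do not check it). A small sketch of a checking caller, with an illustrative wrapper name:

#include <errno.h>
#include <zephyr/cache.h>

/* Illustrative wrapper, not from this PR. */
static int flush_checked(void *buf, size_t len)
{
	int ret = sys_cache_data_flush_range(buf, len);

	/* Treat "no dcache to maintain" as success for portable callers. */
	return (ret == -ENOTSUP) ? 0 : ret;
}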
14 changes: 7 additions & 7 deletions drivers/neural_net/intel_gna.c
@@ -79,7 +79,7 @@ static void intel_gna_interrupt_handler(const struct device *dev)
if (k_msgq_get(&gna->request_queue, &pending_req, K_NO_WAIT) != 0) {
LOG_ERR("Pending request queue is empty");
} else {
-z_xtensa_cache_inv(pending_req.model->output,
+sys_cache_data_invd_range(pending_req.model->output,
pending_req.output_len);
/* copy output from the model buffer to application buffer */
memcpy(pending_req.output, pending_req.model->output,
@@ -194,7 +194,7 @@ static int intel_gna_initialize(const struct device *dev)
dev->name, gna_config_desc.vamaxaddr);

/* flush cache */
-z_xtensa_cache_flush((void *)&gna_config_desc, sizeof(gna_config_desc));
+sys_cache_data_flush_range((void *)&gna_config_desc, sizeof(gna_config_desc));

LOG_INF("%s: initialized (max %u models & max %u pending requests)",
dev->name, GNA_MAX_NUM_MODELS,
@@ -334,7 +334,7 @@ static int intel_gna_register_model(const struct device *dev,

intel_gna_setup_page_table(model->rw_region, rw_size,
virtual_base);
-z_xtensa_cache_flush(model->rw_region, rw_size);
+sys_cache_data_flush_range(model->rw_region, rw_size);
}

if (model->ro_region == NULL) {
@@ -352,8 +352,8 @@ static int intel_gna_register_model(const struct device *dev,
intel_gna_setup_page_table(ro_region, ro_size,
(void *)((uint32_t)virtual_base + rw_size));

-z_xtensa_cache_flush(ro_region, ro_size);
-z_xtensa_cache_flush(gna_page_table, sizeof(gna_page_table));
+sys_cache_data_flush_range(ro_region, ro_size);
+sys_cache_data_flush_range(gna_page_table, sizeof(gna_page_table));

/* copy the model pointers */
gna_model->model = *model;
@@ -461,12 +461,12 @@ static int intel_gna_infer(const struct device *dev,

/* copy input */
memcpy(handle->input, req->input, input_size);
-z_xtensa_cache_flush(handle->input, input_size);
+sys_cache_data_flush_range(handle->input, input_size);

/* assign layer descriptor base address to configuration descriptor */
gna_config_desc.labase = (uint32_t)handle->vabase;
gna_config_desc.lacnt = (uint16_t)header->layer_count;
-z_xtensa_cache_flush(&gna_config_desc, sizeof(gna_config_desc));
+sys_cache_data_flush_range(&gna_config_desc, sizeof(gna_config_desc));

gna->state = GNA_STATE_ACTIVE;
regs->gnactrl = (regs->gnactrl & ~GNA_CTRL_INTR_DISABLE) |
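
The GNA driver above shows the classic CPU/device handoff: flush buffers the CPU wrote before the device reads them, and invalidate buffers the device wrote before the CPU reads them. A hedged sketch with illustrative names, assuming a device that reads and writes RAM directly:

#include <string.h>
#include <zephyr/cache.h>

static void submit_to_device(void *dev_in, const void *src, size_t len)
{
	memcpy(dev_in, src, len);
	/* Push the input out of the dcache so the device sees it in RAM. */
	sys_cache_data_flush_range(dev_in, len);
}

static void collect_from_device(void *dst, void *dev_out, size_t len)
{
	/* Discard stale cached lines before reading what the device wrote. */
	sys_cache_data_invd_range(dev_out, len);
	memcpy(dst, dev_out, len);
}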
2 changes: 2 additions & 0 deletions include/zephyr/arch/cache.h
@@ -21,6 +21,8 @@

#if defined(CONFIG_ARM64)
#include <zephyr/arch/arm64/cache.h>
+#elif defined(CONFIG_XTENSA)
+#include <zephyr/arch/xtensa/cache.h>
#endif

#if defined(CONFIG_DCACHE)
137 changes: 137 additions & 0 deletions include/zephyr/arch/xtensa/arch.h
@@ -28,6 +28,7 @@
#include <xtensa/config/core.h>
#include <zephyr/arch/common/addr_types.h>
#include <zephyr/arch/xtensa/gdbstub.h>
+#include <zephyr/debug/sparse.h>

#ifdef CONFIG_KERNEL_COHERENCE
#define ARCH_STACK_PTR_ALIGN XCHAL_DCACHE_LINESIZE
@@ -84,6 +85,142 @@ static ALWAYS_INLINE void arch_nop(void)
}
#endif


#if defined(CONFIG_XTENSA_RPO_CACHE)
#if defined(CONFIG_ARCH_HAS_COHERENCE)
static inline bool arch_mem_coherent(void *ptr)
{
size_t addr = (size_t) ptr;

return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
}
#endif

static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
{
/* The math here is all compile-time: when the two regions
* differ by a power of two, we can convert between them by
* setting or clearing just one bit. Otherwise it needs two
* operations.
*/
uint32_t rxor = (rto ^ rfrom) << 29;

rto <<= 29;
if (Z_IS_POW2(rxor)) {
if ((rxor & rto) == 0) {
return addr & ~rxor;
} else {
return addr | rxor;
}
} else {
return (addr & ~(7U << 29)) | rto;
}
}
/**
* @brief Return cached pointer to a RAM address
*
* The Xtensa coherence architecture maps addressable RAM twice, in
* two different 512MB regions whose L1 cache settings can be
* controlled independently. So for any given pointer, it is possible
* to convert it to and from a cached version.
*
* This function takes a pointer to any addressable object (either in
* cacheable memory or not) and returns a pointer that can be used to
* refer to the same memory through the L1 data cache. Data read
* through the resulting pointer will reflect locally cached values on
* the current CPU if they exist, and writes will go first into the
* cache and be written back later.
*
* @see arch_xtensa_uncached_ptr()
*
* @param ptr A pointer to a valid C object
* @return A pointer to the same object via the L1 dcache
*/
static inline void __sparse_cache *arch_xtensa_cached_ptr(void *ptr)
{
return (__sparse_force void __sparse_cache *)z_xtrpoflip((uint32_t) ptr,
CONFIG_XTENSA_CACHED_REGION,
CONFIG_XTENSA_UNCACHED_REGION);
}

/**
* @brief Return uncached pointer to a RAM address
*
* The Xtensa coherence architecture maps addressable RAM twice, in
* two different 512MB regions whose L1 cache settings can be
* controlled independently. So for any given pointer, it is possible
* to convert it to and from a cached version.
*
* This function takes a pointer to any addressable object (either in
* cacheable memory or not) and returns a pointer that can be used to
* refer to the same memory while bypassing the L1 data cache. Data
* in the L1 cache will not be inspected nor modified by the access.
*
* @see arch_xtensa_cached_ptr()
*
* @param ptr A pointer to a valid C object
* @return A pointer to the same object bypassing the L1 dcache
*/
static inline void *arch_xtensa_uncached_ptr(void __sparse_cache *ptr)
{
return (void *)z_xtrpoflip((__sparse_force uint32_t)ptr,
CONFIG_XTENSA_UNCACHED_REGION,
CONFIG_XTENSA_CACHED_REGION);
}

/* Utility to generate an unrolled and optimal[1] code sequence to set
* the RPO TLB registers (contra the HAL cacheattr macros, which
* generate larger code and can't be called from C), based on the
* KERNEL_COHERENCE configuration in use. Selects RPO attribute "2"
* for regions (including MMIO registers in region zero) which want to
* bypass L1, "4" for the cached region which wants writeback, and
* "15" (invalid) elsewhere.
*
* Note that on cores that have the "translation" option set, we need
* to put an identity mapping in the high bits. Also per spec
* changing the current code region (by definition cached) requires
* that WITLB be followed by an ISYNC and that both instructions live
* in the same cache line (two 3-byte instructions fit in an 8-byte
* aligned region, so that's guaranteed not to cross a cache line
* boundary).
*
* [1] With the sole exception of gcc's infuriating insistence on
* emitting a precomputed literal for addr + addrincr instead of
* computing it with a single ADD instruction from values it already
* has in registers. Explicitly assigning the variables to registers
* via an attribute works, but then emits needless MOV instructions
* instead. I tell myself it's just 32 bytes of .text, but... Sigh.
*/
#define _REGION_ATTR(r) \
((r) == 0 ? 2 : \
((r) == CONFIG_XTENSA_CACHED_REGION ? 4 : \
((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

#define _SET_ONE_TLB(region) do { \
uint32_t attr = _REGION_ATTR(region); \
if (XCHAL_HAVE_XLT_CACHEATTR) { \
attr |= addr; /* RPO with translation */ \
} \
if (region != CONFIG_XTENSA_CACHED_REGION) { \
__asm__ volatile("wdtlb %0, %1; witlb %0, %1" \
:: "r"(attr), "r"(addr)); \
} else { \
__asm__ volatile("wdtlb %0, %1" \
:: "r"(attr), "r"(addr)); \
__asm__ volatile("j 1f; .align 8; 1:"); \
__asm__ volatile("witlb %0, %1; isync" \
:: "r"(attr), "r"(addr)); \
} \
addr += addrincr; \
} while (0)

#define ARCH_XTENSA_SET_RPO_TLB() do { \
register uint32_t addr = 0, addrincr = 0x20000000; \
FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7); \
} while (0)

#endif

#endif /* !defined(_ASMLANGUAGE) && !defined(__ASSEMBLER__) */

#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_ARCH_H_ */
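
To make the z_xtrpoflip math above concrete: with illustrative region numbers CACHED=5 and UNCACHED=4 (the real values are SoC-specific Kconfig symbols), rxor = (5 ^ 4) << 29 = 0x20000000 is a power of two, so the cached alias is addr | 0x20000000 and the uncached alias is addr & ~0x20000000 — a single instruction each way. A hedged usage sketch of the new pointer-conversion helpers, assuming CONFIG_XTENSA_RPO_CACHE and a buffer inside the doubly mapped RAM:

#include <zephyr/kernel.h>	/* pulls in the Xtensa <arch.h> above */
#include <zephyr/debug/sparse.h>

int sum_through_cache(void *buf, size_t n)
{
	/* Same memory, addressed through the L1 dcache. */
	uint8_t __sparse_cache *cached =
		(uint8_t __sparse_cache *)arch_xtensa_cached_ptr(buf);
	int sum = 0;

	for (size_t i = 0; i < n; i++) {
		sum += cached[i];
	}
	return sum;
}

void write_bypassing_cache(void __sparse_cache *obj, uint8_t val)
{
	/* Same memory, bypassing the L1 dcache entirely. */
	uint8_t *raw = arch_xtensa_uncached_ptr(obj);

	raw[0] = val;
}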