|
7 | 7 | #include "mlx5_core.h"
|
8 | 8 | #include "wq.h"
|
9 | 9 |
|
| 10 | +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) |
| 11 | +#include <asm/neon.h> |
| 12 | +#endif |
| 13 | + |
10 | 14 | #define TEST_WC_NUM_WQES 255
|
11 | 15 | #define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES))
|
12 | 16 | #define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ
|
@@ -255,6 +259,27 @@ static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq)
|
255 | 259 | mlx5_wq_destroy(&sq->wq_ctrl);
|
256 | 260 | }
|
257 | 261 |
|
| 262 | +static void mlx5_iowrite64_copy(struct mlx5_wc_sq *sq, __be32 mmio_wqe[16], |
| 263 | + size_t mmio_wqe_size, unsigned int offset) |
| 264 | +{ |
| 265 | +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) |
| 266 | + if (cpu_has_neon()) { |
| 267 | + kernel_neon_begin(); |
| 268 | + asm volatile |
| 269 | + (".arch_extension simd;\n\t" |
| 270 | + "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" |
| 271 | + "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" |
| 272 | + : |
| 273 | + : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) |
| 274 | + : "memory", "v0", "v1", "v2", "v3"); |
| 275 | + kernel_neon_end(); |
| 276 | + return; |
| 277 | + } |
| 278 | +#endif |
| 279 | + __iowrite64_copy(sq->bfreg.map + offset, mmio_wqe, |
| 280 | + mmio_wqe_size / 8); |
| 281 | +} |
| 282 | + |
258 | 283 | static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, unsigned int *offset,
|
259 | 284 | bool signaled)
|
260 | 285 | {
|
@@ -289,8 +314,7 @@ static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, unsigned int *offset,
|
289 | 314 | */
|
290 | 315 | wmb();
|
291 | 316 |
|
292 |
| - __iowrite64_copy(sq->bfreg.map + *offset, mmio_wqe, |
293 |
| - sizeof(mmio_wqe) / 8); |
| 317 | + mlx5_iowrite64_copy(sq, mmio_wqe, sizeof(mmio_wqe), *offset); |
294 | 318 |
|
295 | 319 | *offset ^= buf_size;
|
296 | 320 | }
|
|
0 commit comments