From 4c8fb2c2e0d4e54f799fc76329642bc97e60635f Mon Sep 17 00:00:00 2001 From: Simon Ewing Date: Tue, 29 Oct 2024 10:56:33 -0700 Subject: [PATCH] xe: gemm: skip locking flag register on no-load blocks --- src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp | 7 +++++++ src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp | 2 +- src/gpu/intel/jit/gemm/generator/pieces/copy.cxx | 2 +- src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx | 4 +++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp b/src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp index 1fd0819040c..58ba6e54831 100644 --- a/src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp +++ b/src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp @@ -99,6 +99,13 @@ FlagRegister VirtualFlagAllocator::assignPhysical(VirtualFlag vflag) return pflag.toPhysical(); } +bool VirtualFlagAllocator::lock(VirtualFlag vflag, bool allowAlreadyLocked) { + bool wasLocked = isLocked(vflag); + if (wasLocked && !allowAlreadyLocked) stub("Illegally locking an already-locked flag register"); + locked |= mask(vflag); + return wasLocked; +} + bool VirtualFlagAllocator::canLock(int n) const { uint8_t unlocked = ~locked & ((1 << nflag) - 1); diff --git a/src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp b/src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp index 783a1c680c5..0e3cf11e637 100644 --- a/src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp +++ b/src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp @@ -78,7 +78,7 @@ class VirtualFlagAllocator { bool isVirtual(VirtualFlag vflag) { return (vflag.idx >= nflag); } - bool lock(VirtualFlag vflag) { bool wasLocked = isLocked(vflag); locked |= mask(vflag); return wasLocked; } + bool lock(VirtualFlag vflag, bool allowAlreadyLocked = false); void unlock(VirtualFlag vflag) { locked &= ~mask(vflag); } bool isLocked(VirtualFlag vflag) const { return !(~locked & mask(vflag)); } bool canLock(int n = 1) const; diff --git a/src/gpu/intel/jit/gemm/generator/pieces/copy.cxx b/src/gpu/intel/jit/gemm/generator/pieces/copy.cxx index 2be36f87020..93c12505f44 100644 --- a/src/gpu/intel/jit/gemm/generator/pieces/copy.cxx +++ b/src/gpu/intel/jit/gemm/generator/pieces/copy.cxx @@ -172,7 +172,7 @@ void BLASKernelGenerator::copyExecute(CopyPlan &&plan, CommonState &state) if (!state.vflagsEnabled()) for (int i = 0; i < nflag; i++) if (!raVFlag0.isFree(VirtualFlag{i})) - raVFlag0.lock(VirtualFlag{i}); + raVFlag0.lock(VirtualFlag{i}, true); auto raVFlag = raVFlag0; // If we have enough free flags, use those. diff --git a/src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx b/src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx index 91c4ffcd535..cbee13ed0ff 100644 --- a/src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx +++ b/src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx @@ -621,6 +621,8 @@ void BLASKernelGenerator::prepareSeriesRegisterBlockMasking(const vector::prepareSeriesRegisterBlockMasking(const vector