From 79d418bc5e637fda935756ca1ddb21bc63481d0f Mon Sep 17 00:00:00 2001 From: Yuanyuan Chen Date: Wed, 10 Sep 2025 14:25:00 +0800 Subject: [PATCH] Bump ASMJIT to 1.18 Signed-off-by: Yuanyuan Chen --- external/asmjit | 2 +- src/EmbeddingSpMDM.cc | 64 +++++++++---------- src/EmbeddingSpMDMNBit.cc | 58 ++++++++--------- src/FbgemmI64.cc | 38 +++++------ src/GenerateI8Depthwise.cc | 26 ++++---- src/GenerateKernelDirectConvU8S8S32ACC32.cc | 70 ++++++++++----------- src/GenerateKernelU8S8S32ACC16.cc | 30 ++++----- src/GenerateKernelU8S8S32ACC16Avx512.cc | 38 +++++------ src/GenerateKernelU8S8S32ACC32.cc | 36 +++++------ src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc | 38 +++++------ src/GroupwiseConv.cc | 48 +++++++------- src/RowWiseSparseAdagradFused.cc | 50 +++++++-------- src/SparseAdagrad.cc | 46 +++++++------- 13 files changed, 272 insertions(+), 272 deletions(-) diff --git a/external/asmjit b/external/asmjit index a3199e8857..32b5f78700 160000 --- a/external/asmjit +++ b/external/asmjit @@ -1 +1 @@ -Subproject commit a3199e8857792cd10b7589ff5d58343d2c9008ea +Subproject commit 32b5f78700e68684066feb99d583d6fb2a4e3b22 diff --git a/src/EmbeddingSpMDM.cc b/src/EmbeddingSpMDM.cc index 1ab87e3950..545abc14ef 100644 --- a/src/EmbeddingSpMDM.cc +++ b/src/EmbeddingSpMDM.cc @@ -309,28 +309,28 @@ GenEmbeddingSpMDMLookup< frame.init(func); if constexpr (instSet == inst_set_t::avx2) { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); } else { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); } - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kGp, reg_id == 15 - ? asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) - : asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14)); + ? asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) + : asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14)); asmjit::FuncArgsAssignment args(&func); if constexpr (ROWWISE_SPARSE) { - args.assignAll( + args.assign_all( output_size, index_size, data_size, @@ -342,7 +342,7 @@ GenEmbeddingSpMDMLookup< compressed_indices_table, scratchReg1_); } else { - args.assignAll( + args.assign_all( output_size, index_size, data_size, @@ -354,11 +354,11 @@ GenEmbeddingSpMDMLookup< scratchReg1_); } - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); constexpr int vlen = simd_info::WIDTH_32BIT_ELEMS; constexpr int NUM_VEC_REG = simd_info::NUM_VEC_REGS; @@ -451,10 +451,10 @@ GenEmbeddingSpMDMLookup< a->lea( index_size, x86::ptr(indices, index_size, areIndices64b ? 3 : 2)); - asmjit::Label exit = a->newLabel(); - asmjit::Label error = a->newLabel(); - asmjit::Label LoopRangeIndexBegin = a->newLabel(); - asmjit::Label LoopRangeIndexEnd = a->newLabel(); + asmjit::Label exit = a->new_label(); + asmjit::Label error = a->new_label(); + asmjit::Label LoopRangeIndexBegin = a->new_label(); + asmjit::Label LoopRangeIndexEnd = a->new_label(); // rangeIndex loop begins (iterate output_size times) a->bind(LoopRangeIndexBegin); @@ -462,8 +462,8 @@ GenEmbeddingSpMDMLookup< a->jl(LoopRangeIndexEnd); if (normalize_by_lengths) { - asmjit::Label IfLengthsBegin = a->newLabel(); - asmjit::Label IfLengthsEnd = a->newLabel(); + asmjit::Label IfLengthsBegin = a->new_label(); + asmjit::Label IfLengthsEnd = a->new_label(); a->bind(IfLengthsBegin); if (use_offsets) { a->mov(lengths_R_, x86::dword_ptr(lengths, sizeof(offsetType))); @@ -520,9 +520,9 @@ GenEmbeddingSpMDMLookup< a->cmp(scratchReg1_, index_size); a->jg(error); - asmjit::Label LoopDataIndexBegin = a->newLabel(); - asmjit::Label LoopDataIndexEnd = a->newLabel(); - asmjit::Label ValidIndexLabel = a->newLabel(); + asmjit::Label LoopDataIndexBegin = a->new_label(); + asmjit::Label LoopDataIndexEnd = a->new_label(); + asmjit::Label ValidIndexLabel = a->new_label(); // dataIndex loop begins (iterate lengths_R_ times) a->bind(LoopDataIndexBegin); @@ -569,8 +569,8 @@ GenEmbeddingSpMDMLookup< int fused_block_size = input_stride * sizeof(inType); if (pref_dist) { - asmjit::Label pref_dist_reset_start = a->newLabel(); - asmjit::Label pref_dist_reset_end = a->newLabel(); + asmjit::Label pref_dist_reset_start = a->new_label(); + asmjit::Label pref_dist_reset_end = a->new_label(); // out of bound handling for prefetch a->lea( scratchReg2_, x86::ptr(indices, pref_dist * sizeof(indxType))); @@ -601,8 +601,8 @@ GenEmbeddingSpMDMLookup< a->bind(pref_dist_reset_end); if constexpr (ROWWISE_SPARSE) { asmjit::Label rowwise_sparse_pref_corner_case_begin = - a->newLabel(); - asmjit::Label rowwise_sparse_pref_corner_case_end = a->newLabel(); + a->new_label(); + asmjit::Label rowwise_sparse_pref_corner_case_end = a->new_label(); a->cmp(scratchReg2_, data_size); a->jae(rowwise_sparse_pref_corner_case_begin); @@ -934,7 +934,7 @@ GenEmbeddingSpMDMLookup< a->lea(x86::rsp, x86::ymmword_ptr(x86::rsp, vlen * sizeof(int32_t))); } - a->emitEpilog(frame); + a->emit_epilog(frame); // jit_fused8bitembedding_kernel fn; typename ReturnFunctionSignature< @@ -943,13 +943,13 @@ GenEmbeddingSpMDMLookup< offsetType, outType, ROWWISE_SPARSE>::jit_embedding_kernel fn; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/EmbeddingSpMDMNBit.cc b/src/EmbeddingSpMDMNBit.cc index afdf91acd2..dd692cdabe 100644 --- a/src/EmbeddingSpMDMNBit.cc +++ b/src/EmbeddingSpMDMNBit.cc @@ -283,22 +283,22 @@ GenEmbeddingSpMDMNBitLookup< asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kGp, reg_id == 15 - ? asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) - : asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14)); + ? asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) + : asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14)); asmjit::FuncArgsAssignment args(&func); if constexpr (ROWWISE_SPARSE) { - args.assignAll( + args.assign_all( output_size, index_size, data_size, @@ -310,7 +310,7 @@ GenEmbeddingSpMDMNBitLookup< compressed_indices_table, scratchReg1_); } else { - args.assignAll( + args.assign_all( output_size, index_size, data_size, @@ -322,11 +322,11 @@ GenEmbeddingSpMDMNBitLookup< scratchReg1_); } - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); constexpr int vlen = simd_info::WIDTH_32BIT_ELEMS; constexpr int NUM_VEC_REG = simd_info::NUM_VEC_REGS; @@ -480,10 +480,10 @@ GenEmbeddingSpMDMNBitLookup< a->lea( index_size, x86::ptr(indices, index_size, areIndices64b ? 3 : 2)); - asmjit::Label exit = a->newLabel(); - asmjit::Label error = a->newLabel(); - asmjit::Label LoopRangeIndexBegin = a->newLabel(); - asmjit::Label LoopRangeIndexEnd = a->newLabel(); + asmjit::Label exit = a->new_label(); + asmjit::Label error = a->new_label(); + asmjit::Label LoopRangeIndexBegin = a->new_label(); + asmjit::Label LoopRangeIndexEnd = a->new_label(); // rangeIndex loop begins (iterate output_size times) a->bind(LoopRangeIndexBegin); @@ -491,8 +491,8 @@ GenEmbeddingSpMDMNBitLookup< a->jl(LoopRangeIndexEnd); if (normalize_by_lengths) { - asmjit::Label IfLengthsBegin = a->newLabel(); - asmjit::Label IfLengthsEnd = a->newLabel(); + asmjit::Label IfLengthsBegin = a->new_label(); + asmjit::Label IfLengthsEnd = a->new_label(); a->bind(IfLengthsBegin); if (use_offsets) { a->mov(lengths_R_, x86::dword_ptr(lengths, sizeof(offsetType))); @@ -548,9 +548,9 @@ GenEmbeddingSpMDMNBitLookup< a->cmp(scratchReg1_, index_size); a->jg(error); - asmjit::Label LoopDataIndexBegin = a->newLabel(); - asmjit::Label LoopDataIndexEnd = a->newLabel(); - asmjit::Label ValidIndexLabel = a->newLabel(); + asmjit::Label LoopDataIndexBegin = a->new_label(); + asmjit::Label LoopDataIndexEnd = a->new_label(); + asmjit::Label ValidIndexLabel = a->new_label(); // dataIndex loop begins (iterate lengths_R_ times) a->bind(LoopDataIndexBegin); @@ -597,8 +597,8 @@ GenEmbeddingSpMDMNBitLookup< int num_elem_per_byte = 8 / bit_rate; int fused_block_size = input_stride; if (pref_dist) { - asmjit::Label pref_dist_reset_start = a->newLabel(); - asmjit::Label pref_dist_reset_end = a->newLabel(); + asmjit::Label pref_dist_reset_start = a->new_label(); + asmjit::Label pref_dist_reset_end = a->new_label(); // out of bound handling for prefetch a->lea( scratchReg2_, x86::ptr(indices, pref_dist * sizeof(indxType))); @@ -629,8 +629,8 @@ GenEmbeddingSpMDMNBitLookup< a->bind(pref_dist_reset_end); if constexpr (ROWWISE_SPARSE) { asmjit::Label rowwise_sparse_pref_corner_case_begin = - a->newLabel(); - asmjit::Label rowwise_sparse_pref_corner_case_end = a->newLabel(); + a->new_label(); + asmjit::Label rowwise_sparse_pref_corner_case_end = a->new_label(); a->cmp(scratchReg2_, data_size); a->jae(rowwise_sparse_pref_corner_case_begin); @@ -941,7 +941,7 @@ GenEmbeddingSpMDMNBitLookup< a->lea(x86::rsp, x86::ymmword_ptr(x86::rsp, vlen * sizeof(int32_t))); } - a->emitEpilog(frame); + a->emit_epilog(frame); // jit_fused8bitembedding_kernel fn; typename ReturnFunctionSignature< @@ -949,12 +949,12 @@ GenEmbeddingSpMDMNBitLookup< offsetType, outType, ROWWISE_SPARSE>::jit_embedding_kernel fn; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/FbgemmI64.cc b/src/FbgemmI64.cc index c9d4096e2c..da5f2f0d14 100644 --- a/src/FbgemmI64.cc +++ b/src/FbgemmI64.cc @@ -185,28 +185,28 @@ CodeGenBase::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); - frame.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); - asmjit::Label LoopNBlocks = a->newLabel(); - asmjit::Label Loopk = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); + asmjit::Label LoopNBlocks = a->new_label(); + asmjit::Label Loopk = a->new_label(); x86::Gp buffer_B_saved = a->gpz(10); x86::Gp C_Offset = a->gpz(11); @@ -308,8 +308,8 @@ CodeGenBase::getOrCreate( // generate code for remainder if (mRegBlocksRem > 0) { assert(false); - asmjit::Label LoopNRem = a->newLabel(); - asmjit::Label LoopkRem = a->newLabel(); + asmjit::Label LoopNRem = a->new_label(); + asmjit::Label LoopkRem = a->new_label(); int rowRegs = mRegBlocksRem; a->xor_(jIdx.r32(), jIdx.r32()); @@ -366,15 +366,15 @@ CodeGenBase::getOrCreate( a->jl(LoopNRem); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateI8Depthwise.cc b/src/GenerateI8Depthwise.cc index c13885aa7d..017b404a84 100644 --- a/src/GenerateI8Depthwise.cc +++ b/src/GenerateI8Depthwise.cc @@ -278,16 +278,16 @@ GenI8Depthwise::jit_kernel_signature GenI8Depthwise::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); - frame.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll( + args.assign_all( a_addr, b_addr, c_addr, @@ -299,11 +299,11 @@ GenI8Depthwise::jit_kernel_signature GenI8Depthwise::getOrCreate( a_zero_point, b_zero_point_addr); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - e->emitProlog(frame); - e->emitArgsAssignment(frame, args); + e->emit_prolog(frame); + e->emit_args_assignment(frame, args); // Assign vector registers Ymm a[4]; @@ -382,7 +382,7 @@ GenI8Depthwise::jit_kernel_signature GenI8Depthwise::getOrCreate( e->sar(ic_loop_count, asmjit::Imm(oc_per_g == 1 ? 5 : 4)); e->mov(a_addr_save, a_addr); - asmjit::Label ic_loop_begin = e->newLabel(), ic_loop_end = e->newLabel(); + asmjit::Label ic_loop_begin = e->new_label(), ic_loop_end = e->new_label(); // main_loop == false: the last vector iteration across input channels for (bool main_loop : {true, false}) { @@ -554,15 +554,15 @@ GenI8Depthwise::jit_kernel_signature GenI8Depthwise::getOrCreate( } } - e->emitEpilog(frame); + e->emit_epilog(frame); jit_kernel_signature fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateKernelDirectConvU8S8S32ACC32.cc b/src/GenerateKernelDirectConvU8S8S32ACC32.cc index 92a28b5027..adbd1c0dfd 100644 --- a/src/GenerateKernelDirectConvU8S8S32ACC32.cc +++ b/src/GenerateKernelDirectConvU8S8S32ACC32.cc @@ -233,30 +233,30 @@ DirectConvCodeGenBase::getOrCreateDirectConv( asmjit::FuncFrame frame; frame.init(func); - auto dirtyVecRegs = asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15); + auto dirtyVecRegs = asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15); if (numRegs >= 16) { - dirtyVecRegs |= asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31); + dirtyVecRegs |= asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31); } - frame.setDirtyRegs(asmjit::RegGroup::kVec, dirtyVecRegs); - frame.setDirtyRegs( + frame.set_dirty_regs(asmjit::RegGroup::kVec, dirtyVecRegs); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, ichXk1, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, ichXk1, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); - // asmjit::Label LoopOBlocks = a->newLabel(); - // asmjit::Label LoopNBlocks = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); + // asmjit::Label LoopOBlocks = a->new_label(); + // asmjit::Label LoopNBlocks = a->new_label(); const x86::Gp& buffer_B_saved = a->gpz(10); const x86::Gp& C_Offset = a->gpz(11); @@ -279,8 +279,8 @@ DirectConvCodeGenBase::getOrCreateDirectConv( auto issueLoopOverK = [&](int rowRegs) { // loopKLabel: corresponds to loop "r" where r = 0 // loopK0Label: corresponds to loop "r" where r = 1 - asmjit::Label LoopKLabel = a->newLabel(); - asmjit::Label LoopK0Label = a->newLabel(); + asmjit::Label LoopKLabel = a->new_label(); + asmjit::Label LoopK0Label = a->new_label(); // Init C (result) vector registers initCRegs(a, rowRegs, colRegs); @@ -392,15 +392,15 @@ DirectConvCodeGenBase::getOrCreateDirectConv( issueLoopOverK(O1RegBlocksRem); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } @@ -651,28 +651,28 @@ DirectConvCodeGenBase:: asmjit::FuncFrame frame; frame.init(func); - auto dirtyVecRegs = asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15); + auto dirtyVecRegs = asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15); if (numRegs >= 16) { - dirtyVecRegs |= asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31); + dirtyVecRegs |= asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31); } - frame.setDirtyRegs(asmjit::RegGroup::kVec, dirtyVecRegs); - frame.setDirtyRegs( + frame.set_dirty_regs(asmjit::RegGroup::kVec, dirtyVecRegs); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, CBase, ic, ldcReg, o1Xoc, i1); + args.assign_all(buffer_A, buffer_B, CBase, ic, ldcReg, o1Xoc, i1); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); const x86::Gp& C_offset = a->gpz(12); const x86::Gp& buffer_B_saved = a->gpz(13); @@ -687,7 +687,7 @@ DirectConvCodeGenBase:: int colRegs = maxNRegs; auto issueLoopOverK = [&](int rowRegs) { - asmjit::Label LoopKLabel = a->newLabel(); + asmjit::Label LoopKLabel = a->new_label(); // Init C (result) vector registers initCRegs(a, rowRegs, colRegs); @@ -753,15 +753,15 @@ DirectConvCodeGenBase:: a->jl(LoopMBlocks); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp_convT fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index f78d5a9a09..303d9086e9 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -186,25 +186,25 @@ CodeGenBase::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); - frame.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label Loopk = a->newLabel(); - asmjit::Label LoopMBlocks = a->newLabel(); + asmjit::Label Loopk = a->new_label(); + asmjit::Label LoopMBlocks = a->new_label(); const x86::Gp& buffer_B_saved = a->gpz(10); const x86::Gp& C_Offset = a->gpz(11); @@ -276,7 +276,7 @@ CodeGenBase::getOrCreate( } // generate code for remainder if (mRegBlocksRem > 0) { - asmjit::Label LoopkRem = a->newLabel(); + asmjit::Label LoopkRem = a->new_label(); int rowRegs = mRegBlocksRem; // init C registers @@ -311,15 +311,15 @@ CodeGenBase::getOrCreate( a, rowRegs, colRegs, C_Offset, ldcReg, accum); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc index cdc3c03aa0..cb69a958f7 100644 --- a/src/GenerateKernelU8S8S32ACC16Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc @@ -149,28 +149,28 @@ CodeGenBase::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); - frame.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); - asmjit::Label LoopNBlocks = a->newLabel(); - asmjit::Label Loopk = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); + asmjit::Label LoopNBlocks = a->new_label(); + asmjit::Label Loopk = a->new_label(); x86::Gp buffer_B_saved = a->gpz(10); x86::Gp C_Offset = a->gpz(11); @@ -274,8 +274,8 @@ CodeGenBase::getOrCreate( } // generate code for remainder if (mRegBlocksRem > 0) { - asmjit::Label LoopNRem = a->newLabel(); - asmjit::Label LoopkRem = a->newLabel(); + asmjit::Label LoopNRem = a->new_label(); + asmjit::Label LoopkRem = a->new_label(); int rowRegs = mRegBlocksRem; a->xor_(jIdx.r32(), jIdx.r32()); @@ -335,15 +335,15 @@ CodeGenBase::getOrCreate( a->jl(LoopNRem); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc index 6c1960b4b8..1104b3841c 100644 --- a/src/GenerateKernelU8S8S32ACC32.cc +++ b/src/GenerateKernelU8S8S32ACC32.cc @@ -186,29 +186,29 @@ CodeGenBase::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - auto dirtyVecRegs = asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15); + auto dirtyVecRegs = asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15); if (numRegs >= 16) { - dirtyVecRegs |= asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31); + dirtyVecRegs |= asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31); } - frame.setDirtyRegs(asmjit::RegGroup::kVec, dirtyVecRegs); - frame.setDirtyRegs( + frame.set_dirty_regs(asmjit::RegGroup::kVec, dirtyVecRegs); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); - asmjit::Label LoopNBlocks = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); + asmjit::Label LoopNBlocks = a->new_label(); const x86::Gp& buffer_B_saved = a->gpz(10); const x86::Gp& C_Offset = a->gpz(11); @@ -232,7 +232,7 @@ CodeGenBase::getOrCreate( int colRegs = std::min(currColRegs, maxNRegs); auto issueLoopOverK = [&](int rowRegs) { - asmjit::Label LoopKLabel = a->newLabel(); + asmjit::Label LoopKLabel = a->new_label(); // Init C (result) vector registers initCRegs(a, rowRegs, colRegs); @@ -327,7 +327,7 @@ CodeGenBase::getOrCreate( } // generate code for remainder if (mRegBlocksRem > 0) { - asmjit::Label LoopNRem = a->newLabel(); + asmjit::Label LoopNRem = a->new_label(); a->xor_(jIdx.r32(), jIdx.r32()); a->bind(LoopNRem); @@ -345,15 +345,15 @@ CodeGenBase::getOrCreate( a->jl(LoopNRem); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc index 16b3b5ec77..e113a8af97 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc @@ -140,28 +140,28 @@ CodeGenBase::getOrCreate( asmjit::FuncFrame frame; frame.init(func); - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); - frame.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); + args.assign_all(buffer_A, buffer_B, B_pf, CBase, kSize, ldcReg); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); - asmjit::Label LoopMBlocks = a->newLabel(); - asmjit::Label LoopNBlocks = a->newLabel(); - asmjit::Label Loopk = a->newLabel(); + asmjit::Label LoopMBlocks = a->new_label(); + asmjit::Label LoopNBlocks = a->new_label(); + asmjit::Label Loopk = a->new_label(); x86::Gp buffer_B_saved = a->gpz(10); x86::Gp C_Offset = a->gpz(11); @@ -277,8 +277,8 @@ CodeGenBase::getOrCreate( } // generate code for remainder if (mRegBlocksRem > 0) { - asmjit::Label LoopNRem = a->newLabel(); - asmjit::Label LoopkRem = a->newLabel(); + asmjit::Label LoopNRem = a->new_label(); + asmjit::Label LoopkRem = a->new_label(); int rowRegs = mRegBlocksRem; a->xor_(jIdx.r32(), jIdx.r32()); @@ -340,15 +340,15 @@ CodeGenBase::getOrCreate( a->jl(LoopNRem); } - a->emitEpilog(frame); + a->emit_epilog(frame); jit_micro_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/GroupwiseConv.cc b/src/GroupwiseConv.cc index f92408f2ec..f4aef91942 100644 --- a/src/GroupwiseConv.cc +++ b/src/GroupwiseConv.cc @@ -231,16 +231,16 @@ jit_conv_kernel_fp GenConvKernel::getOrCreate() { frame_.init(func_); - frame_.setDirtyRegs( + frame_.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); - frame_.setDirtyRegs( + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); + frame_.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func_); - args.assignAll( + args.assign_all( in_acts_R_, wghts_R_, out_acts_R_, @@ -250,11 +250,11 @@ jit_conv_kernel_fp GenConvKernel::getOrCreate() { W_R_, row_offset_R_); - args.updateFuncFrame(frame_); + args.update_func_frame(frame_); frame_.finalize(); - a->emitProlog(frame_); - a->emitArgsAssignment(frame_, args); + a->emit_prolog(frame_); + a->emit_args_assignment(frame_, args); // We have run out of register so can't keep // this in a register. It's generated again at @@ -297,16 +297,16 @@ jit_conv_kernel_fp GenConvKernel::getOrCreate() { a, false /* isTopEdge */, this->use_bottom_padding_ /* isBottomEdge */); } - a->emitEpilog(frame_); + a->emit_epilog(frame_); jit_conv_kernel_fp fn = nullptr; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { unique_lock lock(this->rtMutex_); err = this->runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { cout << "Error: in fn add" << '\n'; return nullptr; } @@ -419,11 +419,11 @@ void GenConvKernel::genForTopOrBottomEdge( a->movsxd( loopR1_, x86::dword_ptr( - x86::rsp, frame_.saOffsetFromSP() + func_.arg(6).stackOffset())); - asmjit::Label LoopWStart = a->newLabel(); - asmjit::Label LoopWEnd = a->newLabel(); - asmjit::Label skipRightEdge = a->newLabel(); - asmjit::Label skipRightEdgeTemp = a->newLabel(); + x86::rsp, frame_.sa_offset_from_sp() + func_.arg(6).stack_offset())); + asmjit::Label LoopWStart = a->new_label(); + asmjit::Label LoopWEnd = a->new_label(); + asmjit::Label skipRightEdge = a->new_label(); + asmjit::Label skipRightEdgeTemp = a->new_label(); a->cmp(loopR1_, static_cast(this->W_PAD_)); a->jle(skipRightEdgeTemp); @@ -507,10 +507,10 @@ void GenConvKernel::genCoreInsts(x86::Emitter* a) { a->dec(H_end_R_); } // main compute - asmjit::Label LoopHStart = a->newLabel(); - asmjit::Label LoopHEnd = a->newLabel(); - asmjit::Label LoopWStart = a->newLabel(); - asmjit::Label LoopWEnd = a->newLabel(); + asmjit::Label LoopHStart = a->new_label(); + asmjit::Label LoopHEnd = a->new_label(); + asmjit::Label LoopWStart = a->new_label(); + asmjit::Label LoopWEnd = a->new_label(); // H loop a->mov(loopR1_, H_start_R_); @@ -521,9 +521,9 @@ void GenConvKernel::genCoreInsts(x86::Emitter* a) { a->movsxd( loopR2_, x86::dword_ptr( - x86::rsp, frame_.saOffsetFromSP() + func_.arg(6).stackOffset())); - asmjit::Label skipRightEdge = a->newLabel(); - asmjit::Label skipRightEdgeTemp = a->newLabel(); + x86::rsp, frame_.sa_offset_from_sp() + func_.arg(6).stack_offset())); + asmjit::Label skipRightEdge = a->new_label(); + asmjit::Label skipRightEdgeTemp = a->new_label(); a->cmp(loopR2_, static_cast(this->W_PAD_)); a->jle(skipRightEdgeTemp); diff --git a/src/RowWiseSparseAdagradFused.cc b/src/RowWiseSparseAdagradFused.cc index fe01b16dae..932eecf180 100644 --- a/src/RowWiseSparseAdagradFused.cc +++ b/src/RowWiseSparseAdagradFused.cc @@ -165,25 +165,25 @@ typename ReturnFunctionSignature:: frame.init(func); if constexpr (instSet == inst_set_t::avx2) { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); } else { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); } - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14)); asmjit::FuncArgsAssignment args(&func); - args.assignAll( + args.assign_all( output_size, index_size, data_size, @@ -196,10 +196,10 @@ typename ReturnFunctionSignature:: lr, rand_buffer); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); constexpr int vlen = simd_info::WIDTH_32BIT_ELEMS; constexpr int NUM_VEC_REG = simd_info::NUM_VEC_REGS; @@ -314,10 +314,10 @@ typename ReturnFunctionSignature:: a->add(scratchReg1, indices); a->mov(index_size, scratchReg1); - asmjit::Label exit = a->newLabel(); - asmjit::Label error = a->newLabel(); - asmjit::Label LoopRangeIndexBegin = a->newLabel(); - asmjit::Label LoopRangeIndexEnd = a->newLabel(); + asmjit::Label exit = a->new_label(); + asmjit::Label error = a->new_label(); + asmjit::Label LoopRangeIndexBegin = a->new_label(); + asmjit::Label LoopRangeIndexEnd = a->new_label(); // rangeIndex loop begin (iterate output_size times) a->bind(LoopRangeIndexBegin); @@ -409,8 +409,8 @@ typename ReturnFunctionSignature:: a->cmp(scratchReg1, index_size); a->jg(error); - asmjit::Label LoopDataIndexBegin = a->newLabel(); - asmjit::Label LoopDataIndexEnd = a->newLabel(); + asmjit::Label LoopDataIndexBegin = a->new_label(); + asmjit::Label LoopDataIndexEnd = a->new_label(); // dataIndex loop begins (iterate lengths_R_ times) a->bind(LoopDataIndexBegin); @@ -430,8 +430,8 @@ typename ReturnFunctionSignature:: a->jae(error); if (prefetch) { - asmjit::Label pref_dist_reset_start = a->newLabel(); - asmjit::Label pref_dist_reset_end = a->newLabel(); + asmjit::Label pref_dist_reset_start = a->new_label(); + asmjit::Label pref_dist_reset_end = a->new_label(); // out of bound handling for prefetch a->mov(scratchReg2, indices); a->add( @@ -727,17 +727,17 @@ typename ReturnFunctionSignature:: } a->mov(x86::eax, scratchReg1.r32()); - a->emitEpilog(frame); + a->emit_epilog(frame); // jit_fused8bitembedding_kernel fn; typename ReturnFunctionSignature:: jit_sparse_adagrad_kernel fn; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { cout << "Error: in fn add" << '\n'; return nullptr; } diff --git a/src/SparseAdagrad.cc b/src/SparseAdagrad.cc index 94d8cd66ce..cf3f71a849 100644 --- a/src/SparseAdagrad.cc +++ b/src/SparseAdagrad.cc @@ -509,25 +509,25 @@ GenSparseAdagrad::getOrCreate( frame.init(func); if constexpr (instSet == inst_set_t::avx2) { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); } else { - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kVec, - asmjit::Support::bitMask(0, 1, 2, 3, 4, 5, 6, 7) | - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15) | - asmjit::Support::bitMask(16, 17, 18, 19, 20, 21, 22, 23) | - asmjit::Support::bitMask(24, 25, 26, 27, 28, 29, 30, 31)); + asmjit::Support::bit_mask(0, 1, 2, 3, 4, 5, 6, 7) | + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15) | + asmjit::Support::bit_mask(16, 17, 18, 19, 20, 21, 22, 23) | + asmjit::Support::bit_mask(24, 25, 26, 27, 28, 29, 30, 31)); } - frame.setDirtyRegs( + frame.set_dirty_regs( asmjit::RegGroup::kGp, - asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15)); + asmjit::Support::bit_mask(8, 9, 10, 11, 12, 13, 14, 15)); asmjit::FuncArgsAssignment args(&func); - args.assignAll( + args.assign_all( num_rows, param_size, w, @@ -541,10 +541,10 @@ GenSparseAdagrad::getOrCreate( counter, counter_halflife); - args.updateFuncFrame(frame); + args.update_func_frame(frame); frame.finalize(); - a->emitProlog(frame); - a->emitArgsAssignment(frame, args); + a->emit_prolog(frame); + a->emit_args_assignment(frame, args); constexpr int vlen = simd_info::WIDTH_32BIT_ELEMS; constexpr int NUM_VEC_REG = simd_info::NUM_VEC_REGS; @@ -603,9 +603,9 @@ GenSparseAdagrad::getOrCreate( unroll_factor = unroll_factor / 2; // accont for g_vreg } - asmjit::Label exit = a->newLabel(); - asmjit::Label LoopRangeIndexBegin = a->newLabel(); - asmjit::Label LoopRangeIndexEnd = a->newLabel(); + asmjit::Label exit = a->new_label(); + asmjit::Label LoopRangeIndexBegin = a->new_label(); + asmjit::Label LoopRangeIndexEnd = a->new_label(); a->vpbroadcastd(epsilon_vreg, epsilon); a->vpbroadcastd(lr_vreg, lr); @@ -644,7 +644,7 @@ GenSparseAdagrad::getOrCreate( // Check counter != nullptr && counter[idx] > 0 a->vmovaps(adjusted_weight_decay_vreg, weight_decay_vreg); - asmjit::Label skip_adjust_freq = a->newLabel(); + asmjit::Label skip_adjust_freq = a->new_label(); a->cmp(counter, 0); a->je(skip_adjust_freq); @@ -680,8 +680,8 @@ GenSparseAdagrad::getOrCreate( a->jg(exit); if (prefetch) { - asmjit::Label pref_dist_reset_start = a->newLabel(); - asmjit::Label pref_dist_reset_end = a->newLabel(); + asmjit::Label pref_dist_reset_start = a->new_label(); + asmjit::Label pref_dist_reset_end = a->new_label(); a->mov(temp2_, temp1_); a->add(temp2_, prefetch); @@ -755,16 +755,16 @@ GenSparseAdagrad::getOrCreate( a->bind(exit); a->mov(x86::eax, temp1_.r32()); - a->emitEpilog(frame); + a->emit_epilog(frame); typename ReturnFunctionSignature::jit_sparse_adagrad_kernel fn; - asmjit::Error err = 0; + asmjit::Error err = asmjit::Error::kOk; { std::unique_lock lock(rtMutex_); err = runtime().add(&fn, &code); } - if (err) { + if (err != asmjit::Error::kOk) { std::cout << "Error: in fn add" << '\n'; return nullptr; }