Skip to content

Commit

Permalink
AVX-512で入力特徴量を変換する際にすべてのトリガーに対して積算できていないのを修正
Browse files Browse the repository at this point in the history
  • Loading branch information
KazApps committed Sep 13, 2024
1 parent f35ef20 commit df1180c
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions source/eval/nnue/nnue_feature_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ class FeatureTransformer {
_mm512_load_si512(&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m512i sum1 =
_mm512_load_si512(&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
// Fold the accumulators of the remaining refresh triggers (indices 1 and up)
// into the running sums; the values for trigger index 0 are loaded just above.
// Each iteration adds the two 512-bit halves at [j * 2 + 0] and [j * 2 + 1]
// lane-wise as 16-bit integers. The operands are read by dereferencing the
// __m512i pointer directly rather than through _mm512_load_si512.
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm512_add_epi16(
sum0, reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm512_add_epi16(
sum1, reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(
kControl, _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
}
Expand Down

0 comments on commit df1180c

Please sign in to comment.