diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 1762a8eb7..d50e5993d 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -43,7 +43,7 @@ if (MODE_CHESS) # add_definitions(-DVERSION=2) # add_definitions(-DSUB_VERSION=8) add_definitions(-DVERSION=3) - add_definitions(-DSUPPORT960) +# add_definitions(-DSUPPORT960) endif() if (MODE_LICHESS) @@ -57,7 +57,7 @@ if (MODE_LICHESS) add_definitions(-DATOMIC) add_definitions(-DHORDE) add_definitions(-DRACE) - add_definitions(-DSUPPORT960) +# add_definitions(-DSUPPORT960) add_definitions(-DMCTS_TB_SUPPORT) add_definitions(-DVERSION=1) endif() diff --git a/engine/src/agents/config/searchsettings.cpp b/engine/src/agents/config/searchsettings.cpp index a22fff2f4..59fb6c463 100644 --- a/engine/src/agents/config/searchsettings.cpp +++ b/engine/src/agents/config/searchsettings.cpp @@ -33,7 +33,6 @@ SearchSettings::SearchSettings(): nodePolicyTemperature(1.0f), qValueWeight(1.0f), qVetoDelta(0.4f), - virtualLoss(1.0f), verbose(true), epsilonChecksCounter(100), useMCGS(true), @@ -49,7 +48,10 @@ SearchSettings::SearchSettings(): epsilonGreedyCounter(20), reuseTree(true), mctsSolver(false), - searchPlayerMode(MODE_TWO_PLAYER) + searchPlayerMode(MODE_TWO_PLAYER), + virtualStyle(VIRTUAL_VISIT), + virtualMixThreshold(1000), + virtualOffsetStrenght(0.001) { } diff --git a/engine/src/agents/config/searchsettings.h b/engine/src/agents/config/searchsettings.h index 84cb8e28c..fc3c6abdf 100644 --- a/engine/src/agents/config/searchsettings.h +++ b/engine/src/agents/config/searchsettings.h @@ -36,6 +36,13 @@ enum SearchPlayerMode { MODE_TWO_PLAYER }; +enum VirtualStyle { + VIRTUAL_LOSS, + VIRTUAL_VISIT, + VIRTUAL_OFFSET, + VIRTUAL_MIX +}; + struct SearchSettings { uint16_t multiPV; @@ -48,7 +55,6 @@ struct SearchSettings float qValueWeight; // describes how much better the highest Q-Value has to be to replace the candidate move with the highest visit count float qVetoDelta; - uint_fast32_t virtualLoss; bool verbose; 
uint_fast8_t epsilonChecksCounter; // bool enhanceCaptures; currently not support @@ -75,6 +81,12 @@ struct SearchSettings bool mctsSolver; // Defines the nubmer of players within the MCTS search. Available are MODE_SINGLE_PLAYER and MODE_TWO_PLAYER SearchPlayerMode searchPlayerMode; + // Defines the virtual style used to avoid conflicts between different threads within the same mini-batch + VirtualStyle virtualStyle; + // Defines the number of visits to switch from virtual-visit to virtual-loss + uint_fast32_t virtualMixThreshold; + // Defines the strength of the virtual offset + double virtualOffsetStrenght; SearchSettings(); }; diff --git a/engine/src/node.cpp b/engine/src/node.cpp index f932092f8..935ff1082 100644 --- a/engine/src/node.cpp +++ b/engine/src/node.cpp @@ -36,9 +36,9 @@ bool Node::is_sorted() const return sorted; } -double Node::get_q_sum(ChildIdx childIdx, float virtualLoss) const +double Node::get_q_sum_virtual_loss(ChildIdx childIdx) const { - return get_child_number_visits(childIdx) * double(get_q_value(childIdx)) + get_virtual_loss_counter(childIdx) * virtualLoss; + return get_child_number_visits(childIdx) * double(get_q_value(childIdx)) + get_virtual_loss_counter(childIdx); } bool Node::is_transposition() const @@ -504,18 +504,28 @@ bool Node::has_nn_results() const return hasNNResults; } -void Node::apply_virtual_loss_to_child(ChildIdx childIdx, uint_fast32_t virtualLoss) +void Node::apply_virtual_loss_to_child(ChildIdx childIdx, const SearchSettings* searchSettings) { // update the stats of the parent node // make it look like if one has lost X games from this node forward where X is the virtual loss value // temporarily reduce the attraction of this node by applying a virtual loss / // the effect of virtual loss will be undone if the playout is over - d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] - virtualLoss) / double(d->childNumberVisits[childIdx] + virtualLoss); + switch 
(get_virtual_style(searchSettings, d->childNumberVisits[childIdx])) { + case VIRTUAL_LOSS: + d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] - 1) / double(d->childNumberVisits[childIdx] + 1); + break; + case VIRTUAL_OFFSET: + d->qValues[childIdx] -= searchSettings->virtualOffsetStrenght; + case VIRTUAL_VISIT: ; // ignore + case VIRTUAL_MIX: ; // unreachable + } + // virtual increase the number of visits - d->childNumberVisits[childIdx] += virtualLoss; - d->visitSum += virtualLoss; + ++d->childNumberVisits[childIdx]; + ++d->visitSum; + // increment virtual loss counter - update_virtual_loss_counter(childIdx, virtualLoss); + update_virtual_loss_counter(childIdx); } float Node::get_q_value(ChildIdx childIdx) const @@ -642,20 +652,29 @@ uint32_t Node::get_real_visits(ChildIdx childIdx) const return d->childNumberVisits[childIdx] - d->virtualLossCounter[childIdx]; } -void backup_collision(float virtualLoss, const Trajectory& trajectory) { +void backup_collision(const SearchSettings* searchSettings, const Trajectory& trajectory) { for (auto it = trajectory.rbegin(); it != trajectory.rend(); ++it) { - it->node->revert_virtual_loss(it->childIdx, virtualLoss); + it->node->revert_virtual_loss(it->childIdx, searchSettings); } } -void Node::revert_virtual_loss(ChildIdx childIdx, float virtualLoss) +void Node::revert_virtual_loss(ChildIdx childIdx, const SearchSettings* searchSettings) { lock(); - d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] + virtualLoss) / (d->childNumberVisits[childIdx] - virtualLoss); - d->childNumberVisits[childIdx] -= virtualLoss; - d->visitSum -= virtualLoss; + switch (get_virtual_style(searchSettings, d->childNumberVisits[childIdx])) { + case VIRTUAL_LOSS: + d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] + 1) / (d->childNumberVisits[childIdx] - 1); + break; + case VIRTUAL_OFFSET: + d->qValues[childIdx] += 
searchSettings->virtualOffsetStrenght; + case VIRTUAL_MIX: ; // ignore + case VIRTUAL_VISIT: ; // ignore + } + --d->childNumberVisits[childIdx]; + --d->visitSum; + // decrement virtual loss counter - update_virtual_loss_counter(childIdx, virtualLoss); + update_virtual_loss_counter(childIdx); unlock(); } @@ -990,6 +1009,27 @@ void Node::disable_action(size_t childIdxForParent) d->qValues[childIdxForParent] = -INT_MAX; } +double Node::get_transposition_q_value(const SearchSettings *searchSettings, ChildIdx childIdx, uint_fast32_t transposVisits) +{ + double transposQValue; + switch(get_virtual_style(searchSettings, d->childNumberVisits[childIdx])) { + case VIRTUAL_LOSS: + transposQValue = get_q_sum_virtual_loss(childIdx) / transposVisits; + break; + case VIRTUAL_VISIT: + transposQValue = get_q_value(childIdx); + break; + case VIRTUAL_OFFSET: + transposQValue = double(get_q_value(childIdx)) + get_virtual_loss_counter(childIdx) * searchSettings->virtualOffsetStrenght; + case VIRTUAL_MIX: ; + // unreachable + } + if (searchSettings->searchPlayerMode == MODE_TWO_PLAYER) { + return -transposQValue; + } + return transposQValue; +} + void Node::enhance_moves(const SearchSettings* searchSettings) { // if (!searchSettings->enhanceChecks && !searchSettings->enhanceCaptures) { @@ -1295,7 +1335,7 @@ bool is_terminal_value(float value) return (value == WIN_VALUE || value == DRAW_VALUE || value == LOSS_VALUE); } -float get_transposition_q_value(uint_fast32_t transposVisits, double transposQValue, double targetQValue) +float get_transposition_backup_value(uint_fast32_t transposVisits, double transposQValue, double targetQValue) { return std::clamp(transposVisits * (targetQValue - transposQValue) + targetQValue, double(LOSS_VALUE), double(WIN_VALUE)); } diff --git a/engine/src/node.h b/engine/src/node.h index 971aef207..28350364c 100644 --- a/engine/src/node.h +++ b/engine/src/node.h @@ -84,6 +84,16 @@ struct NodeAndBudget { node(node), budget(budget), curState(state) {} }; +inline 
VirtualStyle get_virtual_style(const SearchSettings* searchSettings, uint_fast32_t visits) { + if (searchSettings->virtualStyle == VIRTUAL_MIX) { + if (visits > searchSettings->virtualMixThreshold) { + return VIRTUAL_LOSS; + } + return VIRTUAL_VISIT; + } + return searchSettings->virtualStyle; +} + class Node { private: @@ -190,13 +200,11 @@ class Node void revert_virtual_loss_and_update(ChildIdx childIdx, float value, const SearchSettings* searchSettings, bool solveForTerminal) { lock(); - // decrement virtual loss counter - update_virtual_loss_counter(childIdx, searchSettings->virtualLoss); valueSum += value; ++realVisitsSum; - if (d->childNumberVisits[childIdx] == searchSettings->virtualLoss) { + if (d->childNumberVisits[childIdx] == 1) { // set new Q-value based on return // (the initialization of the Q-value was by Q_INIT which we don't want to recover.) d->qValues[childIdx] = value; @@ -204,14 +212,30 @@ class Node else { // revert virtual loss and update the Q-value assert(d->childNumberVisits[childIdx] != 0); - d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] + searchSettings->virtualLoss + value) / d->childNumberVisits[childIdx]; + const uint_fast32_t childRealVisit = get_real_visits(childIdx); /* real (non-virtual) visits; read by both VIRTUAL_VISIT and VIRTUAL_OFFSET, so it must be initialised before the switch */ + double newQVal; + switch(get_virtual_style(searchSettings, d->childNumberVisits[childIdx])) { + case VIRTUAL_LOSS: + d->qValues[childIdx] = (double(d->qValues[childIdx]) * d->childNumberVisits[childIdx] + 1 + value) / d->childNumberVisits[childIdx]; + break; + case VIRTUAL_VISIT: + d->qValues[childIdx] = (double(d->qValues[childIdx]) * childRealVisit + value) / (childRealVisit + 1); + break; + case VIRTUAL_OFFSET: + newQVal = double(d->qValues[childIdx]) + d->virtualLossCounter[childIdx] * searchSettings->virtualOffsetStrenght; + newQVal = (newQVal * childRealVisit + value) / (childRealVisit + 1.0); + d->qValues[childIdx] = newQVal - ((d->virtualLossCounter[childIdx]-1) * searchSettings->virtualOffsetStrenght); + break; +
case VIRTUAL_MIX: ; + // unreachable + } + assert(!isnan(d->qValues[childIdx])); } - if (searchSettings->virtualLoss != 1) { - d->childNumberVisits[childIdx] -= size_t(searchSettings->virtualLoss) - 1; - d->visitSum -= size_t(searchSettings->virtualLoss) - 1; - } + // decrement virtual loss counter + update_virtual_loss_counter(childIdx); + if (freeBackup) { ++d->freeVisits; } @@ -225,7 +249,7 @@ class Node * @brief revert_virtual_loss Reverts the virtual loss for a target node * @param childIdx Index to the child node to update */ - void revert_virtual_loss(ChildIdx childIdx, float virtualLoss); + void revert_virtual_loss(ChildIdx childIdx, const SearchSettings* searchSettings); bool is_playout_node() const; @@ -259,7 +283,7 @@ class Node double get_value_sum() const; uint32_t get_real_visits() const; - void apply_virtual_loss_to_child(ChildIdx childIdx, uint_fast32_t virtualLoss); + void apply_virtual_loss_to_child(ChildIdx childIdx, const SearchSettings* searchSettings); void increment_no_visit_idx(); void fully_expand_node(); @@ -470,17 +494,17 @@ class Node */ void decrement_number_parents(); - double get_q_sum(ChildIdx childIdx, float virtualLoss) const; + double get_q_sum_virtual_loss(ChildIdx childIdx) const; template - void update_virtual_loss_counter(ChildIdx childIdx, float virtualLoss) + void update_virtual_loss_counter(ChildIdx childIdx) { if (increment) { - d->virtualLossCounter[childIdx] += virtualLoss; + ++d->virtualLossCounter[childIdx]; } else { assert(d->virtualLossCounter[childIdx] != 0); - d->virtualLossCounter[childIdx] -= virtualLoss; + --d->virtualLossCounter[childIdx]; } } @@ -515,6 +539,17 @@ class Node uint32_t get_number_of_nodes() const; + + /** + * @brief get_transposition_q_value Returns the Q-value (without virtualLoss) which connects to the transposition node. + * The q-Value is also multiplied by -1 if searchSettings->searchPlayerMode == MODE_TWO_PLAYER. 
+ * @param searchSettings Search settings struct + * @param childIdx child index + * @param transposVisits Number of visits connecting to the transposition node + * @return Q-Value converted to double + */ + double get_transposition_q_value(const SearchSettings* searchSettings, ChildIdx childIdx, uint_fast32_t transposVisits); + private: /** * @brief reserve_full_memory Reserves memory for all available child nodes @@ -765,12 +800,12 @@ bool is_terminal_value(float value); /** * @brief backup_collision Iteratively removes the virtual loss of the collision event that occurred * @param rootNode Root node of the tree - * @param virtualLoss Virtual loss value + * @param searchSettings Search settings struct * @param trajectory Trajectory on how to get to the given collision */ -void backup_collision(float virtualLoss, const Trajectory& trajectory); +void backup_collision(const SearchSettings* searchSettings, const Trajectory& trajectory); -float get_transposition_q_value(uint_fast32_t transposVisits, double transposQValue, double masterQValue); +float get_transposition_backup_value(uint_fast32_t transposVisits, double transposQValue, double masterQValue); /** * @brief backup_value Iteratively backpropagates a value prediction across all of the parents for this node. 
@@ -788,15 +823,12 @@ void backup_value(float value, const SearchSettings* searchSettings, const Traje if (targetQValue != 0) { const uint_fast32_t transposVisits = it->node->get_real_visits(it->childIdx); if (transposVisits != 0) { - const double transposQValue = -it->node->get_q_sum(it->childIdx, searchSettings->virtualLoss) / transposVisits; - value = get_transposition_q_value(transposVisits, transposQValue, targetQValue); + const double transposQValue = it->node->get_transposition_q_value(searchSettings, it->childIdx, transposVisits); + value = get_transposition_backup_value(transposVisits, transposQValue, targetQValue); } } - switch (searchSettings->searchPlayerMode) { - case MODE_TWO_PLAYER: + if (searchSettings->searchPlayerMode == MODE_TWO_PLAYER) { value = -value; - break; - case MODE_SINGLE_PLAYER: ; } freeBackup ? it->node->revert_virtual_loss_and_update(it->childIdx, value, searchSettings, solveForTerminal) : it->node->revert_virtual_loss_and_update(it->childIdx, value, searchSettings, solveForTerminal); @@ -818,5 +850,4 @@ void backup_value(float value, const SearchSettings* searchSettings, const Traje */ bool is_transposition_verified(const Node* node, const StateObj* state); - #endif // NODE_H diff --git a/engine/src/nodedata.cpp b/engine/src/nodedata.cpp index 1b56ff073..939dca762 100644 --- a/engine/src/nodedata.cpp +++ b/engine/src/nodedata.cpp @@ -52,10 +52,6 @@ void NodeData::reserve_initial_space() virtualLossCounter.reserve(initSize); nodeTypes.reserve(initSize); add_empty_node(); - if (initSize > 1) { - add_empty_node(); - ++noVisitIdx; - } } NodeData::NodeData(): diff --git a/engine/src/searchthread.cpp b/engine/src/searchthread.cpp index 4f8c6eb0a..be2a86664 100644 --- a/engine/src/searchthread.cpp +++ b/engine/src/searchthread.cpp @@ -188,7 +188,7 @@ Node* SearchThread::get_new_child_to_evaluate(NodeDescription& description) if (childIdx == uint16_t(-1)) { childIdx = currentNode->select_child_node(searchSettings); } - 
currentNode->apply_virtual_loss_to_child(childIdx, searchSettings->virtualLoss); + currentNode->apply_virtual_loss_to_child(childIdx, searchSettings); trajectoryBuffer.emplace_back(NodeAndIdx(currentNode, childIdx)); nextNode = currentNode->get_child_node(childIdx); @@ -246,12 +246,13 @@ Node* SearchThread::get_new_child_to_evaluate(NodeDescription& description) if (nextNode->is_transposition()) { nextNode->lock(); const uint_fast32_t transposVisits = currentNode->get_real_visits(childIdx); - const double transposQValue = -currentNode->get_q_sum(childIdx, searchSettings->virtualLoss) / transposVisits; + const double transposQValue = currentNode->get_transposition_q_value(searchSettings, childIdx, transposVisits); + if (nextNode->is_transposition_return(transposQValue)) { - const float qValue = get_transposition_q_value(transposVisits, transposQValue, nextNode->get_value()); + const float backupValue = get_transposition_backup_value(transposVisits, transposQValue, nextNode->get_value()); nextNode->unlock(); description.type = NODE_TRANSPOSITION; - transpositionValues->add_element(qValue); + transpositionValues->add_element(backupValue); currentNode->unlock(); return nextNode; } @@ -314,7 +315,7 @@ void SearchThread::backup_value_outputs() void SearchThread::backup_collisions() { for (size_t idx = 0; idx < collisionTrajectories.size(); ++idx) { - backup_collision(searchSettings->virtualLoss, collisionTrajectories[idx]); + backup_collision(searchSettings, collisionTrajectories[idx]); } collisionTrajectories.clear(); } @@ -322,8 +323,8 @@ void SearchThread::backup_collisions() { bool SearchThread::nodes_limits_ok() { return (searchLimits->nodes == 0 || (rootNode->get_node_count() < searchLimits->nodes)) && - (searchLimits->simulations == 0 || (rootNode->get_visits() < searchLimits->simulations)) && - (searchLimits->nodesLimit == 0 || (rootNode->get_node_count() < searchLimits->nodesLimit)); + (searchLimits->simulations == 0 || (rootNode->get_visits() < 
searchLimits->simulations)) && + (searchLimits->nodesLimit == 0 || (rootNode->get_node_count() < searchLimits->nodesLimit)); } bool SearchThread::is_root_node_unsolved() diff --git a/engine/src/searchthread.h b/engine/src/searchthread.h index cf007e28e..11cde23e3 100644 --- a/engine/src/searchthread.h +++ b/engine/src/searchthread.h @@ -186,6 +186,15 @@ class SearchThread : public NeuralNetAPIUser * @return uint_16_t(-1) for no action else custom idx */ ChildIdx select_enhanced_move(Node* currentNode) const; + + /** + * @brief get_current_transposition_q_value Returns the Q-value which connects to the transposition node + * @param currentNode Current node + * @param childIdx child index + * @param transposVisits Number of visits connecting to the transposition node + * @return Q-Value converted to double + */ + double get_current_transposition_q_value(const Node* currentNode, ChildIdx childIdx, uint_fast32_t transposVisits); }; void run_search_thread(SearchThread *t); diff --git a/engine/src/uci/crazyara.cpp b/engine/src/uci/crazyara.cpp index ab49cc9ee..7051d0125 100644 --- a/engine/src/uci/crazyara.cpp +++ b/engine/src/uci/crazyara.cpp @@ -723,7 +723,6 @@ void CrazyAra::init_search_settings() searchSettings.dirichletEpsilon = Options["Centi_Dirichlet_Epsilon"] / 100.0f; searchSettings.dirichletAlpha = Options["Centi_Dirichlet_Alpha"] / 100.0f; searchSettings.nodePolicyTemperature = Options["Centi_Node_Temperature"] / 100.0f; - searchSettings.virtualLoss = Options["Centi_Virtual_Loss"] / 100.0f; searchSettings.randomMoveFactor = Options["Centi_Random_Move_Factor"] / 100.0f; searchSettings.allowEarlyStopping = Options["Allow_Early_Stopping"]; useRawNetwork = Options["Use_Raw_Network"]; @@ -739,6 +738,22 @@ void CrazyAra::init_search_settings() } searchSettings.reuseTree = Options["Reuse_Tree"]; searchSettings.mctsSolver = Options["MCTS_Solver"]; + if (Options["Virtual_Style"] == "virtual_loss") { + searchSettings.virtualStyle = VIRTUAL_LOSS; + } + else if 
(Options["Virtual_Style"] == "virtual_visit") { + searchSettings.virtualStyle = VIRTUAL_VISIT; + } + else if (Options["Virtual_Style"] == "virtual_offset") { + searchSettings.virtualStyle = VIRTUAL_OFFSET; + } + else if (Options["Virtual_Style"] == "virtual_mix") { + searchSettings.virtualStyle = VIRTUAL_MIX; + } + else { + info_string_important("Unknown option", Options["Virtual_Style"], "for Virtual_Style"); + } + searchSettings.virtualMixThreshold = Options["Virtual_Mix_Threshold"]; } void CrazyAra::init_play_settings() diff --git a/engine/src/uci/optionsuci.cpp b/engine/src/uci/optionsuci.cpp index 222bbff4d..bfee22ebe 100644 --- a/engine/src/uci/optionsuci.cpp +++ b/engine/src/uci/optionsuci.cpp @@ -111,7 +111,6 @@ void OptionsUCI::init(OptionsMap &o) #endif o["Centi_Temperature_Decay"] << Option(92, 0, 100); o["Centi_U_Init_Divisor"] << Option(100, 1, 99999); - o["Centi_Virtual_Loss"] << Option(100, 0, 99999); #if defined(MXNET) && defined(TENSORRT) o["Context"] << Option("gpu", {"cpu", "gpu"}); #elif defined (TORCH) @@ -192,6 +191,8 @@ void OptionsUCI::init(OptionsMap &o) o["UCI_Variant"] << Option(StateConstants::DEFAULT_UCI_VARIANT().c_str(), {StateConstants::DEFAULT_UCI_VARIANT().c_str(), StateConstants::DEFAULT_UCI_VARIANT().c_str()}); #endif o["Use_Raw_Network"] << Option(false); + o["Virtual_Style"] << Option("virtual_mix", { "virtual_loss", "virtual_visit", "virtual_offset", "virtual_mix" }); + o["Virtual_Mix_Threshold"] << Option(1000, 1, 99999999); // additional UCI-Options for RL only #ifdef USE_RL o["Centi_Node_Random_Factor"] << Option(10, 0, 100); diff --git a/engine/src/util/blazeutil.h b/engine/src/util/blazeutil.h index 96c9a3265..d8aef7841 100644 --- a/engine/src/util/blazeutil.h +++ b/engine/src/util/blazeutil.h @@ -148,7 +148,7 @@ template void first_and_second_max(const DynamicVector& v, U endIdx, T& firstMax, T& secondMax, U& firstArg, U& secondArg) { firstMax = v[0]; - secondMax = -INT_MAX; + secondMax = v[0]; firstArg = 0; secondArg = 
0; for (size_t idx = 1; idx < endIdx; ++idx) {