feat: Add First play urgency enhancement to selection phase

The First Play Urgency parameter allows the algorithm to exploit more than explore from the beginning when there is a good reason to do so. With FPU, the algorithm no longer waits to expand every child node before exploiting some of them.
snowfrogdev · May 5, 2018 · 3388c05 · 3388c05
1 parent 57072b3
commit 3388c05
Show file tree

Hide file tree

Showing 10 changed files with 310 additions and 258 deletions.
diff --git a/src/controller.ts b/src/controller.ts
@@ -32,7 +32,10 @@ export class Controller<State extends Playerwise, Action> {
   private mcts_!: MCTSFacade<State, Action>
   private duration_!: number
   private explorationParam_!: number
+  private fpuParam_!: number
   private simulate_!: string[]
+  private expand_!: string[]
+  private select_!: string[]
 
   /**
    * Creates an instance of Controller.
@@ -58,12 +61,18 @@ export class Controller<State extends Playerwise, Action> {
     config: {
       duration: number
       explorationParam?: number
+      fpuParam?: number
       simulate?: string[]
+      expand?: string[]
+      select?: string[]
     }
   ) {
     this.duration_ = config.duration
     this.explorationParam_ = config.explorationParam || 1.414
+    this.fpuParam_ = config.fpuParam || Infinity
     this.simulate_ = config.simulate || []
+    this.expand_ = config.expand || []
+    this.select_ = config.select || []
 
     this.init(funcs)
   }
@@ -92,10 +101,12 @@ export class Controller<State extends Playerwise, Action> {
     // This is where we bootstrap the library according to initialization options.
     const data: Map<string, MCTSState<State, Action>> = new Map()
     const dataStore = new DataStore(data)
+    const ucb1: UCB1<State, Action> = new DefaultUCB1(this.explorationParam_)
+    const bestChild = new DefaultBestChild(ucb1)
+
     const expand = new DefaultExpand(funcs.applyAction, funcs.generateActions, dataStore)
-    const UCB1: UCB1<State, Action> = new DefaultUCB1()
-    const bestChild = new DefaultBestChild(UCB1)
-    const select = new DefaultSelect(funcs.stateIsTerminal, expand, bestChild)
+
+    const select = new DefaultSelect(funcs.stateIsTerminal, expand, bestChild, ucb1, this.fpuParam_)
 
     let simulate: Simulate<State, Action>
     if (this.simulate_.includes('decisive')) {

diff --git a/src/macao.ts b/src/macao.ts
@@ -66,9 +66,16 @@ export class Macao<State extends Playerwise, Action> {
    * @param {object} config Configuration options
    * @param {number} config.duration Run time of the algorithm, in milliseconds.
    * @param {number | undefined} config.explorationParam The exploration parameter constant.
+   * Used in [UCT](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search). Defaults to 1.414.
+   * @param {number | undefined} config.fpuParam The First play urgency parameter. Used to encourage
+   * early exploitation. Defaults to `Infinity`.
+   * See [Exploration exploitation in Go:
+   * UCT for Monte-Carlo Go](https://hal.archives-ouvertes.fr/hal-00115330/document)
    * @param {string[]} config.simulate An array of the simulation algorithm enhancements
    * you wish to use.
-   * used in [UCT](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search). Defaults to 1.414.
+   * @param {string[]} config.expand An array of the expand algorithm enhancements
+   * you wish to use.
+   * @param {string[]} config.select An array of the select algorithm enhancements to use.
    */
   constructor(
     funcs: {
@@ -80,11 +87,22 @@ export class Macao<State extends Playerwise, Action> {
     config: {
       duration: number
       explorationParam?: number
+      fpuParam?: number
       /**
        * An array of the `simulate` algorithm enhancements you wish to use.
        * Valid options: "decisive", "anti-decisive".
        */
       simulate?: string[]
+      /**
+       * An array of the `expand` algorithm enhancements you wish to use.
+       * Valid options: none at the moment.
+       */
+      expand?: string[]
+      /**
+       * An array of the `select` algorithm enhancements you wish to use.
+       * Valid options: none at the moment.
+       */
+      select?: string[]
     }
   ) {
     this.controller_ = new Controller(funcs, config)

diff --git a/src/mcts/expand/expand.ts b/src/mcts/expand/expand.ts
@@ -1,6 +1,7 @@
 import { MCTSNode, Playerwise, ApplyAction, GenerateActions, MCTSState } from '../../entities'
 import { DataGateway } from '../mcts'
 import { spliceRandom } from '../../utils'
+import { BestChild } from '../select/best-child/best-child'
 
 /**
  *
@@ -26,26 +27,12 @@ export interface Expand<State, Action> {
  * @template Action
  */
 export class DefaultExpand<State extends Playerwise, Action> implements Expand<State, Action> {
-  /**
-   * Creates an instance of DefaultExpand.
-   * @param {ApplyAction<State, Action>} applyAction_
-   * @param {GenerateActions<State, Action>} generateActions_
-   * @param {DataGateway<string, MCTSState<State, Action>>} dataStore_
-   * @memberof DefaultExpand
-   */
   constructor(
     private applyAction_: ApplyAction<State, Action>,
     private generateActions_: GenerateActions<State, Action>,
     private dataStore_: DataGateway<string, MCTSState<State, Action>>
   ) {}
 
-  /**
-   *
-   *
-   * @param {MCTSNode<State, Action>} node
-   * @returns {MCTSNode<State, Action>}
-   * @memberof DefaultExpand
-   */
   run(node: MCTSNode<State, Action>): MCTSNode<State, Action> {
     const action = spliceRandom(node.possibleActionsLeftToExpand)
     const state = this.applyAction_(node.mctsState.state, action)

diff --git a/src/mcts/mcts.ts b/src/mcts/mcts.ts
@@ -88,12 +88,12 @@ export class DefaultMCTSFacade<State extends Playerwise, Action>
   getAction(state: State, duration?: number): Action {
     const rootNode = this.createRootNode_(state)
     loopFor(duration || this.duration_).milliseconds(() => {
-      const node = this.select_.run(rootNode, this.explorationParam_)
+      const node = this.select_.run(rootNode)
       const score = this.simulate_.run(node.mctsState.state)
       this.backPropagate_.run(node, score)
     })
-    const bestChild = this.bestChild_.run(rootNode, 0)
-    return bestChild.action as Action
+    const bestChild = this.bestChild_.run(rootNode, true)
+    return bestChild!.action as Action
   }
 
   /**

diff --git a/src/mcts/select/best-child/best-child.ts b/src/mcts/select/best-child/best-child.ts
@@ -10,7 +10,7 @@ import { MCTSNode, Playerwise, MCTSState } from '../../../entities'
  * @template Action
  */
 export interface BestChild<State, Action> {
-  run: (node: MCTSNode<State, Action>, explorationParam: number) => MCTSNode<State, Action>
+  run: (node: MCTSNode<State, Action>, exploit?: boolean) => MCTSNode<State, Action> | undefined
 }
 
 /**
@@ -40,14 +40,14 @@ export class DefaultBestChild<State extends Playerwise, Action>
    * @returns {MCTSNode<State, Action>}
    * @memberof DefaultBestChild
    */
-  run(node: MCTSNode<State, Action>, explorationParam: number): MCTSNode<State, Action> {
+  run(node: MCTSNode<State, Action>, exploit = false): MCTSNode<State, Action> | undefined {
     if (!node.children.length) {
-      throw new Error('Cannot find the best children as the current node does not have children')
+      return undefined
     }
 
     const selectedNode = node.children.reduce((p, c) => {
-      return this.UCB1_.run(node.mctsState, p.mctsState, explorationParam) >
-        this.UCB1_.run(node.mctsState, c.mctsState, explorationParam)
+      return this.UCB1_.run(node.mctsState, p.mctsState, exploit) >
+        this.UCB1_.run(node.mctsState, c.mctsState, exploit)
         ? p
         : c
     })
@@ -66,11 +66,7 @@ export class DefaultBestChild<State extends Playerwise, Action>
  * @template Action
  */
 export interface UCB1<State, Action> {
-  run(
-    parent: MCTSState<State, Action>,
-    child: MCTSState<State, Action>,
-    explorationParam: number
-  ): number
+  run(parent: MCTSState<State, Action>, child: MCTSState<State, Action>, exploit?: boolean): number
 }
 
 /**
@@ -84,6 +80,7 @@ export interface UCB1<State, Action> {
  * @template Action
  */
 export class DefaultUCB1<State, Action> implements UCB1<State, Action> {
+  constructor(private explorationParam_: number) {}
   /**
    *
    *
@@ -93,13 +90,10 @@ export class DefaultUCB1<State, Action> implements UCB1<State, Action> {
    * @returns {number}
    * @memberof DefaultUCB1
    */
-  run(
-    parent: MCTSState<State, Action>,
-    child: MCTSState<State, Action>,
-    explorationParam: number
-  ): number {
+  run(parent: MCTSState<State, Action>, child: MCTSState<State, Action>, exploit = false): number {
+    const explorationWeight = exploit ? 0 : this.explorationParam_ // don't mutate the field
     const exploitationTerm = child.reward / child.visits
     const explorationTerm = Math.sqrt(Math.log(parent.visits) / child.visits)
-    return exploitationTerm + explorationParam * explorationTerm
+    return exploitationTerm + explorationWeight * explorationTerm
   }
 }
diff --git a/src/mcts/select/select.ts b/src/mcts/select/select.ts
@@ -1,9 +1,11 @@
 import { MCTSNode, Playerwise, StateIsTerminal } from '../../entities'
 import { Expand } from '../expand/expand'
-import { BestChild } from './best-child/best-child'
+import { BestChild, UCB1 } from './best-child/best-child'
 
 /**
- *
+ * The Select interface represents the Selection part of the Monte Carlo Tree
+ * Search algorithm. This part of the algorithm deals with choosing which node
+ * in the tree to run a simulation on.
  * @hidden
  * @internal
  * @export
@@ -12,47 +14,39 @@ import { BestChild } from './best-child/best-child'
  * @template Action
  */
 export interface Select<State, Action> {
-  run: (node: MCTSNode<State, Action>, explorationParam: number) => MCTSNode<State, Action>
+  run: (node: MCTSNode<State, Action>) => MCTSNode<State, Action>
 }
 
 /**
- *
+ * The DefaultSelect class provides the standard Monte Carlo Tree Search algorithm
+ * with the selection phase. Through its [[run]] method, when supplied with a tree
+ * node, it will provide another tree node from which to run a simulation.
  * @hidden
  * @internal
- * @export
- * @class DefaultSelect
  * @implements {Select<State, Action>}
  * @template State
  * @template Action
  */
 export class DefaultSelect<State extends Playerwise, Action> implements Select<State, Action> {
-  /**
-   * Creates an instance of DefaultSelect.
-   * @param {StateIsTerminal<State>} stateIsTerminal_
-   * @param {Expand<State, Action>} expand_
-   * @param {BestChild<State, Action>} bestChild_
-   * @memberof DefaultSelect
-   */
   constructor(
     private stateIsTerminal_: StateIsTerminal<State>,
     private expand_: Expand<State, Action>,
-    private bestChild_: BestChild<State, Action>
+    private bestChild_: BestChild<State, Action>,
+    private ucb1_: UCB1<State, Action>,
+    private fpuParam_: number
   ) {}
 
-  /**
-   *
-   *
-   * @param {MCTSNode<State, Action>} node
-   * @param {number} explorationParam
-   * @returns {MCTSNode<State, Action>}
-   * @memberof DefaultSelect
-   */
-  run(node: MCTSNode<State, Action>, explorationParam: number): MCTSNode<State, Action> {
+  run(node: MCTSNode<State, Action>): MCTSNode<State, Action> {
     while (!this.stateIsTerminal_(node.mctsState.state)) {
+      const child = this.bestChild_.run(node)
+      if (!child) return this.expand_.run(node)
       if (node.isNotFullyExpanded()) {
-        return this.expand_.run(node)
+        const ucb1 = this.ucb1_.run(node.mctsState, child.mctsState)
+        if (ucb1 < this.fpuParam_) {
+          return this.expand_.run(node)
+        }
       }
-      node = this.bestChild_.run(node, explorationParam)
+      node = child
     }
     return node
   }

diff --git a/test/mcts.test.ts b/test/mcts.test.ts
@@ -7,7 +7,7 @@ import {
   ticTacToeBoard
 } from './tic-tac-toe/tic-tac-toe'
 import { MCTSState, MCTSNode } from '../src/entities'
-import { Expand, DefaultExpand } from '../src/mcts/expand/expand'
+import { Expand, DefaultExpand, FullExpand } from '../src/mcts/expand/expand'
 import {
   BestChild,
   UCB1,
@@ -37,9 +37,9 @@ beforeEach(() => {
   const map = new Map()
   dataStore = new DataStore(map)
   expand = new DefaultExpand(ticTacToeFuncs.applyAction, ticTacToeFuncs.generateActions, dataStore)
-  ucb1 = new DefaultUCB1()
+  ucb1 = new DefaultUCB1(1.414)
   bestChild = new DefaultBestChild(ucb1)
-  select = new DefaultSelect(ticTacToeFuncs.stateIsTerminal, expand, bestChild)
+  select = new DefaultSelect(ticTacToeFuncs.stateIsTerminal, expand, bestChild, ucb1, Infinity)
   simulate = new DefaultSimulate(
     ticTacToeFuncs.stateIsTerminal,
     ticTacToeFuncs.generateActions,
@@ -70,7 +70,7 @@ describe('The DefaultSelect instance', () => {
     const mtcsState = new MCTSState(state)
     const node = new MCTSNode(mtcsState, ticTacToeFuncs.generateActions(state))
     it('should return the current node', () => {
-      expect(select.run(node, 1.414)).toBe(node)
+      expect(select.run(node)).toBe(node)
     })
   })
   describe('when the current node is not terminal', () => {
@@ -83,7 +83,7 @@ describe('The DefaultSelect instance', () => {
     it('should return a node that is not the current node.', () => {
       const mtcsState = new MCTSState(state)
       const node = new MCTSNode(mtcsState, ticTacToeFuncs.generateActions(state))
-      const result = select.run(node, 1.414)
+      const result = select.run(node)
       expect(result).toBeInstanceOf(MCTSNode)
       expect(result).not.toBe(node)
     })
@@ -103,7 +103,7 @@ describe('The DefaultUCB1 function', () => {
       parent.visits = 300
       child.visits = 100
       child.reward = 50
-      expect(ucb1.run(parent, child, 1.414)).toBeCloseTo(0.8377)
+      expect(ucb1.run(parent, child)).toBeCloseTo(0.8377)
     })
   })
 })
@@ -141,7 +141,7 @@ describe('The DefaultBestChild instance', () => {
       child3State.visits = 50
       child3State.reward = 25
 
-      expect(bestChild.run(parentNode, 1.414)).toBe(parentNode.children[2])
+      expect(bestChild.run(parentNode)).toBe(parentNode.children[2])
     })
   })
 })