feat: Add the 'decisive moves' simulation policy option

The 'decisive moves' simulation policy is as follows: If there is a move which leads to an immediate win, then play this move. Otherwise, play a random move.
snowfrogdev · May 3, 2018 · 8e9bcfb · 8e9bcfb
1 parent 7ec18c1
commit 8e9bcfb
Show file tree

Hide file tree

Showing 6 changed files with 154 additions and 75 deletions.
diff --git a/src/controller.ts b/src/controller.ts
@@ -11,7 +11,7 @@ import { DataStore } from './data-store'
 import { DefaultSelect } from './mcts/select/select'
 import { DefaultExpand } from './mcts/expand/expand'
 import { UCB1, DefaultUCB1, DefaultBestChild } from './mcts/select/best-child/best-child'
-import { DefaultSimulate } from './mcts/simulate/simulate'
+import { DefaultSimulate, Simulate, DecisiveMoveSimulate } from './mcts/simulate/simulate'
 import { DefaultBackPropagate } from './mcts/back-propagate/back-propagate'
 
 /**
@@ -27,6 +27,7 @@ export class Controller<State extends Playerwise, Action> {
   private mcts_!: MCTSFacade<State, Action>
   private duration_!: number
   private explorationParam_!: number
+  private simulate_!: string[]
 
   /**
    * Creates an instance of Controller.
@@ -52,9 +53,14 @@ export class Controller<State extends Playerwise, Action> {
     config: {
       duration: number
       explorationParam?: number
+      simulate?: string[]
     }
   ) {
-    this.init(funcs, config)
+    this.duration_ = config.duration
+    this.explorationParam_ = config.explorationParam || 1.414
+    this.simulate_ = config.simulate || []
+
+    this.init(funcs)
   }
 
   /**
@@ -72,34 +78,37 @@ export class Controller<State extends Playerwise, Action> {
    *     }} config
    * @memberof Controller
    */
-  init(
-    funcs: {
-      generateActions: GenerateActions<State, Action>
-      applyAction: ApplyAction<State, Action>
-      stateIsTerminal: StateIsTerminal<State>
-      calculateReward: CalculateReward<State>
-    },
-    config: {
-      duration: number
-      explorationParam?: number
-    }
-  ) {
-    this.duration_ = config.duration
-
+  init(funcs: {
+    generateActions: GenerateActions<State, Action>
+    applyAction: ApplyAction<State, Action>
+    stateIsTerminal: StateIsTerminal<State>
+    calculateReward: CalculateReward<State>
+  }) {
     // This is where we bootstrap the library according to initialization options.
-    this.explorationParam_ = config.explorationParam || 1.414
     const data: Map<string, MCTSState<State, Action>> = new Map()
     const dataStore = new DataStore(data)
     const expand = new DefaultExpand(funcs.applyAction, funcs.generateActions, dataStore)
     const UCB1: UCB1<State, Action> = new DefaultUCB1()
     const bestChild = new DefaultBestChild(UCB1)
     const select = new DefaultSelect(funcs.stateIsTerminal, expand, bestChild)
-    const simulate = new DefaultSimulate(
-      funcs.stateIsTerminal,
-      funcs.generateActions,
-      funcs.applyAction,
-      funcs.calculateReward
-    )
+
+    let simulate: Simulate<State, Action>
+    if (this.simulate_.includes('decisive')) {
+      simulate = new DecisiveMoveSimulate(
+        funcs.stateIsTerminal,
+        funcs.generateActions,
+        funcs.applyAction,
+        funcs.calculateReward
+      )
+    } else {
+      simulate = new DefaultSimulate(
+        funcs.stateIsTerminal,
+        funcs.generateActions,
+        funcs.applyAction,
+        funcs.calculateReward
+      )
+    }
+
     const backPropagate = new DefaultBackPropagate()
     this.mcts_ = new DefaultMCTSFacade(
       select,
@@ -109,7 +118,7 @@ export class Controller<State extends Playerwise, Action> {
       bestChild,
       funcs.generateActions,
       dataStore,
-      config.duration,
+      this.duration_,
       this.explorationParam_
     )
   }

diff --git a/src/entities.ts b/src/entities.ts
@@ -79,6 +79,9 @@ export interface StateIsTerminal<State extends Playerwise> {
  * and a `number` representing the player, as arguments. Given the game `State`,
  * it calculates a reward for the player and returns that reward as a `number`.
  *
+ * Normally, you would want a win to return 1, a loss to return -1 and a draw
+ * to return 0, but you can decide on a different reward scheme.
+ *
  * ### Example
  * ```javascript
  * function(state, player) {

diff --git a/src/macao.ts b/src/macao.ts
@@ -1,12 +1,12 @@
-import { DefaultMCTSFacade } from './mcts'
+import { DefaultMCTSFacade } from './mcts/mcts'
 import { Controller } from './controller'
 import {
   Playerwise,
   GenerateActions,
   ApplyAction,
   StateIsTerminal,
   CalculateReward
-} from './classes'
+} from './entities'
 
 /**
  * The `Macao` class represents a Monte Carlo tree search that can be easily
@@ -65,7 +65,9 @@ export class Macao<State extends Playerwise, Action> {
    * @param {CalculateReward<State>} funcs.calculateReward
    * @param {object} config Configuration options
    * @param {number} config.duration Run time of the algorithm, in milliseconds.
-   * @param {number | undefined} config.explorationParam - The exploration parameter constant
-   * used in [UCT](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search). Defaults to 1.414.
+   * @param {number | undefined} config.explorationParam The exploration parameter constant
+   * used in [UCT](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search). Defaults to 1.414.
+   * @param {string[]} config.simulate An array of the simulation algorithm enhancements
+   * you wish to use.
    */
   constructor(
@@ -77,7 +79,12 @@ export class Macao<State extends Playerwise, Action> {
     },
     config: {
       duration: number
-      explorationParam?: number
+      explorationParam: number
+      /**
+       * An array of the `simulate` algorithm enhancements you wish to use.
+       * Valid options: "decisive".
+       */
+      simulate?: string[]
     }
   ) {
     this.controller_ = new Controller(funcs, config)
@@ -96,20 +103,4 @@ export class Macao<State extends Playerwise, Action> {
   getAction(state: State, duration?: number): Action {
     return this.controller_.getAction(state, duration)
   }
-
-  init(
-    funcs: {
-      generateActions: GenerateActions<State, Action>
-      applyAction: ApplyAction<State, Action>
-      stateIsTerminal: StateIsTerminal<State>
-      calculateReward: CalculateReward<State>
-    },
-    config: {
-      duration: number
-      explorationParam?: number
-    }
-  ): this {
-    this.controller_.init(funcs, config)
-    return this
-  }
 }
diff --git a/src/mcts/simulate/simulate.ts b/src/mcts/simulate/simulate.ts
@@ -8,27 +8,20 @@ import {
 import { spliceRandom } from '../../utils'
 
 /**
- *
+ * The Simulate interface provides a playthrough of the game and is
+ * a part of the Monte Carlo Tree Search algorithm.
  * @hidden
  * @internal
- * @export
- * @interface Simulate
- * @template State
- * @template Action
  */
 export interface Simulate<State, Action> {
   run: (state: State) => number
 }
 
 /**
- *
+ * The DefaultSimulate class provides, through its [[run]] method,
+ * a standard playthrough of the game where each move is selected entirely at random.
  * @hidden
  * @internal
- * @export
- * @class DefaultSimulate
- * @implements {Simulate<State, Action>}
- * @template State
- * @template Action
  */
 export class DefaultSimulate<State extends Playerwise, Action> implements Simulate<State, Action> {
   /**
@@ -47,9 +40,11 @@ export class DefaultSimulate<State extends Playerwise, Action> implements Simula
   ) {}
 
   /**
-   *
-   *
-   * @param {State} state
+   * The `run` method of the [[DefaultSimulate]] class runs a standard,
+   * entirely random, simulation of the game and returns a number representing
+   * the result of the simulation from the perspective of the player who's just
+   * played a move and is now waiting for his opponent's turn.
+   * @param {State} state An object representing the state of the game.
    * @returns {number}
    * @memberof DefaultSimulate
    */
@@ -68,3 +63,67 @@ export class DefaultSimulate<State extends Playerwise, Action> implements Simula
     return this.calculateReward_(state, player)
   }
 }
+
+/**
+ * The DecisiveMoveSimulate class provides, through its [[run]] method,
+ * a playthrough of the game where each time there is a winning move to be played,
+ * that move is selected, otherwise moves are selected at random.
+ * See ["On the Huge Benefit of Decisive Moves in Monte-Carlo Tree
+ * Search Algorithms"](https://hal.inria.fr/inria-00495078/document) - Teytaud & Teytaud
+ * @hidden
+ * @internal
+ */
+export class DecisiveMoveSimulate<State extends Playerwise, Action>
+  implements Simulate<State, Action> {
+  /**
+   * Creates an instance of DecisiveMoveSimulate
+   * @param {StateIsTerminal<State>} stateIsTerminal_
+   * @param {GenerateActions<State, Action>} generateActions_
+   * @param {ApplyAction<State, Action>} applyAction_
+   * @param {CalculateReward<State>} calculateReward_
+   * @memberof DecisiveMoveSimulate
+   */
+  constructor(
+    private stateIsTerminal_: StateIsTerminal<State>,
+    private generateActions_: GenerateActions<State, Action>,
+    private applyAction_: ApplyAction<State, Action>,
+    private calculateReward_: CalculateReward<State>
+  ) {}
+
+  /**
+   * The `run` method of the [[DecisiveMoveSimulate]] class runs a simulated
+   * playthrough of the game and returns a number representing
+   * the result of the simulation from the perspective of the player who's just
+   * played a move and is now waiting for his opponent's turn.
+   *
+   * During the simulation, each time there is a winning move to be played,
+   * that move is selected, otherwise moves are selected at random.
+   * @param {State} state An object representing the state of the game.
+   * @returns {number}
+   * @memberof DecisiveMoveSimulate
+   */
+  run(state: State): number {
+    const player = state.player
+    while (!this.stateIsTerminal_(state)) {
+      // Generate possible actions
+      const actions = this.generateActions_(state)
+
+      let action: Action | undefined
+      // Try each move; one whose resulting state rewards `innerState.player` with exactly 1 wins
+      for (const move of actions) {
+        const innerState = this.applyAction_(state, move)
+        const result = this.calculateReward_(innerState, innerState.player)
+        if (result === 1) {
+          action = move
+          break
+        }
+      }
+      // Compare against undefined: a truthiness test would discard falsy winning moves (0, '')
+      if (action === undefined) action = spliceRandom(actions)
+
+      // Apply action and create new state
+      state = this.applyAction_(state, action)
+    }
+    return this.calculateReward_(state, player)
+  }
+}
diff --git a/test/macao.test.ts b/test/macao.test.ts
@@ -27,20 +27,4 @@ describe('The Macao instance', () => {
       expect(macao.getAction(state)).toBeDefined()
     })
   })
-
-  describe('when calling init', () => {
-    it('should return itself for chaining', () => {
-      expect(
-        macao.init(
-          {
-            stateIsTerminal: ticTacToeFuncs.stateIsTerminal,
-            generateActions: ticTacToeFuncs.generateActions,
-            applyAction: ticTacToeFuncs.applyAction,
-            calculateReward: ticTacToeFuncs.calculateReward
-          },
-          { duration: 50 }
-        )
-      ).toBe(macao)
-    })
-  })
 })
diff --git a/test/mcts.test.ts b/test/mcts.test.ts
@@ -15,8 +15,9 @@ import {
   DefaultBestChild
 } from '../src/mcts/select/best-child/best-child'
 import { Select, DefaultSelect } from '../src/mcts/select/select'
-import { Simulate, DefaultSimulate } from '../src/mcts/simulate/simulate'
+import { Simulate, DefaultSimulate, DecisiveMoveSimulate } from '../src/mcts/simulate/simulate'
 import { BackPropagate, DefaultBackPropagate } from '../src/mcts/back-propagate/back-propagate'
+import { loopFor } from '../src/utils'
 
 let dataStore: DataGateway<string, MCTSState<TicTacToeState, TicTacToeMove>>
 let expand: Expand<TicTacToeState, TicTacToeMove>
@@ -175,6 +176,38 @@ describe('The DefaultSimulate instance', () => {
   })
 })
 
+describe('The DecisiveMoveSimulate instance', () => {
+  const simulate = new DecisiveMoveSimulate(
+    ticTacToeFuncs.stateIsTerminal,
+    ticTacToeFuncs.generateActions,
+    ticTacToeFuncs.applyAction,
+    ticTacToeFuncs.calculateReward
+  )
+  it('returns a number that is either 1, 0 or -1', () => {
+    const ticTacToeBoard = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
+    const state: TicTacToeState = { board: ticTacToeBoard, player: 1 }
+    // Check both bounds on the same playout; separate runs are independent random games.
+    const reward = simulate.run(state)
+    expect(reward).toBeGreaterThanOrEqual(-1)
+    expect(reward).toBeLessThanOrEqual(1)
+  })
+  describe('When there is a winning move possible', () => {
+    it('should always play that move', () => {
+      let result = 0
+      loopFor(50).turns(() => {
+        // 1 holds [0][0] and [1][1]; taking [2][2] completes a diagonal, so every run scores -1.
+        const ticTacToeBoard = [[1, -1, 0], [0, 1, 0], [-1, 0, 0]]
+        const state: TicTacToeState = {
+          board: ticTacToeBoard,
+          player: -1
+        }
+        result += simulate.run(state)
+      })
+      expect(result).toBe(-50)
+    })
+  })
+})
+
 describe('The DefaultMCTSFacade instance', () => {
   it('returns an action', () => {
     const ticTacToeBoard = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]