Skip to content

Commit

Permalink
feat: Add the 'decisive moves' simulation policy option
Browse files Browse the repository at this point in the history
The 'decisive moves' simulation policy is as follows: If there is a move which leads to an immediate
win, then play this move. Otherwise, play a random move.
  • Loading branch information
Philippe Vaillancourt committed May 3, 2018
1 parent 7ec18c1 commit 8e9bcfb
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 75 deletions.
57 changes: 33 additions & 24 deletions src/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { DataStore } from './data-store'
import { DefaultSelect } from './mcts/select/select'
import { DefaultExpand } from './mcts/expand/expand'
import { UCB1, DefaultUCB1, DefaultBestChild } from './mcts/select/best-child/best-child'
import { DefaultSimulate } from './mcts/simulate/simulate'
import { DefaultSimulate, Simulate, DecisiveMoveSimulate } from './mcts/simulate/simulate'
import { DefaultBackPropagate } from './mcts/back-propagate/back-propagate'

/**
Expand All @@ -27,6 +27,7 @@ export class Controller<State extends Playerwise, Action> {
private mcts_!: MCTSFacade<State, Action>
private duration_!: number
private explorationParam_!: number
private simulate_!: string[]

/**
* Creates an instance of Controller.
Expand All @@ -52,9 +53,14 @@ export class Controller<State extends Playerwise, Action> {
config: {
duration: number
explorationParam?: number
simulate?: string[]
}
) {
this.init(funcs, config)
this.duration_ = config.duration
this.explorationParam_ = config.explorationParam || 1.414
this.simulate_ = config.simulate || []

this.init(funcs)
}

/**
Expand All @@ -72,34 +78,37 @@ export class Controller<State extends Playerwise, Action> {
* }} config
* @memberof Controller
*/
init(
funcs: {
generateActions: GenerateActions<State, Action>
applyAction: ApplyAction<State, Action>
stateIsTerminal: StateIsTerminal<State>
calculateReward: CalculateReward<State>
},
config: {
duration: number
explorationParam?: number
}
) {
this.duration_ = config.duration

init(funcs: {
generateActions: GenerateActions<State, Action>
applyAction: ApplyAction<State, Action>
stateIsTerminal: StateIsTerminal<State>
calculateReward: CalculateReward<State>
}) {
// This is where we bootstrap the library according to initialization options.
this.explorationParam_ = config.explorationParam || 1.414
const data: Map<string, MCTSState<State, Action>> = new Map()
const dataStore = new DataStore(data)
const expand = new DefaultExpand(funcs.applyAction, funcs.generateActions, dataStore)
const UCB1: UCB1<State, Action> = new DefaultUCB1()
const bestChild = new DefaultBestChild(UCB1)
const select = new DefaultSelect(funcs.stateIsTerminal, expand, bestChild)
const simulate = new DefaultSimulate(
funcs.stateIsTerminal,
funcs.generateActions,
funcs.applyAction,
funcs.calculateReward
)

let simulate: Simulate<State, Action>
if (this.simulate_.includes('decisive')) {
simulate = new DecisiveMoveSimulate(
funcs.stateIsTerminal,
funcs.generateActions,
funcs.applyAction,
funcs.calculateReward
)
} else {
simulate = new DefaultSimulate(
funcs.stateIsTerminal,
funcs.generateActions,
funcs.applyAction,
funcs.calculateReward
)
}

const backPropagate = new DefaultBackPropagate()
this.mcts_ = new DefaultMCTSFacade(
select,
Expand All @@ -109,7 +118,7 @@ export class Controller<State extends Playerwise, Action> {
bestChild,
funcs.generateActions,
dataStore,
config.duration,
this.duration_,
this.explorationParam_
)
}
Expand Down
3 changes: 3 additions & 0 deletions src/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ export interface StateIsTerminal<State extends Playerwise> {
* and a `number` representing the player, as arguments. Given the game `State`,
* it calculates a reward for the player and returns that reward as a `number`.
*
* Normally, you would want a win to return 1, a loss to return -1 and a draw
* to return 0, but you can decide on a different reward scheme.
*
* ### Example
* ```javascript
* function(state, player) {
Expand Down
31 changes: 11 additions & 20 deletions src/macao.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import { DefaultMCTSFacade } from './mcts'
import { DefaultMCTSFacade } from './mcts/mcts'
import { Controller } from './controller'
import {
Playerwise,
GenerateActions,
ApplyAction,
StateIsTerminal,
CalculateReward
} from './classes'
} from './entities'

/**
* The `Macao` class represents a Monte Carlo tree search that can be easily
Expand Down Expand Up @@ -65,7 +65,9 @@ export class Macao<State extends Playerwise, Action> {
* @param {CalculateReward<State>} funcs.calculateReward
* @param {object} config Configuration options
* @param {number} config.duration Run time of the algorithm, in milliseconds.
* @param {number | undefined} config.explorationParam - The exploration parameter constant
* @param {number | undefined} config.explorationParam The exploration parameter constant
* used in [UCT](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search). Defaults to 1.414.
* @param {string[] | undefined} config.simulate An array of the simulation algorithm
* enhancements you wish to use. Valid options: "decisive".
*/
constructor(
Expand All @@ -77,7 +79,12 @@ export class Macao<State extends Playerwise, Action> {
},
config: {
duration: number
explorationParam?: number
explorationParam: number
/**
* An array of the `simulate` algorithm enhancements you wish to use.
* Valid options: "decisive".
*/
simulate?: string[]
}
) {
this.controller_ = new Controller(funcs, config)
Expand All @@ -96,20 +103,4 @@ export class Macao<State extends Playerwise, Action> {
getAction(state: State, duration?: number): Action {
  // The search itself lives in the controller; this method only relays the
  // arguments and hands back whatever action the controller selected.
  const selectedAction = this.controller_.getAction(state, duration)
  return selectedAction
}

/**
* Forwards `funcs` and `config` unchanged to the controller's `init` method
* and returns this instance so that calls can be chained.
* @param {object} funcs The game-specific functions passed on to the controller.
* @param {object} config Configuration options passed on to the controller.
* @returns {this} This same instance.
*/
init(
funcs: {
generateActions: GenerateActions<State, Action>
applyAction: ApplyAction<State, Action>
stateIsTerminal: StateIsTerminal<State>
calculateReward: CalculateReward<State>
},
config: {
duration: number
explorationParam?: number
}
): this {
// All of the actual setup is delegated to the controller.
this.controller_.init(funcs, config)
return this
}
}
87 changes: 73 additions & 14 deletions src/mcts/simulate/simulate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,20 @@ import {
import { spliceRandom } from '../../utils'

/**
*
* The Simulate interface provides a playthrough of the game and is
* a part of the Monte Carlo Tree Search algorithm.
* @hidden
* @internal
* @export
* @interface Simulate
* @template State
* @template Action
*/
export interface Simulate<State, Action> {
/** Plays the game through starting from `state` and returns the numeric result of that playthrough. */
run: (state: State) => number
}

/**
*
* The DefaultSimulate class provides, through its [[run]] method,
* a standard playthrough of the game where each move is selected entirely at random.
* @hidden
* @internal
* @export
* @class DefaultSimulate
* @implements {Simulate<State, Action>}
* @template State
* @template Action
*/
export class DefaultSimulate<State extends Playerwise, Action> implements Simulate<State, Action> {
/**
Expand All @@ -47,9 +40,11 @@ export class DefaultSimulate<State extends Playerwise, Action> implements Simula
) {}

/**
*
*
* @param {State} state
* The `run` method of the [[DefaultSimulate]] class runs a standard,
* entirely random, simulation of the game and returns a number representing
* the result of the simulation from the perspective of the player who's just
* played a move and is now waiting for his opponent's turn.
* @param {State} state An object representing the state of the game.
* @returns {number}
* @memberof DefaultSimulate
*/
Expand All @@ -68,3 +63,67 @@ export class DefaultSimulate<State extends Playerwise, Action> implements Simula
return this.calculateReward_(state, player)
}
}

/**
 * The DecisiveMoveSimulate class provides, through its [[run]] method,
 * a playthrough of the game where each time there is a winning move to be played,
 * that move is selected, otherwise moves are selected at random.
 * See ["On the Huge Benefit of Decisive Moves in Monte-Carlo Tree
 * Search Algorithms"](https://hal.inria.fr/inria-00495078/document) - Teytaud & Teytaud
 * @hidden
 * @internal
 */
export class DecisiveMoveSimulate<State extends Playerwise, Action>
  implements Simulate<State, Action> {
  /**
   * Creates an instance of DecisiveMoveSimulate
   * @param {StateIsTerminal<State>} stateIsTerminal_
   * @param {GenerateActions<State, Action>} generateActions_
   * @param {ApplyAction<State, Action>} applyAction_
   * @param {CalculateReward<State>} calculateReward_
   * @memberof DecisiveMoveSimulate
   */
  constructor(
    private stateIsTerminal_: StateIsTerminal<State>,
    private generateActions_: GenerateActions<State, Action>,
    private applyAction_: ApplyAction<State, Action>,
    private calculateReward_: CalculateReward<State>
  ) {}

  /**
   * The `run` method of the [[DecisiveMoveSimulate]] class runs a simulated
   * playthrough of the game and returns a number representing
   * the result of the simulation from the perspective of the player who's just
   * played a move and is now waiting for his opponent's turn.
   *
   * During the simulation, each time there is a winning move to be played,
   * that move is selected, otherwise moves are selected at random.
   *
   * A move counts as winning only when `calculateReward` returns exactly `1`
   * for the state it leads to, evaluated for that state's `player`.
   * @param {State} state An object representing the state of the game.
   * @returns {number}
   * @memberof DecisiveMoveSimulate
   */
  run(state: State): number {
    // Remember whose perspective the final reward must be computed from.
    const player = state.player
    while (!this.stateIsTerminal_(state)) {
      // Generate possible actions
      const actions = this.generateActions_(state)

      let action: Action | undefined
      // Try every possible move; the first one that yields an immediate win is played.
      for (const move of actions) {
        const innerState = this.applyAction_(state, move)
        // `innerState.player` is presumably the player who has just made `move`.
        const result = this.calculateReward_(innerState, innerState.player)
        if (result === 1) {
          action = move
          break
        }
      }

      // Compare against `undefined` rather than using a truthiness check: a
      // winning move may itself be falsy (e.g. the index `0` or an empty
      // string) and must not be discarded in favour of a random move.
      if (action === undefined) action = spliceRandom(actions)

      // Apply action and create new state
      state = this.applyAction_(state, action)
    }
    return this.calculateReward_(state, player)
  }
}
16 changes: 0 additions & 16 deletions test/macao.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,4 @@ describe('The Macao instance', () => {
expect(macao.getAction(state)).toBeDefined()
})
})

// Chaining contract: `init` is expected to hand back the very same Macao instance.
describe('when calling init', () => {
it('should return itself for chaining', () => {
expect(
macao.init(
{
stateIsTerminal: ticTacToeFuncs.stateIsTerminal,
generateActions: ticTacToeFuncs.generateActions,
applyAction: ticTacToeFuncs.applyAction,
calculateReward: ticTacToeFuncs.calculateReward
},
// Only `duration` is supplied here; `explorationParam` is left out.
{ duration: 50 }
)
).toBe(macao)
})
})
})
35 changes: 34 additions & 1 deletion test/mcts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ import {
DefaultBestChild
} from '../src/mcts/select/best-child/best-child'
import { Select, DefaultSelect } from '../src/mcts/select/select'
import { Simulate, DefaultSimulate } from '../src/mcts/simulate/simulate'
import { Simulate, DefaultSimulate, DecisiveMoveSimulate } from '../src/mcts/simulate/simulate'
import { BackPropagate, DefaultBackPropagate } from '../src/mcts/back-propagate/back-propagate'
import { loopFor } from '../src/utils'

let dataStore: DataGateway<string, MCTSState<TicTacToeState, TicTacToeMove>>
let expand: Expand<TicTacToeState, TicTacToeMove>
Expand Down Expand Up @@ -175,6 +176,38 @@ describe('The DefaultSimulate instance', () => {
})
})

// Specs for the 'decisive moves' simulation policy, exercised with the tic-tac-toe fixtures.
describe('The DecisiveMoveSimulate instance', () => {
  const simulate = new DecisiveMoveSimulate(
    ticTacToeFuncs.stateIsTerminal,
    ticTacToeFuncs.generateActions,
    ticTacToeFuncs.applyAction,
    ticTacToeFuncs.calculateReward
  )

  it('returns a number that is either 1, 0 or -1', () => {
    const ticTacToeBoard = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    const state: TicTacToeState = {
      board: ticTacToeBoard,
      player: 1
    }
    // Playthroughs are random, so inspect ONE result and require it to be
    // exactly one of the three legal outcomes (bounds alone would accept 0.5).
    const result = simulate.run(state)
    expect([-1, 0, 1]).toContain(result)
  })

  describe('When there is a winning move possible', () => {
    it('should always play that move', () => {
      let result = 0
      loopFor(50).turns(() => {
        // A new board is built for every run. Per the `run` docs, `player` is
        // the one who has just moved, so player 1 is to move and can complete
        // the diagonal at [2][2].
        const ticTacToeBoard = [[1, -1, 0], [0, 1, 0], [-1, 0, 0]]
        const state: TicTacToeState = {
          board: ticTacToeBoard,
          player: -1
        }
        result += simulate.run(state)
      })
      // Every one of the 50 runs must end in a loss (-1) for player -1.
      expect(result).toBe(-50)
    })
  })
})

describe('The DefaultMCTSFacade instance', () => {
it('returns an action', () => {
const ticTacToeBoard = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
Expand Down

0 comments on commit 8e9bcfb

Please sign in to comment.