2_policy_evaluation.html

<html>

<head>
	<script src="https://d3js.org/d3.v4.min.js"></script>

</head>

<body>
	<div id="WriteUp">
		This is from Page 89 of https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf
		I implement iterative Policy Evaluation, a Dynamic Programming (DP) method.
		DP assumes a full model of the environment and is computationally expensive.
		Policy Evaluation is used to determine the state-value function for a given policy pi.
		It can be solved in closed form, but here we turn the Bellman equation into an update rule that eventually converges to the true state-value function for the given policy pi.
	</div>
	<div id="grid"></div>
	<div id="coords"></div>
	<div id="lastaction"></div>
</body>


<script>
// Grid dimensions in cells. Read as globals by getData, drawGrid and takeAction,
// and passed to the Policy / ValueFunction / RewardFunction constructors below.
var numRows = 10
var numCols = 10

/**
 * Builds the layout model for the grid: one entry per cell, grouped by row.
 * Each cell records its column/row index and the pixel rectangle to draw.
 * The grid size is taken from the global numRows / numCols.
 *
 * @returns {Array<Array<{xIdx: number, yIdx: number, x: number, y: number, width: number, height: number}>>}
 *   data[row][col] for every cell of the grid.
 */
function getData(){
	const cellSize = 100 // every cell is a 100px square
	const origin = 1     // pixel offset of the first row and first column

	const rows = []
	for (let r = 0; r < numRows; r++){
		const cells = []
		for (let c = 0; c < numCols; c++){
			cells.push({
				xIdx: c,
				yIdx: r,
				x: origin + c * cellSize,
				y: origin + r * cellSize,
				width: cellSize,
				height: cellSize
			})
		}
		rows.push(cells)
	}

	return rows
}

// Cell layout for the grid, built once at load time; drawGrid binds it to the SVG.
var gridData = getData()

/**
 * Renders the grid into #grid as an SVG: one <g class="row"> per row of
 * gridData and one <rect class="square"> per cell. Each cell is filled with
 * a grey level taken from the global `normalized` matrix, so `normalized`
 * must be populated before this is called.
 */
function drawGrid(){
	// Grey level (0-255) for a cell, looked up by its row/column index.
	function greyLevel(cell){
		return Math.round(normalized[cell.yIdx][cell.xIdx] * 255)
	}

	const svg = d3.select("#grid")
		.append("svg")
		.attr("width", (100 * numCols + 2) + "px")
		.attr("height", (100 * numRows + 2) + "px")

	const rowGroups = svg.selectAll(".row")
		.data(gridData)
		.enter()
		.append("g")
		.attr("class", "row")

	rowGroups.selectAll(".square")
		.data(function(rowCells){ return rowCells })
		.enter()
		.append("rect")
		.attr("class", "square")
		.attr("x", function(cell){ return cell.x })
		.attr("y", function(cell){ return cell.y })
		.attr("width", function(cell){ return cell.width })
		.attr("height", function(cell){ return cell.height })
		.attr("yIdx", function(cell){ return cell.yIdx })
		.attr("xIdx", function(cell){ return cell.xIdx })
		.attr("valueAtState", function(cell){ return greyLevel(cell) })
		.style("fill", function(cell){
			// Same level on all three channels gives a pure grey.
			const level = greyLevel(cell)
			return "rgb(" + level + ", " + level + ", " + level + ")"
		})
		.style("stroke", "#222");
}

/**
 * Draws the agent as a red circle centred on the given grid cell, replacing
 * any ball drawn by a previous call. Expects drawGrid to have already
 * created the SVG inside #grid.
 *
 * @param {number} xPos - Column index of the cell.
 * @param {number} yPos - Row index of the cell.
 */
function drawBall(xPos, yPos) {
	// Remove the previous ball, if any. remove() is all that is needed;
	// exit() only has meaning after a data join, and there is none here.
	d3.select(".circle").remove()

	d3.select("#grid")
		.select("svg")
		.append("g")
		.attr("class", "circle")
		.append("circle")
		// Cells are 100px wide, so 50 + 100 * index is the cell centre.
		.attr("cx", (50 + 100 * (xPos)) + "px")
		.attr("cy", (50 + 100 * (yPos)) + "px")
		.attr("r", "25px")
		.style("fill", "red")
		.style("stroke", "black")
		.style("stroke-width", "3")
}

/**
 * Shows the agent's current coordinates in #coords as "(x,y)", replacing the
 * paragraph written by the previous call.
 *
 * @param {number[]} coords - Current position; rendered via string concatenation.
 */
function printCoords(coords){
	const container = d3.select("#coords")

	// Drop the old paragraph. exit() is only meaningful after a data join,
	// so it is not called here.
	container.select("p").remove()

	container
		.append("p")
		.text("(" + coords + ")")
}

/**
 * Shows the most recent action in #lastaction, replacing the paragraph
 * written by the previous call. When the global `collision` flag is set, the
 * text is coloured red and "collision!" is logged to the console.
 *
 * @param {string} lastaction - Action label to display.
 */
function printLastAction(lastaction){
	const container = d3.select("#lastaction")

	// Drop the old paragraph. exit() is only meaningful after a data join,
	// so it is not called here.
	container.select("p").remove()

	const paragraph = container
		.append("p")
		.text("Last Action: " + lastaction)

	if(collision){
		console.log("collision!")
		paragraph.style("color", "red")
	}
}

/**
 * Picks one of the four compass actions uniformly at random.
 *
 * @returns {string} One of "n", "s", "w", "e".
 */
function getNextAction(){
	// Declared locally: an undeclared assignment would leak a global and
	// throw a ReferenceError in strict-mode / module code.
	const actions = ["n", "s", "w", "e"]
	return actions[Math.floor(Math.random() * actions.length)];
}

/**
 * Applies a compass action to a position on the grid.
 *
 * Positions are [x, y] with x the column and y the row; "n" decreases y and
 * "s" increases it. A move that would leave the grid (bounded by the global
 * numCols / numRows) is rejected: the global `collision` flag is set to true
 * and the same `coords` array is returned unchanged. The flag is never
 * cleared here; callers reset it.
 *
 * @param {number[]} coords - Current position as [x, y].
 * @param {string} action - One of "n", "s", "w", "e".
 * @returns {number[]} The new [x, y], or `coords` itself on a collision.
 * @throws {Error} If `action` is not one of the four known actions.
 */
function takeAction(coords, action){
	let dx = 0
	let dy = 0
	switch (action) {
		case 'n': dy = -1; break
		case 's': dy = 1; break
		case 'w': dx = -1; break
		case 'e': dx = 1; break
		default:
			// Fail here with a clear message instead of returning undefined
			// and crashing later when the caller indexes the result.
			throw new Error("Unknown action: " + action)
	}

	const nextX = coords[0] + dx
	const nextY = coords[1] + dy
	const isOffGrid = nextX < 0 || nextX > numCols - 1 || nextY < 0 || nextY > numRows - 1
	if (isOffGrid) {
		collision = true
		return coords
	}
	return [nextX, nextY]
}

/**
 * Advances the animation by one step: clears the global `collision` flag,
 * picks a random action, moves the global `coords` accordingly, then redraws
 * the ball and refreshes the coordinate and last-action read-outs.
 */
function playGame(){
	collision = false
	// Local to this step; previously leaked as an undeclared global.
	const action = getNextAction()
	coords = takeAction(coords, action)
	drawBall(coords[0], coords[1])
	printCoords(coords)
	printLastAction(action)
}

/**
 * Action distribution for a single state: a uniform 0.25 for each of the
 * four compass actions, stored on `actionProbs`.
 */
class ActionProbabilities {
	constructor(){
		const uniform = 1 / 4;
		// Key order (n, s, e, w) is kept as-is: policyEvaluation iterates
		// Object.keys(actionProbs) and sums the terms in that order.
		this.actionProbs = { n: uniform, s: uniform, e: uniform, w: uniform };
	}
}


/* Policy Class */

/**
 * Tabular policy: a height x width grid holding one independent
 * ActionProbabilities instance per state.
 */
class Policy {
	/**
	 * @param {number} height - Number of rows.
	 * @param {number} width - Number of columns.
	 */
	constructor(height, width) {
		this.rows = Array.from({ length: height }, () =>
			Array.from({ length: width }, () => new ActionProbabilities())
		);
	}

	/**
	 * @param {number} y - Row index.
	 * @param {number} x - Column index.
	 * @returns {Object} The action -> probability map stored for that state.
	 */
	getPolicyAtState(y, x){
		const cell = this.rows[y][x];
		return cell.actionProbs;
	}
}

/**
 * Tabular state-value function: a height x width grid of numbers,
 * initialised to 0 and addressed as (row y, column x).
 */
class ValueFunction{
	/**
	 * @param {number} height - Number of rows.
	 * @param {number} width - Number of columns.
	 */
	constructor(height, width){
		this.height = height;
		this.width = width;
		this.rows = new Array(height);
		for (let y = 0; y < height; y++){
			// Sized by the `width` argument, not the page-level numCols global.
			this.rows[y] = new Array(width).fill(0);
		}
	}

	/**
	 * @param {number} y - Row index.
	 * @param {number} x - Column index.
	 * @returns {number} The value stored for that state.
	 */
	getValue(y, x){
		return (this.rows[y][x])
	}

	/**
	 * @param {number} y - Row index.
	 * @param {number} x - Column index.
	 * @param {number} value - New value for that state.
	 */
	setValue(y, x, value){
		this.rows[y][x] = value;
	}

	/** Logs the grid to the console, one row per line, values rounded to 3 decimals. */
	print(){
		for (let y = 0; y < this.height; y++){
			let line = "";
			for(let x = 0; x < this.width; x++){
				//TODO: Pretty this print up at some point (for spaces)
				line = line + (Math.round(this.rows[y][x] * 1000) / 1000).toString() + "|";
			}
			console.log(line);
		}
	}

	/**
	 * Returns a new height x width matrix with the absolute value of every
	 * state rescaled linearly to [0, 1] (smallest magnitude -> 0, largest -> 1).
	 * If every state has the same magnitude the result is all zeros rather
	 * than the NaN a 0/0 division would produce.
	 *
	 * @returns {number[][]} Normalised magnitudes, indexed [y][x].
	 */
	getNormalized(){
		const normalized = [];
		let min = Infinity;
		let max = -Infinity;
		for (let y = 0; y < this.height; y++){
			normalized.push([])
			for(let x = 0; x < this.width; x++){
				// Read from this instance, not from the global `v`.
				const abs = Math.abs(this.getValue(y, x))
				normalized[y].push(abs);
				if(abs < min)
					min = abs
				if(abs > max)
					max = abs
			}
		}

		const range = max - min;
		for (let y = 0; y < this.height; y++){
			for(let x = 0; x < this.width; x++){
				normalized[y][x] = range === 0 ? 0 : (normalized[y][x] - min) / range
			}
		}
		return normalized;
	}
}

/**
 * Tabular reward function: every state yields -1 except the goal in the
 * top-right corner (row 0, last column), which yields 0.
 */
class RewardFunction{
	/**
	 * @param {number} height - Number of rows.
	 * @param {number} width - Number of columns.
	 */
	constructor(height, width){
		this.rows = new Array(height);
		for (let y = 0; y < height; y++){
			// Sized by the `width` argument, not the page-level numCols global.
			this.rows[y] = new Array(width).fill(-1);
		}
		// An empty grid has no goal cell to write to.
		if (height > 0 && width > 0){
			this.rows[0][width - 1] = 0; //set goal state to top right
		}
	}

	/**
	 * @param {number} y - Row index.
	 * @param {number} x - Column index.
	 * @returns {number} The reward stored for that state.
	 */
	getReward(y, x){
		return (this.rows[y][x])
	}
}

// Model shared by policyEvaluation. Declared explicitly: undeclared
// assignments create implicit globals and throw in strict-mode code.
var p = new Policy(numRows, numCols);        // policy being evaluated
var v = new ValueFunction(numRows, numCols); // value estimates, updated in place
var r = new RewardFunction(numRows, numCols);
var gamma = 0.9;                             // discount applied to successor-state values

/**
 * Iterative policy evaluation over the global policy `p`, value function `v`,
 * reward function `r` and discount `gamma`.
 *
 * Sweeps every state and replaces its value with the policy-weighted sum of
 * (reward at successor + gamma * value of successor). Values are written back
 * immediately, so later states in a sweep see the updated values. Sweeping
 * stops once the largest change in a sweep is below `theta`.
 *
 * Side effects: mutates `v`; increments the global `numIter` once per state
 * update (not once per sweep); takeAction may set the global `collision` flag.
 * Successors are bounded by takeAction's global numRows / numCols, so
 * `height` / `width` are expected to match them.
 *
 * @param {number} height - Number of rows to sweep.
 * @param {number} width - Number of columns to sweep.
 * @param {number} [theta=0.1] - Convergence threshold on the per-sweep maximum change.
 */
function policyEvaluation(height, width, theta = 0.1){
	let delta;
	do {
		delta = 0;
		for (let y = 0; y < height; y++){
			for(let x = 0; x < width; x++){
				const currentV = v.getValue(y, x);
				const policyAtState = p.getPolicyAtState(y, x);
				const newValueAtState = Object.keys(policyAtState).map(function(key){
					// takeAction works in [x, y]; the tables are indexed (y, x).
					// Passing [y, x] would transpose every move and check the
					// bounds against the wrong axis on a non-square grid.
					const nextState = takeAction([x, y], key);
					const nextX = nextState[0];
					const nextY = nextState[1];
					const rewardAtNextState = r.getReward(nextY, nextX);
					const discountedValueAtNextState = gamma * v.getValue(nextY, nextX);
					return (policyAtState[key] * (rewardAtNextState + discountedValueAtNextState));
				}).reduce((a, b) => a + b, 0);
				v.setValue(y, x, newValueAtState);
				delta = Math.max(delta, Math.abs(currentV - newValueAtState));
				numIter ++;
			}
		}
	} while(delta >= theta)
}

// Evaluate the policy once at load time, then render the result.
var numIter = 0; // incremented by policyEvaluation once per state update
policyEvaluation(numRows, numCols);
console.log(numIter);
// Declared explicitly (it was an implicit global); drawGrid reads it for cell shading.
var normalized = v.getNormalized()


var collision = false
var coords = [0, numRows - 1] //Starting co-ords
drawGrid()
drawBall(coords[0], coords[1])
printCoords(coords)
// Uncomment to animate a random walk, one step per second.
//d3.interval(playGame, 1000)
</script>



</html>