index.html

<!DOCTYPE html>
<html lang="en-US">
	<head>
		<title>ML Race -- AryanM</title>
		<meta charset="utf-8">
		<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0">
		<meta name="author" content="Aryan M">
		<meta name="description" content="Visualize the difference between machine learning and hardcoded instructions, through a game!">
		<meta name="keywords" content="AryanM">
		<meta name="language" content="English">
		<link rel="icon" href="https://mittaldev.com/favicon.ico">
	</head>
	<body style="text-align:center;font-family:Verdana, Geneva, Tahoma, sans-serif">
		<center>
			<span style="font-size:2em;"><strong>ML Race</strong></span><br>
			Aryan Mittal<br>
			(Explanation in <a href="https://github.com/aryanm5/MLRace/" target="_blank">README</a>)<br><br>
			<canvas id="canvas" style="background-color:lightskyblue;border:1px solid black;" width="500" height="500">Sorry, your browser doesn't support the canvas element. This won't work!</canvas>
			<br><br>
			<div style="font-family:monospace;font-size:1.5em;">
				<span style="display:inline-block;width:200px">Machine Learning:<br><span id="agentscore">0</span></span>
				<span style="display:inline-block;width:200px">Hardcoded:<br><span id="hardcodescore">0</span></span>
			</div>
			<canvas id="graph" width="500" height="500"></canvas>
			<br>
			<span style="font-family:monospace">
				Average Reward: <span id="averageReward">0</span><br>
				Current Reward: <span id="reward">0</span><br><br>
				Steps Per Tick: <input type="number" id="stepsPerTick" min="1" value="10" style="width:5em">
			</span>
			<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.9.3/Chart.min.js"></script>
		</center>
	</body>
	<script type="text/javascript" src="rl.js"></script>
	<script>
		var c = document.getElementById("canvas");
		var ctx = c.getContext("2d");
		var cc = document.getElementById("graph");
		c.width = 500;
		c.height = 500;
		var WIDTH = c.width;
		var HEIGHT = c.height;
		var interval;
		var maxdist = Math.sqrt(2);
		var averageRewardSpan = document.getElementById("averageReward");
		var rewardSpan = document.getElementById("reward");
		var agentScoreSpan = document.getElementById("agentscore");
		var hardcodeScoreSpan = document.getElementById("hardcodescore");
		var gamestarted = false;
		
		//circle
		function circle(x, y, r, stroke) {
		    ctx.beginPath();
		    ctx.arc(x, y, r, 0, 2 * Math.PI);
		    if (stroke == undefined) {
		        ctx.fill();
		    } else {
		        ctx.stroke();
		    }
		    ctx.closePath(); //may be able to remove
		}
		//rectangle
		function rect(x, y, w, h) {
		    ctx.beginPath();
		    ctx.rect(x, y, w, h);
		    ctx.closePath();
		    ctx.fill();
		}
		//line
		function line(x, y, x2, y2) {
		    ctx.beginPath();
		    ctx.moveTo(x, y);
		    ctx.lineTo(x2, y2);
		    ctx.closePath();
		    ctx.stroke();
		}
		//clear
		function clear() {
		    ctx.clearRect(0, 0, WIDTH, HEIGHT);
		}
		
		var myagent = {
		    x: Math.random() * 0.9 + 0.05,
		    y: Math.random() * 0.9 + 0.05,
		    rad: 0.05,
		    xvel: 0,
		    yvel: 0,
		    velchange: 0.0005,
		    maxspeed: 0.01,
		    points: 0,
		    draw: function(action, reward) {
		        // color agent by reward
		        var r, g, b;
		        var vv = reward + 0.5;
		        var ms = 255.0;
		        if (vv > 0) {
		            g = 255;
		            r = 255 - vv * ms;
		            b = 255 - vv * ms;
		        }
		        if (vv < 0) {
		            g = 255 + vv * ms;
		            r = 255;
		            b = 255 + vv * ms;
		        }
		        var vcol = 'rgb(' + Math.floor(r) + ',' + Math.floor(g) + ',' + Math.floor(b) + ')';
		        ctx.fillStyle = "black";
		        circle(this.x * WIDTH, this.y * HEIGHT, this.rad * WIDTH + 1, true);
		        ctx.fillStyle = vcol;
		        circle(this.x * WIDTH, this.y * HEIGHT, this.rad * WIDTH);
		
		        ctx.fillStyle = "black";
		        ctx.lineWidth = 3;
		        if (action == 0) {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH, this.y * HEIGHT - this.rad * HEIGHT / 1.5);
		        } else if (action == 1) {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH, this.y * HEIGHT + this.rad * HEIGHT / 1.5);
		        } else if (action == 2) {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH - this.rad * WIDTH / 1.5, this.y * HEIGHT);
		        } else if (action == 3) {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH + this.rad * WIDTH / 1.5, this.y * HEIGHT);
		        }
		    }
		};
		
		var red = {
		    x: Math.random() * 0.9 + 0.05,
		    y: Math.random() * 0.9 + 0.05,
		    xvel: 0,
		    yvel: 0,
		    velchange: 0.00003,
		    points: 0,
		    rad: 0.05,
		    maxspeed: 0.00018,
		    step: {
		        x: 0,
		        y: 0
		    }, //step.x and step.y
		    color: "goldenrod",
		    draw: function(action) {
		        ctx.fillStyle = "black";
		        circle(this.x * WIDTH, this.y * HEIGHT, this.rad * WIDTH + 1, true);
		        ctx.fillStyle = this.color;
		        circle(this.x * WIDTH, this.y * HEIGHT, this.rad * WIDTH);
		
		        ctx.fillStyle = "black";
		        ctx.lineWidth = 3;
		        if (action == "up") {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH, this.y * HEIGHT - this.rad * HEIGHT / 1.5);
		        } else if (action == "down") {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH, this.y * HEIGHT + this.rad * HEIGHT / 1.5);
		        } else if (action == "left") {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH - this.rad * WIDTH / 1.5, this.y * HEIGHT);
		        } else if (action == "right") {
		            arrow(this.x * WIDTH, this.y * HEIGHT, this.x * WIDTH + this.rad * WIDTH / 1.5, this.y * HEIGHT);
		        }
		    }
		};
		var green = {
		    x: Math.random() * 0.9 + 0.05,
		    y: Math.random() * 0.9 + 0.05,
		    rad: 0.1,
		    color: "lightgreen",
		    innercolor: "green",
		    draw: function() {
		        ctx.fillStyle = this.color;
		        circle(this.x * WIDTH, this.y * HEIGHT, this.rad * WIDTH);
		        ctx.fillStyle = this.innercolor;
		        circle(this.x * WIDTH, this.y * HEIGHT, 0.01 * WIDTH);
		    }
		};
		
		var env = {};
		env.getNumStates = function() {
		    return 6;
		}
		env.getMaxNumActions = function() {
		    return 5;
		}
		
		// create the agent, yay!
		var spec = {}
		spec.update = 'qlearn'; // qlearn | sarsa
		spec.gamma = 0.9; // discount factor, [0, 1)
		spec.epsilon = 0.2; // initial epsilon for epsilon-greedy policy, [0, 1)
		spec.alpha = 0.01; // value function learning rate
		spec.experience_add_every = 10; // number of time steps before we add another experience to replay memory
		spec.experience_size = 5000; // size of experience replay memory
		spec.learning_steps_per_iteration = 20;
		spec.tderror_clamp = 1.0; // for robustness
		spec.num_hidden_units = 100 // number of neurons in hidden layer
		
		agent = new RL.DQNAgent(env, spec);
		
		var s = []; //agent x, y; agent velocity vx, vy; green x, y; red x, y
		var reward;
		var frame = 0;
		var stepsPerTick = 10; //higher means faster but choppier 1 good
		var totalScore = 0;
		var averageReward = 0;
		
		setInterval(function() { // start the learning loop
		    var action;
		    var redaction;
		    for (var i = 0; i < stepsPerTick; ++i) {
		        reward = 0;
		        s = [myagent.x, myagent.y, myagent.xvel, myagent.yvel, green.x, green.y];
		        action = agent.act(s); // s is an array of length 8, action is 0, 1, 2, or 3, or 4
		        // execute action in environment and get the reward
		
		        myagent.xvel *= 0.95; //dampener
		        myagent.yvel *= 0.95;
		        red.xvel *= 0.95;
		        red.yvel *= 0.95;
		
		        if (action === 0) {
		            myagent.yvel -= myagent.velchange;
		        } else if (action === 1) {
		            myagent.yvel += myagent.velchange;
		        } else if (action === 2) {
		            myagent.xvel -= myagent.velchange;
		        } else if (action === 3) {
		            myagent.xvel += myagent.velchange;
		        }
		        if (myagent.xvel > myagent.maxspeed) {
		            myagent.xvel = myagent.maxspeed;
		        }
		        if (myagent.yvel > myagent.maxspeed) {
		            myagent.yvel = myagent.maxspeed;
		        }
		        if (myagent.xvel < -1 * myagent.maxspeed) {
		            myagent.xvel = -1 * myagent.maxspeed;
		        }
		        if (myagent.yvel < -1 * myagent.maxspeed) {
		            myagent.yvel = -1 * myagent.maxspeed;
		        }
		
		        myagent.x += myagent.xvel;
		        myagent.y += myagent.yvel;
		
		        //bounce and border conditions
		        var border = 0.05;
		        if (myagent.x < 0.05) {
		            myagent.xvel *= -0.5; //bounce!
		            myagent.x = 0.05;
		        }
		        if (myagent.x > 0.95) {
		            myagent.xvel *= -0.5;
		            myagent.x = 0.95;
		        }
		        if (myagent.y < 0.05) {
		            myagent.yvel *= -0.5;
		            myagent.y = 0.05;
		        }
		        if (myagent.y > 0.95) {
		            myagent.yvel *= -0.5;
		            myagent.y = 0.95;
		        }
		
		        var xdist = (green.x - red.x);
		        var ydist = (green.y - red.y);
		        if (Math.abs(xdist)/* - 0.5 + Math.random() * 1*/ > Math.abs(ydist)/* - 0.5 + Math.random() * 1*/) {
		            if (xdist < 0) {
		                red.xvel -= red.velchange;
		                redaction = "left";
		            } else if (xdist > 0) {
		                red.xvel += red.velchange;
		                redaction = "right";
		            }
		        } else if (Math.abs(ydist)/* - 0.5 + Math.random() * 1*/ > Math.abs(xdist)/* - 0.5 + Math.random() * 1*/) {
		            if (ydist < 0) {
		                red.yvel -= red.velchange;
		                redaction = "up";
		            } else if (ydist > 0) {
		                red.yvel += red.velchange;
		                redaction = "down";
		            }
		        } else {
		            redaction = "none";
		        }
		
		        if (red.xvel > red.maxspeed) {
		            red.xvel = red.maxspeed;
		        }
		        if (red.yvel > red.maxspeed) {
		            red.yvel = red.maxspeed;
		        }
		        if (red.xvel < -1 * red.maxspeed) {
		            red.xvel = -1 * red.maxspeed;
		        }
		        if (red.yvel < -1 * red.maxspeed) {
		            red.yvel = -1 * red.maxspeed;
		        }
		        red.x += red.xvel;
		        red.y += red.yvel;
		
		        if (red.x < 0.05) {
		            red.xvel *= -0.5; //bounce!
		            red.x = 0.05;
		        }
		        if (red.x > 0.95) {
		            red.xvel *= -0.5;
		            red.x = 0.95;
		        }
		        if (red.y < 0.05) {
		            red.yvel *= -0.5;
		            red.y = 0.05;
		        }
		        if (red.y > 0.95) {
		            red.yvel *= -0.5;
		            red.y = 0.95;
		        }
		
		        if (Math.sqrt((myagent.x - green.x) * (myagent.x - green.x) + (myagent.y - green.y) * (myagent.y - green.y)) <= green.rad) {
		            myagent.points++;
		            green.x = Math.random() * 0.9 + 0.05;
		            green.y = Math.random() * 0.9 + 0.05;
		            reward = 10;
		        } else
		        if (Math.sqrt((red.x - green.x) * (red.x - green.x) + (red.y - green.y) * (red.y - green.y)) <= green.rad) {
		            red.points++;
		            green.x = Math.random() * 0.9 + 0.05;
		            green.y = Math.random() * 0.9 + 0.05;
		        }
		
		        if (reward < 1) {
		            reward = calculateReward(action);
		        }
		
		        totalScore += reward;
		        averageReward = totalScore / frame;
		        agent.learn(reward); // the agent improves its Q,policy,model, etc. reward is a float
		        frame++;
		    }
		
		    clear();
		    red.draw(redaction);
		    green.draw();
		    myagent.draw(action, reward);
		
		    averageRewardSpan.innerHTML = averageReward.toFixed(3);
		    rewardSpan.innerHTML = reward.toFixed(3);
		    agentScoreSpan.innerHTML = myagent.points;
		    hardcodeScoreSpan.innerHTML = red.points;
		}, 20);
		
		//FUNCTIONS
		var graphData = [{
		    label: "Hardcoded",
		    fill: false,
		    borderColor: "rgba(218, 165, 32, 0.8)",
		    data: [0]
		}, {
		    label: "Machine Learning",
		    fill: false,
		    borderColor: "rgba(0,255,0,1)",
		    data: [0]
		}];
		var chart;
		var seconds = 0;
		function updateGraph() {
		    seconds+=5;
		    chart.data.labels.push(seconds);
		    chart.data.datasets[0].data.push(red.points);
		    chart.data.datasets[1].data.push(myagent.points);
		    chart.update();
		}
		function startGraph() {
		    chart = new Chart(cc, {
		        type: 'line',
		        data: {
		            labels: [0],
		            datasets: graphData
		        },
		        options: {
		            scales: {
		                yAxes: [{
		                    ticks: {
		                        beginAtZero: true
		                    },
		                    scaleLabel: {
		                        display: true,
		                        labelString: 'Points'
		                    }
		                }],
		                xAxes: [{
		                    ticks: {
		                        beginAtZero: true
		                    },
		                    scaleLabel: {
		                        display: true,
		                        labelString: 'Seconds Elapsed'
		                    }
		                }],
		            },
		            responsive: false,
		            title: {
		                display: true,
		                text: 'Points Over Time'
		            }
		        }
		    });
		
		    setInterval(updateGraph, 5000);
		}
		startGraph();
		
		function calculateReward(action) {
		    var greendist = Math.sqrt((myagent.x - green.x) * (myagent.x - green.x) + (myagent.y - green.y) * (myagent.y - green.y));
		    var reward = -greendist;
		    if(action === 4) { reward += 0.01; }
		    return reward;
		}
		
		function arrow(fromx, fromy, tox, toy) {
		    var headlen = 0.02 * WIDTH; // length of head in pixels
		    var dx = tox - fromx;
		    var dy = toy - fromy;
		    var angle = Math.atan2(dy, dx);
		    ctx.beginPath();
		    ctx.moveTo(fromx, fromy);
		    ctx.lineTo(tox, toy);
		    ctx.moveTo(tox, toy);
		    ctx.lineTo(tox - headlen * Math.cos(angle - Math.PI / 6), toy - headlen * Math.sin(angle - Math.PI / 6));
		    ctx.moveTo(tox, toy);
		    ctx.lineTo(tox - headlen * Math.cos(angle + Math.PI / 6), toy - headlen * Math.sin(angle + Math.PI / 6));
		    ctx.stroke();
		}
		
		function normalize(v) {
		    var length = Math.sqrt(v.x * v.x + v.y * v.y);
		    return {
		        x: v.x / length,
		        y: v.y / length
		    };
		}
		
		document.getElementById("stepsPerTick").addEventListener("input", function(e) {
		    stepsPerTick = Number(this.value);
		});
		
		var getJSON = function(url, callback) {
		    var xhr = new XMLHttpRequest();
		    xhr.open('GET', url, true);
		    xhr.responseType = 'json';
		    xhr.onload = function() {
		      var status = xhr.status;
		      if (status === 200) {
		        callback(null, xhr.response);
		      } else {
		        callback(status, xhr.response);
		      }
		    };
		    xhr.send();
		};
	</script>
</html>