-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
146 lines (125 loc) · 3.66 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import random
from pylab import *
from tkinter import *
# NOTE(review): TILE_GOAL is not referenced anywhere in this file's visible
# code; presumably it is the tile id of the goal and is used by the
# environment -- confirm against the caller.
TILE_GOAL = 16
# Number of agent states; size of the first axis of the Q table (see init_Q).
STATE_COUNT = 16
# Number of actions; size of the second axis of the Q table. do_step()
# documents the four actions as 0=right, 1=up, 2=left, 3=down.
ACTION_COUNT = 4


class Agent:
    """
    Base learning agent.

    Keeps run/episode/step counters, the return of the current episode (G),
    the sum of returns of the finished episodes of the current run
    (returnSum) and a STATE_COUNT x ACTION_COUNT table of Q values.
    Subclasses are expected to override do_step() and, optionally, the
    init_options/init_info/update_info GUI hooks.
    """

    def __init__(self):
        """
        Initialize the learning agent.
        """
        self.reset()

    def reset(self):
        """
        Resets all agent data: test mode off, all counters and return
        accumulators to zero, and a fresh all-zero Q table.
        """
        self.testmode = False
        self.run = 0
        self.episode = 0
        self.step = 0
        self.returnSum = 0
        self.G = 0
        self.init_Q()

    def init_Q(self):
        """
        Initialize Q values to a STATE_COUNT x ACTION_COUNT array of zeros.
        """
        self.Q = zeros((STATE_COUNT, ACTION_COUNT), dtype=float)

    def get_Qs(self, S):
        """
        Returns Q values for the given OBSERVATION state.

        The observation is first mapped to an agent state with get_S().
        """
        return self.Q[self.get_S(S)]

    def get_S(self, obs):
        """
        Given the observation state, returns the agent state.

        The base implementation is the identity mapping.
        """
        return obs

    def set_testmode(self, enabled):
        """
        Turn test mode on or off. When in test mode, the agent should:
        - Disable learning
        - Behave deterministically

        The base class only stores the flag; honouring it is up to the
        do_step() implementation.
        """
        self.testmode = enabled

    def init_run(self):
        """
        Resets all run data and starts a new run.
        Override this to reset data!
        """
        self.returnSum = 0
        # init_episode() below folds self.G into returnSum. Clear it first so
        # the return of the episode that was in progress when the previous
        # run ended does not leak into this run's average.
        self.G = 0
        self.run += 1
        # init_episode() increments this, so the first episode is number 0.
        self.episode = -1
        self.init_Q()
        self.init_episode()

    def init_episode(self):
        """
        Initializes an episode.

        Adds the return of the episode that just ended to returnSum, then
        clears the per-episode return and step counter and advances the
        episode counter.
        """
        self.returnSum += self.G
        self.G = 0
        self.step = 0
        self.episode += 1

    def do_step(self, S, act, logfile=None):
        """
        Make the agent take a single step. The agent is given its current state
        and a function to call which takes an action and returns a pair of
        (reward, state).
        Possible actions are:
        0 = go right
        1 = go up
        2 = go left
        3 = go down
        This function should return the new state.
        Override this!

        The base implementation only counts the step and returns S unchanged;
        it never calls act and ignores logfile.
        """
        self.step += 1
        return S

    def init_options(self, master):
        """
        Override this to add options to the agent options panel.
        """
        pass

    def _add_info_row(self, master, row, caption):
        """
        Adds one "caption: value" row to the info panel and returns the
        StringVar that backs the value label.
        """
        Label(master, text=caption).grid(row=row, column=0)
        value_var = StringVar()
        Label(master, textvariable=value_var, width=8).grid(row=row, column=1)
        return value_var

    def init_info(self, master):
        """
        Builds the default agent info panel (step, episode, average return).
        Override this to add options to the agent info panel.
        """
        self.step_var = self._add_info_row(master, 0, "Step:")
        self.episode_var = self._add_info_row(master, 1, "Episode:")
        self.avg_return_var = self._add_info_row(master, 2, "Avg return:")

    def update_info(self):
        """
        Refreshes the values shown in the agent info panel.
        Override this to update the agent info panel.

        Requires init_info() to have been called first, since it creates the
        variables written here.
        """
        self.step_var.set(self.step)
        self.episode_var.set(self.episode)
        if self.episode > 0:
            # returnSum only holds finished episodes, and self.episode is
            # the number of episodes finished so far in this run.
            avgret = self.returnSum / self.episode
            self.avg_return_var.set("{:.3f}".format(avgret))
        else:
            # No finished episode yet, so there is nothing to average.
            self.avg_return_var.set("NaN")