-
Notifications
You must be signed in to change notification settings - Fork 0
/
Qlearning.py
127 lines (102 loc) · 3.53 KB
/
Qlearning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from agent import *
import random
class Qlearning(Agent):
    """Tabular Q-learning agent with epsilon-greedy action selection.

    TD update: Q(S,A) += alpha * (R + gamma * max_a Q(S',a) - Q(S,A)).
    Relies on names provided by the wildcard import from `agent`
    (Agent, ranf, randint, where, ACTION_COUNT, TILE_GOAL, and the
    Tk widget classes LabelFrame/Scale/DoubleVar/VERTICAL).
    """

    def reset(self):
        """Reset episode state and restore default hyperparameters."""
        Agent.reset(self)
        self.epsilon = 0.1  # exploration probability
        self.alpha = 0.1    # learning rate
        self.gamma = 1      # discount factor (1 -> undiscounted episodic task)

    def do_step(self, S, act, logfile=None):
        """Run one Q-learning step from observation S.

        S       -- current observation (mapped to an agent state via get_S)
        act     -- callable taking an action index, returning (reward, S')
        logfile -- optional open file; |TD error| is appended, one per line
        Returns the successor observation S'.
        """
        Agent.do_step(self, S, act, logfile)
        # Observation -> agent state
        S = self.get_S(S)
        # Epsilon-greedy selection. Strict '<' (was '<=') so that
        # epsilon == 0 -- as set by test mode -- can never explore,
        # even when ranf() returns exactly 0.0.
        if ranf() < self.epsilon:
            # Explore: uniform random action in [0, ACTION_COUNT)
            A = randint(ACTION_COUNT)
        else:
            # Exploit: greedy action, ties broken uniformly at random
            Qs = self.Q[S]
            maxQ = max(Qs)
            As = where(Qs == maxQ)[0]
            A = random.choice(As)
        # Observe reward and new state
        R, Sp = act(A)
        # Update return
        self.G += R
        # max_a(Q(S', a)); the terminal goal state has value 0.
        # NOTE(review): Sp indexes Q directly without get_S -- this assumes
        # act() already returns an agent state; confirm against act().
        nextmax = 0 if Sp == TILE_GOAL else max(self.Q[Sp])
        # TD update for this state/action pair
        delta = R + self.gamma * nextmax - self.Q[S][A]
        self.Q[S][A] += self.alpha * delta
        if logfile:
            logfile.write("{}\n".format(abs(delta)))
        return Sp

    def update_alpha(self, event=None):
        """Sync alpha from its UI slider (ignored while in test mode)."""
        if self.testmode: return
        self.alpha = self.alpha_var.get()

    def update_epsilon(self, event=None):
        """Sync epsilon from its UI slider (ignored while in test mode)."""
        if self.testmode: return
        self.epsilon = self.epsilon_var.get()

    def update_gamma(self, event=None):
        """Sync gamma from its UI slider (ignored while in test mode)."""
        if self.testmode: return
        self.gamma = self.gamma_var.get()

    def set_testmode(self, enabled):
        """Enter or leave test mode.

        Entering freezes learning and exploration (alpha = epsilon = 0)
        after stashing the current values; leaving restores them.
        """
        if not self.testmode and enabled:
            # Entering test mode: stash, then freeze.
            self.tempAlpha = self.alpha
            self.tempEpsilon = self.epsilon
            self.alpha = 0
            self.epsilon = 0
        elif self.testmode and not enabled:
            # Leaving test mode: restore stashed values.
            self.alpha = self.tempAlpha
            self.epsilon = self.tempEpsilon
        Agent.set_testmode(self, enabled)

    def _add_scale(self, master, title, row, var, command):
        """Build one labelled vertical slider (range 1..0, step 0.05)
        bound to `var`, invoking `command` whenever it moves."""
        frame = LabelFrame(master)
        frame["text"] = title
        frame["padx"] = 5
        frame["pady"] = 5
        frame.grid(row=row, column=0)
        scale = Scale(frame)
        scale["from"] = 1
        scale["to"] = 0
        scale["resolution"] = 0.05
        scale["orient"] = VERTICAL
        scale["variable"] = var
        scale["command"] = command
        scale.pack()

    def init_options(self, master):
        """Create the alpha/epsilon/gamma hyperparameter sliders in
        `master`, seeded with the current parameter values."""
        # Alpha
        self.alpha_var = DoubleVar()
        self.alpha_var.set(self.alpha)
        self._add_scale(master, "Alpha", 0, self.alpha_var, self.update_alpha)
        # Epsilon
        self.epsilon_var = DoubleVar()
        self.epsilon_var.set(self.epsilon)
        self._add_scale(master, "Epsilon", 1, self.epsilon_var, self.update_epsilon)
        # Gamma
        self.gamma_var = DoubleVar()
        self.gamma_var.set(self.gamma)
        self._add_scale(master, "Gamma", 2, self.gamma_var, self.update_gamma)