-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalueIteration.py
186 lines (166 loc) · 4.41 KB
/
valueIteration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# AI - Homework 3
# Value Iteration
# Kathy Xie, Monica Kuo
y = 1
e = 0.001
MAX_ITERS = 1000
# input - an MDP with states s (2D array)
# - actions A (N, NE, E, SE, S, SW, W, NW, stay)
# - transition model Pr (s′ | s, a)
# output - the value (utility) of each state s in S
def valueIteration(U, case):
# initialize Up, everything equal to 0
rows = len(U)
cols = len(U[0])
Up = [[0 for x in range(rows)] for y in range(cols)]
iterator = 0
sigma = 1
while(sigma >= e or iterator < MAX_ITERS):
iterator += 1
U = Up
sigma = 0
# for each state s in S
for i in range(0, rows):
for j in range(0, cols):
s = (i, j)
# find all the states that can result from all possible actions at current state
#A = possibleActions(s, case)
A = [(-1,-1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1,1)]
# Up[i][j] = R(s) + y*max(sum(Pr(sp, s, A)*U[ip][jp]))
maxValue = None
for a in A:
if case == 1:
sp = ( (a[0] + s[0]), (a[1] + s[1]) )
elif case == 2:
if(s[1] in range(3, 6)):
sp = ( (a[0] + s[0]-1), (a[1] + s[1]) )
else:
sp = ( (a[0] + s[0]), (a[1] + s[1]) )
elif case == 3:
if(s[1] in range(3, 6)):
sp = ( (a[0] + s[0]-2), (a[1] + s[1]) )
else:
sp = ( (a[0] + s[0]), (a[1] + s[1]) )
#bounds checking
if (sp[0] > 6):
sp = (6, sp[1])
if (sp[0] < 0):
sp = (0, sp[1])
if (sp[1] > 6):
sp = (sp[0], 6)
if (sp[1] < 0):
sp = (sp[0], 0)
probability = U[sp[0]][sp[1]]#Pr(sp, s, a)*
if(maxValue == None):
maxValue = probability
else:
maxValue = max(maxValue, probability)
if(maxValue != None):
Up[i][j] = R(s) + y*maxValue
if (abs(Up[i][j] - U[i][j]) > sigma):
sigma = abs(Up[i][j] - U[i][j])
return Up
# transition model Pr (s′ | s, a)
def Pr(sp, s, a):
sa = ( (a[0] + s[0]), (a[1] + s[1]) )
if(sp == sa):
return 1
else:
return 0
# possible actions A at a certain state s
def possibleActions(s, case):
size = 7
directions = [(-1,-1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1,1)]
A = []
# consider actions in all 9 directions
for i in range(0, len(directions)):
direction = directions[i]
# case I - no wind, no change
if(case == 1):
sp = ( (direction[0] + s[0]), (direction[1] + s[1]) )
# valid only when location is in bounds
inBounds = isValid(sp)
if(inBounds):
A.append(direction)
# case II - light wind
# wind blows along columns 3-5 from south to north, row reduced by 1
if(case == 2):
wind = False
if(s[1] in range(3, 6)):
wind = True
direction = ( direction[0] - 1, direction[1] )
sp = ( (direction[0] + s[0]), (direction[1] + s[1]) )
# valid only when location is in bounds
if(isValid(sp)):
A.append(direction)
else:
if (sp[0] > 6):
sp = (6, sp[1])
if (sp[0] < 0):
sp = (0, sp[1])
if (sp[1] > 6):
sp = (sp[0], 6)
if (sp[1] < 0):
sp = (sp[0], 0)
#if (isValid(sp)):
#A.append(direction)
'''
# if wind blew us out of map, push us back onto map
elif(wind == True):
sp = ( (sp[0] + 1), sp[1] )
if(isValid(sp)):
A.append(direction)
'''
# case III - strong wind
# wind blows along columns 3-5 from south to north, row reduced by 2
if(case == 3):
wind = False
if(s[1] in range(3, 6)):
wind = True
direction = ( direction[0] - 2, direction[1] )
sp = ( (direction[0] + s[0]), (direction[1] + s[1]) )
# valid only when location is in bounds
if(isValid(sp)):
A.append(direction)
else:
if (sp[0] > 6):
sp = (6, sp[1])
if (sp[0] < 0):
sp = (0, sp[1])
if (sp[1] > 6):
sp = (sp[0], 6)
if (sp[1] < 0):
sp = (sp[0], 0)
'''
# if wind blew us out of map, push us back onto map
elif(wind == True):
sp = ( (sp[0] + 1), sp[1] )
if(isValid(sp)):
A.append(direction)
else:
sp = ( (sp[0] + 1), sp[1] )
if(isValid(sp)):
A.append(direction)
'''
return A
# Reward Function
def R(s):
if(s[0] == 3 and s[1] == 6):
return 0
else:
return -1
# is state on map
def isValid(s):
if(0 <= s[0] and s[0] < 7 and 0 <= s[1] and s[1] < 7):
return True
else:
return False
if __name__ == "__main__":
# call value Iteration function for three cases
for i in range(1, 4):
U = [[0 for x in range(7)] for y in range(7)]
Up = valueIteration(U, i)
print("case " + str(i))
for row in Up:
print(row)
print()