-
Notifications
You must be signed in to change notification settings - Fork 4
/
main.m
174 lines (151 loc) · 4.75 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
% main.m
%
% Matlab script that calls all the functions for computing the optimal cost
% and policy of the given problem.
%
% Dynamic Programming and Optimal Control
% Fall 2020
% Programming Exercise
%
% --
% ETH Zurich
% Institute for Dynamic Systems and Control
%
% --
%% Clear workspace and command window
clear all;
close all;
clc;
%% Options
% [M, N]
mapSize = [10, 15];
% Set to true to generate a random map of size mapSize, else set to false
% to load the pre-exsisting example map
generateRandomWorld = true;
% Plotting options
global PLOT_POLICY PLOT_COST
PLOT_POLICY = true;
PLOT_COST = false;
%% Global problem parameters
% IMPORTANT: Do not add or remove any global parameter in main.m
global GAMMA R Nc P_WIND
GAMMA = 0.2; % Shooter gamma factor
R = 2; % Shooter range
Nc = 10; % Time steps required to bring drone to base when it crashes
P_WIND = 0.1; % Gust of wind probability
% IDs of elements in the map matrix
global FREE TREE SHOOTER PICK_UP DROP_OFF BASE
FREE = 0;
TREE = 1;
SHOOTER = 2;
PICK_UP = 3;
DROP_OFF = 4;
BASE = 5;
% Index of each action in the P and G matrices. Use this ordering
global NORTH SOUTH EAST WEST HOVER
NORTH = 1;
SOUTH = 2;
EAST = 3;
WEST = 4;
HOVER = 5;
%% Generate map
% map(m,n) represents the cell at indices (m,n) according to the axes
% specified in the PDF.
disp('Generate map');
if generateRandomWorld
[map] = GenerateWorld(mapSize(1), mapSize(2));
else
% We can load a pre-generated map.
load('exampleWorld.mat');
end
MakePlots(map);
%% Generate state space
disp('Generate state space');
% Generate a (K x 3)-matrix 'stateSpace', where each accessible cell is
% represented by two rows (with and without carrying a package).
stateSpace = [];
for m = 1 : size(map, 1)
for n = 1 : size(map, 2)
if map(m, n) ~= TREE
stateSpace = [stateSpace;
m, n, 0;
m, n, 1];
end
end
end
% State space size
global K
K=size(stateSpace,1);
%% Set the following to true as you progress with the files
transitionProbabilitiesImplemented = true;
stageCostsImplemented = true;
valueIterationImplemented = true;
policyIterationImplemented = true;
linearProgrammingImplemented = true;
%% Compute the terminal state index
global TERMINAL_STATE_INDEX
if transitionProbabilitiesImplemented
% TODO: Question a)
TERMINAL_STATE_INDEX = ComputeTerminalStateIndex(stateSpace, map);
end
%% Compute transition probabilities
if transitionProbabilitiesImplemented
disp('Compute transition probabilities');
% Compute the transition probabilities between all states in the
% state space for all control inputs.
% The transition probability matrix has the dimension (K x K x L), i.e.
% the entry P(i, j, l) representes the transition probability from state i
% to state j if control input l is applied.
% TODO: Question b)
P = ComputeTransitionProbabilities(stateSpace, map);
end
%% Compute stage costs
if stageCostsImplemented
disp('Compute stage costs');
% Compute the stage costs for all states in the state space for all
% control inputs.
% The stage cost matrix has the dimension (K x L), i.e. the entry G(i, l)
% represents the cost if we are in state i and apply control input l.
% TODO: Question c)
G = ComputeStageCosts(stateSpace, map);
end
%% Solve stochastic shortest path problem
% Solve the stochastic shortest path problem by Value Iteration,
% Policy Iteration, and Linear Programming
if valueIterationImplemented
disp('Solve stochastic shortest path problem with Value Iteration');
% TODO: Question d)
[ J_opt_vi, u_opt_ind_vi ] = ValueIteration(P, G);
if size(J_opt_vi,1)~=K || size(u_opt_ind_vi,1)~=K
disp('[ERROR] the size of J and u must be K')
end
end
if policyIterationImplemented
disp('Solve stochastic shortest path problem with Policy Iteration');
% TODO: Question d)
[ J_opt_pi, u_opt_ind_pi ] = PolicyIteration(P, G);
if size(J_opt_pi,1)~=K || size(u_opt_ind_pi,1)~=K
disp('[ERROR] the size of J and u must be K')
end
end
if linearProgrammingImplemented
disp('Solve stochastic shortest path problem with Linear Programming');
% TODO: Question d)
[ J_opt_lp, u_opt_ind_lp ] = LinearProgramming(P, G);
if size(J_opt_lp,1)~=K || size(u_opt_ind_lp,1)~=K
disp('[ERROR] the size of J and u must be K')
end
end
%% Plot results
disp('Plot results');
if valueIterationImplemented
MakePlots(map, stateSpace, J_opt_vi, u_opt_ind_vi, 'Value iteration');
end
if policyIterationImplemented
MakePlots(map, stateSpace, J_opt_pi, u_opt_ind_pi, 'Policy iteration');
end
if linearProgrammingImplemented
MakePlots(map, stateSpace, J_opt_lp, u_opt_ind_lp, 'Linear programming');
end
%% Terminated
disp('Terminated');