-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplotting.py
103 lines (90 loc) · 3.88 KB
/
plotting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import matplotlib
import numpy as np
import pandas as pd
from collections import namedtuple
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from time import time as t
EpisodeStats = namedtuple("Stats",["episode_lengths", "episode_rewards"])
def plot_cost_to_go_mountain_car(env, estimator, num_tiles=20):
x = np.linspace(env.observation_space.low[0], env.observation_space.high[0], num=num_tiles)
y = np.linspace(env.observation_space.low[1], env.observation_space.high[1], num=num_tiles)
X, Y = np.meshgrid(x, y)
#Z = np.apply_along_axis(lambda _: -np.max(estimator.predict(_)), 2, np.dstack([X, Y]))
Z = np.apply_along_axis(lambda _: np.max(estimator.predict(_)), 2, np.dstack([X, Y]))
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
cmap=matplotlib.cm.coolwarm, vmin=-1.0, vmax=1.0, alpha=0.5)
ax.set_xlabel('Position')
ax.set_ylabel('Velocity')
ax.set_zlabel('Value')
#ax.set_title("Mountain \"Cost To Go\" Function")
ax.set_title("Mountain Value Function")
fig.colorbar(surf)
fig.savefig("data/mc_value_fn_{}.png".format(t()), ppi=300, bbox_inches='tight')
plt.show()
def plot_value_function(V, title="Value Function"):
"""
Plots the value function as a surface plot.
"""
min_x = min(k[0] for k in V.keys())
max_x = max(k[0] for k in V.keys())
min_y = min(k[1] for k in V.keys())
max_y = max(k[1] for k in V.keys())
x_range = np.arange(min_x, max_x + 1)
y_range = np.arange(min_y, max_y + 1)
X, Y = np.meshgrid(x_range, y_range)
# Find value for all (x, y) coordinates
Z_noace = np.apply_along_axis(lambda _: V[(_[0], _[1], False)], 2, np.dstack([X, Y]))
Z_ace = np.apply_along_axis(lambda _: V[(_[0], _[1], True)], 2, np.dstack([X, Y]))
def plot_surface(X, Y, Z, title):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
cmap=matplotlib.cm.coolwarm, vmin=-1.0, vmax=1.0)
ax.set_xlabel('Player Sum')
ax.set_ylabel('Dealer Showing')
ax.set_zlabel('Value')
ax.set_title(title)
ax.view_init(ax.elev, -120)
fig.colorbar(surf)
plt.show()
plot_surface(X, Y, Z_noace, "{} (No Usable Ace)".format(title))
plot_surface(X, Y, Z_ace, "{} (Usable Ace)".format(title))
def plot_episode_stats(stats, smoothing_window=10, noshow=False):
# Plot the episode length over time
fig1 = plt.figure(figsize=(10,5))
plt.plot(stats.episode_lengths)
plt.xlabel("Episode")
plt.ylabel("Episode Length")
plt.title("Episode Length over Time")
fig1.savefig("data/episode_len_{}.png".format(t()), ppi=300, bbox_inches='tight')
if noshow:
plt.close(fig1)
else:
plt.show(fig1)
# Plot the episode reward over time
fig2 = plt.figure(figsize=(10,5))
rewards_smoothed = pd.Series(stats.episode_rewards).rolling(smoothing_window, min_periods=smoothing_window).mean()
plt.plot(rewards_smoothed)
plt.xlabel("Episode")
plt.ylabel("Episode Reward (Smoothed)")
plt.title("Episode Reward over Time (Smoothed over window size {})".format(smoothing_window))
fig2.savefig("data/episode_reward_{}.png".format(t()), ppi=300, bbox_inches='tight')
if noshow:
plt.close(fig2)
else:
plt.show(fig2)
# Plot time steps and episode number
fig3 = plt.figure(figsize=(10,5))
plt.plot(np.cumsum(stats.episode_lengths), np.arange(len(stats.episode_lengths)))
plt.xlabel("Time Steps")
plt.ylabel("Episode")
plt.title("Episode per time step")
fig3.savefig("data/episode_t_epi_{}.png".format(t()), ppi=300, bbox_inches='tight')
if noshow:
plt.close(fig3)
else:
plt.show(fig3)
return fig1, fig2, fig3