-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab3-plot-results.py
122 lines (109 loc) · 5.87 KB
/
lab3-plot-results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pickle
import seaborn as sns; sns.set()
import time
import matplotlib.pyplot as plt
import pandas as pd
def run_fig_1():
    """Plot return per episode for DQN with and without experience replay.

    Reads ``results_for_figs.pkl`` from the current working directory, selects
    the ``replay_steps1_repeats10`` and ``no-replay_steps1_repeats10`` columns,
    and saves a seaborn line plot as ``yoni-fig-1-<unix timestamp>.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('results_for_figs.pkl', 'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)

    new_df = test_df[[
        'episode',
        'replay_steps1_repeats10',
        'no-replay_steps1_repeats10',
    ]].copy()
    # Rename the result columns to the labels shown in the legend.
    new_df.columns = [
        'episode',
        'DQN + Experience Replay, no Target Network',
        'DQN without Experience Replay, no Target Network',
    ]

    plt.figure()
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='Return', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:], loc='upper left')

    fig = lineplot.get_figure()
    # The timestamp suffix keeps successive runs from overwriting each other.
    fig.savefig("yoni-fig-1-{}.png".format(int(time.time())))
def run_fig_2():
    """Plot return per episode for DQN with experience replay.

    Compares three series from ``results_for_figs.pkl`` (read from the current
    working directory): ``replay_steps1_repeats10``,
    ``replay_steps50_repeats10`` and ``replay_steps200_repeats10``. The plot is
    saved as ``yoni-fig-2-<unix timestamp>.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('results_for_figs.pkl', 'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)

    new_df = test_df[[
        'episode',
        'replay_steps1_repeats10',
        'replay_steps50_repeats10',
        'replay_steps200_repeats10',
    ]].copy()
    # Rename the result columns to the labels shown in the legend.
    new_df.columns = [
        'episode',
        'DQN + Experience Replay, no Target Network',
        'DQN + Experience Replay, Target Network 50',
        'DQN + Experience Replay, Target Network 200',
    ]

    plt.figure()
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='Return', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:], loc='upper left')

    fig = lineplot.get_figure()
    # The timestamp suffix keeps successive runs from overwriting each other.
    fig.savefig("yoni-fig-2-{}.png".format(int(time.time())))
def run_fig_3():
    """Plot return per episode for DQN without experience replay.

    Compares three series from ``results_for_figs.pkl`` (read from the current
    working directory): ``no-replay_steps1_repeats10``,
    ``no-replay_steps50_repeats10`` and ``no-replay_steps200_repeats10``. The
    plot is saved as ``yoni-fig-3-<unix timestamp>.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('results_for_figs.pkl', 'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)

    new_df = test_df[[
        'episode',
        'no-replay_steps1_repeats10',
        'no-replay_steps50_repeats10',
        'no-replay_steps200_repeats10',
    ]].copy()
    # Rename the result columns to the labels shown in the legend.
    new_df.columns = [
        'episode',
        'DQN without Experience Replay, no Target Network',
        'DQN without Experience Replay, Target Network 50',
        'DQN without Experience Replay, Target Network 200',
    ]

    plt.figure()
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='Return', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:], loc='upper left')

    fig = lineplot.get_figure()
    # The timestamp suffix keeps successive runs from overwriting each other.
    fig.savefig("yoni-fig-3-{}.png".format(int(time.time())))
def run_ommitted_fig_of_non_convergence():
    """Plot return per episode for the single no-replay, 50-step series.

    Reads ``results_for_dqn_wo_replay_not_converging_over_500_episodes.pkl``
    from the current working directory, selects the
    ``no-replay_steps50_repeats10`` column, and saves the line plot as
    ``yoni-fig-omit-<unix timestamp>.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('results_for_dqn_wo_replay_not_converging_over_500_episodes.pkl',
              'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)

    new_df = test_df[['episode', 'no-replay_steps50_repeats10']].copy()
    # Rename the result column to the label shown in the legend.
    new_df.columns = [
        'episode',
        'DQN without Experience Replay, Target Network 50',
    ]

    plt.figure()
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='Return', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:], loc='upper left')

    fig = lineplot.get_figure()
    # The timestamp suffix keeps successive runs from overwriting each other.
    fig.savefig("yoni-fig-omit-{}.png".format(int(time.time())))
def run_q_values():
    """Plot the maximal Q-value per episode for the experience-replay runs.

    Reads ``qresults_1571423446.pkl`` from the current working directory,
    prints the available column names, selects the four
    ``max_q_replay_steps{1,50,200,500}_repeats10`` columns, and saves an
    11.7 x 8.27 inch line plot as ``q-values-with_exp.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('qresults_1571423446.pkl', 'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)
    # Show which series the results file contains.
    print(test_df.keys())

    # Alternative column selection, kept for reference:
    # new_df = test_df[['episode', 'max_q_no-replay_steps1_repeats3', 'max_q_no-replay_steps50_repeats3', 'max_q_no-replay_steps200_repeats3']].copy()
    new_df = test_df[[
        'episode',
        'max_q_replay_steps1_repeats10',
        'max_q_replay_steps50_repeats10',
        'max_q_replay_steps200_repeats10',
        'max_q_replay_steps500_repeats10',
    ]].copy()
    # Rename the result columns to the labels shown in the legend.
    new_df.columns = [
        'episode',
        'DQN with Experience Replay, Target Network 1',
        'DQN with Experience Replay, Target Network 50',
        'DQN with Experience Replay, Target Network 200',
        'DQN with Experience Replay, Target Network 500',
    ]

    a4_dims = (11.7, 8.27)
    fig, ax = plt.subplots(figsize=a4_dims)
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(ax=ax, x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='maximal Q-value', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:])

    # Fixed file name (no timestamp placeholder), so re-running overwrites the
    # previous image. The plot was drawn on `ax`, which belongs to `fig`.
    fig.savefig("q-values-with_exp.png")
def run_q_values_non():
    """Plot the maximal Q-value per episode for the runs without replay.

    Reads ``qresults_1571423446.pkl`` from the current working directory,
    prints the available column names, selects the four
    ``max_q_no-replay_steps{1,50,200,500}_repeats10`` columns, and saves an
    11.7 x 8.27 inch line plot as ``q-values-without_exp.png``.
    """
    # Context manager closes the file handle as soon as the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('qresults_1571423446.pkl', 'rb') as results_file:
        results = pickle.load(results_file)
    test_df = pd.DataFrame(results)
    # Show which series the results file contains.
    print(test_df.keys())

    new_df = test_df[[
        'episode',
        'max_q_no-replay_steps1_repeats10',
        'max_q_no-replay_steps50_repeats10',
        'max_q_no-replay_steps200_repeats10',
        'max_q_no-replay_steps500_repeats10',
    ]].copy()
    # Alternative column selection, kept for reference:
    # new_df = test_df[['episode', 'max_q_replay_steps1_repeats3', 'max_q_replay_steps50_repeats3', 'max_q_replay_steps200_repeats3']].copy()
    # Rename the result columns to the labels shown in the legend.
    new_df.columns = [
        'episode',
        'DQN without Experience Replay, Target Network 1',
        'DQN without Experience Replay, Target Network 50',
        'DQN without Experience Replay, Target Network 200',
        'DQN without Experience Replay, Target Network 500',
    ]

    a4_dims = (11.7, 8.27)
    fig, ax = plt.subplots(figsize=a4_dims)
    # melt() produces long format: one 'variable'/'value' row per series and
    # episode, which is the layout the hue-based line plot consumes.
    lineplot = sns.lineplot(ax=ax, x='episode', y='value', hue='variable',
                            data=pd.melt(new_df, ['episode']), ci=95)
    lineplot.set(ylabel='maximal Q-value', xlabel='Episode')

    # Drop the first legend entry.
    # NOTE(review): presumably the 'variable' title entry that older seaborn
    # versions emit -- confirm against the installed seaborn version.
    handles, labels = lineplot.get_legend_handles_labels()
    lineplot.legend(handles=handles[1:], labels=labels[1:])

    # Fixed file name (no timestamp placeholder), so re-running overwrites the
    # previous image. The plot was drawn on `ax`, which belongs to `fig`.
    fig.savefig("q-values-without_exp.png")
# Figures to (re)generate when this script is executed, in order.
# Uncomment an entry to include that figure in the run.
for render_figure in (
    # run_fig_1,
    # run_fig_2,
    # run_fig_3,
    # run_ommitted_fig_of_non_convergence,
    run_q_values,
    run_q_values_non,
):
    render_figure()