package org.deeplearning4j.rl4j;

import org.deeplearning4j.rl4j.gym.space.Box;
import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning;
import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteDense;
import org.deeplearning4j.rl4j.mdp.gym.GymEnv;
import org.deeplearning4j.rl4j.network.dqn.DQNFactoryStdDense;
import org.deeplearning4j.rl4j.policy.DQNPolicy;
import org.deeplearning4j.rl4j.space.DiscreteSpace;
import org.deeplearning4j.rl4j.util.DataManager;

import java.io.IOException;
import java.util.logging.Logger;
/**
 * Main example for Cartpole DQN.
 *
 * @author rubenfiszel (ruben.fiszel@epfl.ch) on 8/11/16.
 */
public class Cartpole {

    public static QLearning.QLConfiguration CARTPOLE_QL =
            new QLearning.QLConfiguration(
                    123,     // random seed
                    200,     // max step by epoch
                    150000,  // max step
                    150000,  // max size of experience replay
                    32,      // size of batches
                    500,     // target update (hard)
                    10,      // num steps of noop warmup
                    0.01,    // reward scaling
                    0.99,    // gamma
                    1.0,     // td-error clipping
                    0.1f,    // min epsilon
                    1000,    // num steps for epsilon-greedy annealing
                    true     // double DQN
            );
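
    // With this configuration, exploration is epsilon-greedy: epsilon anneals
    // from 1.0 down to the configured minimum of 0.1 over the first 1000 steps
    // (assumption: linear annealing, as in RL4J's default epsilon-greedy policy).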
    public static DQNFactoryStdDense.Configuration CARTPOLE_NET =
            new DQNFactoryStdDense.Configuration(
                    3,      // number of layers
                    16,     // number of hidden nodes
                    0.001,  // learning rate
                    0.00    // l2 regularization
            );
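
    // Per the parameters above, CARTPOLE_NET describes a small fully-connected
    // Q-network: 3 dense layers of 16 hidden nodes each, trained with learning
    // rate 1e-3 and no L2 penalty.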
    public static void main(String[] args) throws IOException {
        cartPole();
        loadCartpole();
    }
    public static void cartPole() throws IOException {
        // record the training data in rl4j-data in a new folder (save)
        DataManager manager = new DataManager(true);

        // define the mdp from gym (name, render, monitor)
        GymEnv<Box, Integer, DiscreteSpace> mdp = new GymEnv<>("CartPole-v0", false, false);

        // define the training
        QLearningDiscreteDense<Box> dql = new QLearningDiscreteDense<>(mdp, CARTPOLE_NET, CARTPOLE_QL, manager);

        // train
        dql.train();

        // get the final policy
        DQNPolicy<Box> pol = dql.getPolicy();

        // serialize and save (serialization showcase, but not required)
        pol.save("/tmp/pol1");

        // close the mdp (closes the underlying HTTP connection to gym)
        mdp.close();
    }
    public static void loadCartpole() throws IOException {
        // showcase serialization by using the trained agent on a new, similar mdp (but render it this time)

        // define the mdp from gym (name, render, monitor)
        GymEnv<Box, Integer, DiscreteSpace> mdp2 = new GymEnv<>("CartPole-v0", true, false);

        // load the previously saved agent
        DQNPolicy<Box> pol2 = DQNPolicy.load("/tmp/pol1");

        // evaluate the agent over 1000 episodes
        double rewards = 0;
        for (int i = 0; i < 1000; i++) {
            mdp2.reset();
            double reward = pol2.play(mdp2);
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }
        Logger.getAnonymousLogger().info("average: " + rewards / 1000);

        mdp2.close();
    }
}
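
/*
 * Usage note: GymEnv talks to OpenAI Gym over HTTP, so a gym-http-api server
 * must already be running when this example starts (assumption: the default
 * local setup of the gym-http-api project):
 *
 *     python gym_http_server.py    # serves on http://127.0.0.1:5000 by default
 *
 * Training then runs for up to 150,000 steps and records its data under the
 * rl4j-data folder via DataManager, as noted in cartPole() above.
 */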