diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index 0ae3ea9a90..1e50fcb367 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -92,6 +92,24 @@
 )
 
 
+# To ensure reproducibility during training, you can fix the random seeds
+# by uncommenting the lines below. This makes the results consistent across
+# runs, which is helpful for debugging or comparing different approaches.
+#
+# That said, allowing randomness can be beneficial in practice, as it lets
+# the model explore different training trajectories.
+
+
+# seed = 42
+# random.seed(seed)
+# torch.manual_seed(seed)
+# env.reset(seed=seed)
+# env.action_space.seed(seed)
+# env.observation_space.seed(seed)
+# if torch.cuda.is_available():
+#     torch.cuda.manual_seed(seed)
+
+
 ######################################################################
 # Replay Memory
 # -------------
@@ -253,13 +271,15 @@ def forward(self, x):
 # EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
 # TAU is the update rate of the target network
 # LR is the learning rate of the ``AdamW`` optimizer
+
 BATCH_SIZE = 128
 GAMMA = 0.99
 EPS_START = 0.9
-EPS_END = 0.05
-EPS_DECAY = 1000
+EPS_END = 0.01
+EPS_DECAY = 2500
 TAU = 0.005
-LR = 1e-4
+LR = 3e-4
+
 # Get number of actions from gym action space
 n_actions = env.action_space.n
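
Note on the exploration schedule: lowering EPS_END to 0.01 and raising EPS_DECAY to 2500 makes epsilon decay more slowly and settle at a lower floor. The standalone sketch below (not part of the patch) illustrates the effect, assuming the exponential decay formula used in the tutorial's select_action; the eps_at helper is purely illustrative.

import math

def eps_at(steps_done, eps_start=0.9, eps_end=0.01, eps_decay=2500):
    # eps = EPS_END + (EPS_START - EPS_END) * exp(-steps_done / EPS_DECAY)
    return eps_end + (eps_start - eps_end) * math.exp(-steps_done / eps_decay)

for steps in (0, 1000, 2500, 5000, 10000):
    old = eps_at(steps, eps_end=0.05, eps_decay=1000)  # previous settings
    new = eps_at(steps)                                 # updated settings
    print(f"steps={steps:>6}  old eps={old:.3f}  new eps={new:.3f}")

Running the sketch shows the agent keeps exploring noticeably longer under the new settings (e.g. at 2500 steps epsilon is roughly 0.34 instead of 0.12), while the final exploration rate drops from 5% to 1%.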