Skip to content

Commit

Permalink
Fix get_optimal_route infinite looping
Browse files Browse the repository at this point in the history
  • Loading branch information
AriMirsky committed Nov 9, 2023
1 parent d8959fe commit 151678c
Showing 1 changed file with 19 additions and 13 deletions.
32 changes: 19 additions & 13 deletions src/qlearning/qagent.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np
from abc import abstractmethod

class QAgent():

class QAgent:
@abstractmethod
def getPlayableActions(self, currentState, differentials, timestep):
print("AAAAA")
Expand All @@ -27,18 +28,20 @@ def qlearning(self, rewards_new, iterations, end_state):
if current_state == end_state:
continue
playable_actions = self.getPlayableActions(
current_state, self.differentials, self.dt)
temporal_difference = rewards_new[current_state] + self.gamma * \
np.amax(self.q[playable_actions]) - \
self.q[current_state]
self.q[current_state] += self.alpha * \
temporal_difference

current_state, self.differentials, self.dt
)
temporal_difference = (
rewards_new[current_state]
+ self.gamma * np.amax(self.q[playable_actions])
- self.q[current_state]
)
self.q[current_state] += self.alpha * temporal_difference

def reset_matrix(self, rewards_new, iterations, end_state, dimensions):
    """Replace ``self.q`` with zeros and run Q-learning again.

    The new array has ``dimensions`` axes, each as long as the current
    ``len(self.q)``. After the reset, ``QAgent.qlearning`` is invoked with
    ``rewards_new``, ``iterations`` and ``end_state``.
    """
    side = len(self.q)
    self.q = np.zeros((side,) * dimensions)
    # Dispatch through the class (not the instance), as the original does.
    QAgent.qlearning(self, rewards_new, iterations, end_state)

# Multiply the supplied rewards by `scale` and run Q-learning on the result.
# Only the local name `rewards_new` is rebound here.
# NOTE(review): presumably `rewards_new` is a NumPy array, so `*` builds a new
# array and the caller's object stays unmodified -- confirm against callers.
def alter_matrix(self, rewards_new, iterations, end_state, scale):
rewards_new = rewards_new * scale
# Invoked through the class rather than `self`, so a subclass override of
# `qlearning` would not be picked up by this call.
QAgent.qlearning(self, rewards_new, iterations, end_state)
Expand All @@ -48,12 +51,15 @@ def get_optimal_route(self, start_state, end_state):
next_state = start_state
while next_state != end_state:
playable_actions = self.getPlayableActions(
next_state, self.differentials, self.dt)
next_state, self.differentials, self.dt
)
t1 = self.q[playable_actions]
t2 = np.argmax(self.q[playable_actions])
t3 = playable_actions[0]
next_state = playable_actions[0][np.argmax(
self.q[playable_actions])]
next_state = playable_actions[0][np.argmax(self.q[playable_actions])]
if next_state in route:
route.append(next_state)
break
route.append(next_state)

return route
Expand All @@ -70,4 +76,4 @@ def __init__(self, alpha, gamma, rewards, dt):
self.alpha = alpha
self.rewards = rewards
self.q, self.differentials = self.getStateMatrix()
self.dt = dt
self.dt = dt

0 comments on commit 151678c

Please sign in to comment.