Fix get_optimal_route infinite looping

da-luce · Nov 9, 2023 · 151678c · 151678c
1 parent d8959fe
commit 151678c
Showing 1 changed file with 19 additions and 13 deletions.
diff --git a/src/qlearning/qagent.py b/src/qlearning/qagent.py
@@ -1,7 +1,8 @@
 import numpy as np
 from abc import abstractmethod
 
-class QAgent():
+
+class QAgent:
     @abstractmethod
     def getPlayableActions(self, currentState, differentials, timestep):
         print("AAAAA")
@@ -27,18 +28,20 @@ def qlearning(self, rewards_new, iterations, end_state):
             if current_state == end_state:
                 continue
             playable_actions = self.getPlayableActions(
-                current_state, self.differentials, self.dt)
-            temporal_difference = rewards_new[current_state] + self.gamma * \
-                np.amax(self.q[playable_actions]) - \
-                self.q[current_state]
-            self.q[current_state] += self.alpha * \
-                temporal_difference
-
+                current_state, self.differentials, self.dt
+            )
+            temporal_difference = (
+                rewards_new[current_state]
+                + self.gamma * np.amax(self.q[playable_actions])
+                - self.q[current_state]
+            )
+            self.q[current_state] += self.alpha * temporal_difference
+
     def reset_matrix(self, rewards_new, iterations, end_state, dimensions):
         shape = tuple([len(self.q)] * dimensions)
         self.q = np.zeros(shape)
         QAgent.qlearning(self, rewards_new, iterations, end_state)
-    
+
     def alter_matrix(self, rewards_new, iterations, end_state, scale):
         rewards_new = rewards_new * scale
         QAgent.qlearning(self, rewards_new, iterations, end_state)
@@ -48,12 +51,15 @@ def get_optimal_route(self, start_state, end_state):
         next_state = start_state
         while next_state != end_state:
             playable_actions = self.getPlayableActions(
-                next_state, self.differentials, self.dt)
+                next_state, self.differentials, self.dt
+            )
             t1 = self.q[playable_actions]
             t2 = np.argmax(self.q[playable_actions])
             t3 = playable_actions[0]
-            next_state = playable_actions[0][np.argmax(
-                self.q[playable_actions])]
+            next_state = playable_actions[0][np.argmax(self.q[playable_actions])]
+            if next_state in route:
+                route.append(next_state)
+                break
             route.append(next_state)
 
         return route
@@ -70,4 +76,4 @@ def __init__(self, alpha, gamma, rewards, dt):
         self.alpha = alpha
         self.rewards = rewards
         self.q, self.differentials = self.getStateMatrix()
-        self.dt = dt
+        self.dt = dt