da-luce · cm823 · Nov 16, 2023 · Dec 1, 2022 · Sep 7, 2023 · Sep 14, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,13 @@
 __pycache__
+Q-Learning.iml
+misc.xml
+modules.xml
+vcs.xml
 src/qlearning.egg-info
 .coverage
 .tox
 .DS_Store
 .pytest_cache
 .mypy_cache
 .vscode
-build
+build
diff --git a/src/qlearning/main.py b/src/qlearning/main.py
@@ -1,4 +1,4 @@
-import smallExampleQAgent
+from src.qlearning import smallExampleQAgent
 import numpy as np
 
 if __name__ == "__main__":

diff --git a/src/qlearning/qagent.py b/src/qlearning/qagent.py
@@ -2,7 +2,7 @@
 from abc import abstractmethod
 
 
-class QAgent():
+class QAgent:
     @abstractmethod
     def getPlayableActions(self, currentState, differentials, timestep):
         print("AAAAA")
@@ -28,24 +28,38 @@ def qlearning(self, rewards_new, iterations, end_state):
             if current_state == end_state:
                 continue
             playable_actions = self.getPlayableActions(
-                current_state, self.differentials, self.dt)
-            temporal_difference = rewards_new[current_state] + self.gamma * \
-                np.amax(self.q[playable_actions]) - \
-                self.q[current_state]
-            self.q[current_state] += self.alpha * \
-                temporal_difference
+                current_state, self.differentials, self.dt
+            )
+            temporal_difference = (
+                rewards_new[current_state]
+                + self.gamma * np.amax(self.q[playable_actions])
+                - self.q[current_state]
+            )
+            self.q[current_state] += self.alpha * temporal_difference
+
+    def reset_matrix(self, rewards_new, iterations, end_state, dimensions):  # zero self.q, then retrain
+        shape = (len(self.q),) * dimensions  # len(self.q) entries along each of `dimensions` axes
+        self.q: np.ndarray = np.zeros(shape)  # drops every previously stored value
+        self.qlearning(rewards_new, iterations, end_state)  # via self so subclass overrides are honoured
+
+    def alter_matrix(self, rewards_new, iterations, end_state, scale):  # retrain using rewards_new * scale
+        rewards_new = rewards_new * scale  # rebinds the local name to the scaled rewards
+        self.qlearning(rewards_new, iterations, end_state)  # via self so subclass overrides are honoured
 
     def get_optimal_route(self, start_state, end_state):
         route = [start_state]
         next_state = start_state
         while next_state != end_state:
             playable_actions = self.getPlayableActions(
-                next_state, self.differentials, self.dt)
+                next_state, self.differentials, self.dt
+            )
             t1 = self.q[playable_actions]
             t2 = np.argmax(self.q[playable_actions])
             t3 = playable_actions[0]
-            next_state = playable_actions[0][np.argmax(
-                self.q[playable_actions])]
+            next_state = playable_actions[0][np.argmax(self.q[playable_actions])]
+            if next_state in route:
+                route.append(next_state)
+                break
             route.append(next_state)
 
         return route
@@ -62,4 +76,4 @@ def __init__(self, alpha, gamma, rewards, dt):
         self.alpha = alpha
         self.rewards = rewards
         self.q, self.differentials = self.getStateMatrix()
-        self.dt = dt
+        self.dt = dt
diff --git a/src/qlearning/smallExampleQAgent.py b/src/qlearning/smallExampleQAgent.py
@@ -1,21 +1,25 @@
-from qagent import QAgent
+from src.qlearning import qagent
 import numpy as np
 
 
-class SmallExampleQAgent(QAgent):
+class SmallExampleQAgent(qagent.QAgent):
     def __init__(self, alpha, gamma, rewards):
         super().__init__(alpha, gamma, rewards, 1)
 
     def getPlayableActions(self, currentState, differentials, timestep):
-        playableActions = np.array([[0, 1, 0, 0, 0, 0, 0, 0, 0],
-                                    [1, 0, 1, 0, 1, 0, 0, 0, 0],
-                                    [0, 1, 0, 0, 0, 1, 0, 0, 0],
-                                    [0, 0, 0, 0, 0, 0, 1, 0, 0],
-                                    [0, 1, 0, 0, 0, 0, 0, 1, 0],
-                                    [0, 0, 1, 0, 0, 0, 0, 0, 0],
-                                    [0, 0, 0, 1, 0, 0, 0, 1, 0],
-                                    [0, 0, 0, 0, 1, 0, 1, 0, 1],
-                                    [0, 0, 0, 0, 0, 0, 0, 1, 0]])
+        playableActions = np.array(
+            [
+                [0, 1, 0, 0, 0, 0, 0, 0, 0],
+                [1, 0, 1, 0, 1, 0, 0, 0, 0],
+                [0, 1, 0, 0, 0, 1, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 1, 0, 0, 0, 0, 0, 1, 0],
+                [0, 0, 1, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 1, 0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 1, 0, 1, 0, 1],
+                [0, 0, 0, 0, 0, 0, 0, 1, 0],
+            ]
+        )
 
         return np.nonzero(playableActions[currentState])
 

diff --git a/state_prediction.png b/state_prediction.png
diff --git a/tests/testingfile.py b/tests/testingfile.py
@@ -1,17 +1,122 @@
-"""
-dummy test case just to see if testing configurations work
-"""
+import unittest
+import numpy as np
+from src.qlearning import smallExampleQAgent
 
-# Example import
-from src.qlearning.qagent import QAgent
 
-def dummy_test():
-    assert 1 > 0
+class TestQAgent(unittest.TestCase):
+
+    def setUp(self):
+        self.alpha = 0.1
+        self.gamma = 0.9
+        self.rewards = np.zeros((9,))
+        self.agent = smallExampleQAgent.SmallExampleQAgent(
+            self.alpha, self.gamma, self.rewards
+        )
 
-def dummy_test2():
-    assert 2 != 0
+    def test_q_learning(self):
+        iterations = 1000
+        end_state = 8
+
+        self.agent.qlearning(self.rewards, iterations, end_state)
+
+    def test_q_learning_2(self):
+        iterations = 0
+        end_state = 0
+
+        self.agent.qlearning(self.rewards, iterations, end_state)
+
+    def test_q_learning_3(self):
+        rewards = np.random.rand(
+            9,
+        )
+        iterations = 1000
+        end_state = 5
+
+        self.agent.qlearning(rewards, iterations, end_state)
+
+    def test_reset_matrix(self):
+        iterations = 1000
+        end_state = 8
+        dimensions = 1
+
+        self.agent.reset_matrix(self.rewards, iterations, end_state, dimensions)
+        self.assertFalse(np.any(self.agent.q))
+
+    def test_reset_matrix_2(self):
+        rewards = np.random.rand(
+            9,
+        )
+        iterations = 1000
+        end_state = 8
+        dimensions = 1
+
+        self.agent.reset_matrix(rewards, iterations, end_state, dimensions)
+        self.assertTrue(np.any(self.agent.q))
+
+    def test_reset_matrix_3(self):
+        rewards = np.random.rand(
+            9,
+        )
+        iterations = 1000
+        end_state = 1
+        dimensions = 1
+
+        self.agent.reset_matrix(rewards, iterations, end_state, dimensions)
+        self.assertTrue(np.any(self.agent.q))
+
+    def test_alter_matrix(self):
+        iterations = 1000
+        end_state = 8
+        scale = 0.5
+
+        self.agent.alter_matrix(self.rewards, iterations, end_state, scale)
+        self.assertFalse(np.any(self.agent.q))
+
+    def test_alter_matrix_2(self):
+        rewards = np.random.rand(
+            9,
+        )
+        iterations = 1000
+        end_state = 8
+        scale = 0.5
+
+        self.agent.alter_matrix(rewards, iterations, end_state, scale)
+        self.assertTrue(np.any(self.agent.q))
+
+    def test_alter_matrix_3(self):
+        rewards = np.random.rand(
+            9,
+        )
+        iterations = 1000
+        end_state = 5
+        scale = 0.1
+
+        self.agent.alter_matrix(rewards, iterations, end_state, scale)
+        self.assertTrue(np.any(self.agent.q))
+
+    def test_training(self):
+        iterations = 1000
+        start_state = 0
+        end_state = 8
+
+        route = self.agent.training(start_state, end_state, iterations)
+        print(route)
+
+    def test_training2(self):
+        iterations = 1000
+        start_state = 1
+        end_state = 5
+
+        route = self.agent.training(start_state, end_state, iterations)
+        print(route)
+
+    def test_get_optimal_route(self):  # checks the route returned for states 0 -> 8
+        start_state = 0
+        end_state = 8
+        route = self.agent.get_optimal_route(start_state, end_state)
+        self.assertGreater(len(route), 0, "The route is empty.")
+        self.assertEqual(route[0], start_state)  # get_optimal_route seeds the route with start_state
 
 
 if __name__ == "__main__":
-    dummy_test()
-    dummy_test2()
+    unittest.main()