From 8601da7e97889de881606061ae83094ab9888663 Mon Sep 17 00:00:00 2001 From: Deadsg <144394753+Deadsg@users.noreply.github.com> Date: Sat, 18 Nov 2023 19:04:05 -0500 Subject: [PATCH] Add files via upload --- DQNAgent/DQNAgent.py | 101 ++++ DQNAgent/QLAgent.py | 376 +++++++++++++ DQNAgent/QNetwork.py | 87 +++ DQNAgent/__pycache__/QLAgent.cpython-312.pyc | Bin 0 -> 16984 bytes DQNAgent/__pycache__/QLAgent.cpython-38.pyc | Bin 0 -> 9735 bytes DQNAgent/__pycache__/QLAgent.cpython-39.pyc | Bin 0 -> 9738 bytes DQNAgent/__pycache__/QNetwork.cpython-38.pyc | Bin 0 -> 3074 bytes .../integrationmodule.cpython-38.pyc | Bin 0 -> 675 bytes .../__pycache__/learningmodule.cpython-38.pyc | Bin 0 -> 13058 bytes DQNAgent/__pycache__/lpmodule.cpython-38.pyc | Bin 0 -> 562 bytes .../perceptionmodule.cpython-38.pyc | Bin 0 -> 1954 bytes .../reasoningmodule.cpython-38.pyc | Bin 0 -> 590 bytes DQNAgent/__pycache__/rlmodule.cpython-38.pyc | Bin 0 -> 2324 bytes DQNAgent/decisionmakingmodule.py | 8 + DQNAgent/integrationmodule.py | 19 + DQNAgent/learningmodule.py | 527 ++++++++++++++++++ DQNAgent/lpmodule.py | 13 + DQNAgent/perceptionmodule.py | 85 +++ DQNAgent/reasoningmodule.py | 13 + DQNAgent/rlmodule.py | 66 +++ 20 files changed, 1295 insertions(+) create mode 100644 DQNAgent/DQNAgent.py create mode 100644 DQNAgent/QLAgent.py create mode 100644 DQNAgent/QNetwork.py create mode 100644 DQNAgent/__pycache__/QLAgent.cpython-312.pyc create mode 100644 DQNAgent/__pycache__/QLAgent.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/QLAgent.cpython-39.pyc create mode 100644 DQNAgent/__pycache__/QNetwork.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/integrationmodule.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/learningmodule.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/lpmodule.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/perceptionmodule.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/reasoningmodule.cpython-38.pyc create mode 100644 DQNAgent/__pycache__/rlmodule.cpython-38.pyc create mode 100644 DQNAgent/decisionmakingmodule.py create mode 100644 DQNAgent/integrationmodule.py create mode 100644 DQNAgent/learningmodule.py create mode 100644 DQNAgent/lpmodule.py create mode 100644 DQNAgent/perceptionmodule.py create mode 100644 DQNAgent/reasoningmodule.py create mode 100644 DQNAgent/rlmodule.py diff --git a/DQNAgent/DQNAgent.py b/DQNAgent/DQNAgent.py new file mode 100644 index 0000000..091ef71 --- /dev/null +++ b/DQNAgent/DQNAgent.py @@ -0,0 +1,101 @@ +import QLAgent +from perceptionmodule import image_recognition, text_processing +from learningmodule import supervised_learning, QLearningAgent, run_q_learning, reinforcement_learning +from rlmodule import execute_action_and_get_reward +from reasoningmodule import rule_based_reasoning, decision_making +from lpmodule import simple_chatbot +from integrationmodule import integrate_modules + + + +def image_recognition(image_data): + pass + +def text_processing(text_data): + pass + +# Example data +image_data = "path_to_image.jpg" +text_data = "This is a sample text." +user_input = "How are you?" 
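+
+# --- Illustrative sketch (not part of the original upload; assumes gym and
+# scikit-learn are installed) ---
+# X_train / y_train are passed to supervised_learning() and decision_making()
+# just below but are only created much later in this file, and gym is used
+# further down without being imported. This block mirrors the iris split the
+# later code performs so those names exist before their first use.
+import gym
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+
+iris = load_iris()
+X_train, X_test, y_train, y_test = train_test_split(
+    iris.data, iris.target, test_size=0.2, random_state=42)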
+
+# Perception Module
+image_result = image_recognition(image_data)
+text_result = text_processing(text_data)
+
+# Learning Module
+supervised_result = supervised_learning(X_train, y_train)
+reinforcement_result = reinforcement_learning()
+
+# Reasoning Module
+rule_based_result = rule_based_reasoning(text_data)
+decision_making_result = decision_making(X_train, y_train)
+
+# Language Processing Module
+chatbot_response = simple_chatbot(user_input)
+
+# Integration Module
+final_output = integrate_modules(image_result, text_result, supervised_result,
+                                 reinforcement_result, rule_based_result,
+                                 decision_making_result, chatbot_response)
+
+def cagi_agent(states):
+    # Placeholder function, replace with actual state representation logic
+    return states[0]
+
+# RL Agent
+rl_agent = QLearningAgent(num_actions=3)  # Assuming 3 possible actions
+
+def execute_action_and_get_reward(action):
+    # Placeholder function, replace with actual action execution and reward logic
+    return 1.0  # Placeholder reward
+
+# Note: this local definition overrides the integrate_modules imported from
+# integrationmodule at the top of this file.
+def integrate_modules(image_data, text_data, user_input):
+    perception_output = image_recognition(image_data)
+    learning_output = supervised_learning(text_data)
+    reasoning_output = rule_based_reasoning(user_input)
+    language_output = simple_chatbot(user_input)
+
+    # Build final_output before the RL entry is added to it
+    # (mirrors rlmodule.integrate_modules)
+    final_output = {
+        "perception": perception_output,
+        "learning": learning_output,
+        "reasoning": reasoning_output,
+        "language": language_output,
+    }
+
+    # RL Module
+    current_state = cagi_agent(environment_states)
+    rl_action = rl_agent.select_action(current_state)
+    rl_reward = execute_action_and_get_reward(rl_action)
+    next_state = cagi_agent(environment_states)
+    rl_agent.update_q_table(current_state, rl_action, rl_reward, next_state)
+
+    final_output["rl_learning"] = {"action": rl_action, "reward": rl_reward}
+
+    return final_output
+
+# Load a sample dataset for illustration (replace with your dataset)
+iris = load_iris()
+X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
+
+# Other imports and definitions from your script
+
+# Example usage
+image_data = "path_to_image.jpg"
+text_data = "This is a sample text."
+user_input = "How are you?"
+
+environment_states = ["State1", "State2", "State3"]
+
+output = integrate_modules(image_data, text_data, user_input)
+print("CAGI Agent Output:", output)
+
+env = gym.make('FrozenLake-v1')
+
+# Ensure that observation_space and action_space are valid gym.spaces objects
+observation_space = env.observation_space
+action_space = env.action_space
+
+# Initialize the QLearningAgent with q_table, observation_space, and action_space
+q_table = ...
# Define or load your q_table +agent = QLearningAgent(q_table, observation_space, action_space) + +num_episodes = 100 + +# Get the number of episodes +num_episodes = get_num_episodes() + +# Call run_q_learning using the created agent +run_q_learning(agent, env, num_episodes) \ No newline at end of file diff --git a/DQNAgent/QLAgent.py b/DQNAgent/QLAgent.py new file mode 100644 index 0000000..3dc6ca0 --- /dev/null +++ b/DQNAgent/QLAgent.py @@ -0,0 +1,376 @@ +from sklearn.tree import DecisionTreeClassifier +from sklearn.model_selection import train_test_split +import numpy as np +import gym + +def run_q_learning(agent, env, _): + pass + +def initialize_q_table(num_states, num_actions): + return np.zeros((num_states, num_actions)) + +# Example usage: +num_states = 4 # Number of states +num_actions = 2 # Number of actions +Q = initialize_q_table(num_states, num_actions) + +def num_actions(env): + return env.action_space.n + +def update_q_table(self, state, action, reward, next_state): + pass + +def q_table(env): + # Assuming env is a Gym environment + if isinstance(env.observation_space, gym.spaces.Discrete) and isinstance(env.action_space, gym.spaces.Discrete): + return np.zeros((env.observation_space.n, env.action_space.n)) + else: + raise ValueError("The environment's state and action space should be discrete for Q-table approach.") + +def q_learning(env, q_table, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1, episodes=1000): + for episode in range(episodes): + state = env.reset() + done = False + while not done: + # Selecting action using epsilon-greedy strategy + if np.random.uniform(0, 1) < exploration_prob: + action = env.action_space.sample() + else: + action = np.argmax(q_table[state, :]) + + # Taking action and observing next state and reward + next_state, reward, done, _ = env.step(action) + + # Updating Q-value + best_next_action = np.argmax(q_table[next_state, :]) + td_target = reward + discount_factor * q_table[next_state, best_next_action] + td_error = td_target - q_table[state, action] + q_table[state, action] += learning_rate * td_error + + state = next_state + + return q_table + +def shape(space): + if isinstance(space, gym.spaces.Discrete): + return space.n + else: + return space.shape[0] + +def observation_space(): + pass + +def action_space(): + pass + +def QLearningAgent(self, q_table, observation_space, action_space, num_actions, learning_rate, discount_factor, exploration_prob, num_states, select_action): + + + def run_q_learning(agent, env, _): + + + def learning_rate(): + pass + + def discount_factor(): + pass + + def exploration_prob(): + pass + + def num_states(): + pass + + def env(observation_space, action_space, n): + pass + +def update_q_value(Q, state, action, reward, next_state, learning_rate, discount_factor): + if state < Q.shape[0] and action < Q.shape[1] and next_state < Q.shape[0]: + Q[state, action] += learning_rate * (reward + discount_factor * (np.max(Q[next_state, :]) - Q[state, action])) + else: + raise IndexError("Index out of bounds for Q-table") + return Q + +def accuracy_score(y_true, y_pred): + # Check if the lengths of y_true and y_pred match + if len(y_true) != len(y_pred): + raise ValueError("The lengths of y_true and y_pred should match") + + # Count the number of correct predictions + correct_predictions = sum(1 for true, pred in zip(y_true, y_pred) if true == pred) + + # Calculate the accuracy + accuracy = correct_predictions / len(y_true) + + return accuracy + +def select_action(q_table, state, exploration_rate, 
num_actions): + if np.random.rand() < exploration_rate: + return np.random.choice(1) # Exploration + else: + return np.argmax(q_table[state]) + +def train_test_split(X, y, test_size=0.2, random_state=None): + # Check if the length of X and y matches + if len(X) != len(y): + raise ValueError("The lengths of X and y should match") + + # Combine the features and labels into a single dataset + dataset = np.column_stack([X, y]) + + # Set the random seed for reproducibility + if random_state is not None: + np.random.seed(random_state) + + # Shuffle the dataset + np.random.shuffle(dataset) + + # Calculate the split index + split_index = int(len(dataset) * (1 - test_size)) + + # Split the dataset into training and testing sets + X_train, y_train = dataset[:split_index, :-1], dataset[:split_index, -1] + X_test, y_test = dataset[split_index:, :-1], dataset[split_index:, -1] + + return X_train, X_test, y_train, y_test + +def q_table(env): + # Assuming env is a Gym environment + if isinstance(env.observation_space, gym.spaces.Discrete) and isinstance(env.action_space, gym.spaces.Discrete): + return np.zeros((env.observation_space.n, env.action_space.n)) + else: + raise ValueError("The environment's state and action space should be discrete for Q-table approach.") + +def q_learning(env, q_table, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1, episodes=1000): + for episode in range(episodes): + state = env.reset() + done = False + while not done: + # Selecting action using epsilon-greedy strategy + if np.random.uniform(0, 1) < exploration_prob: + action = env.action_space.sample() + else: + action = np.argmax(0) + + # Taking action and observing next state and reward + next_state, reward, done, _, _ = env.step(action) + + # Updating Q-value + best_next_action = np.argmax(q_table[next_state, :]) + td_target = reward + discount_factor * q_table[next_state, best_next_action] + td_error = td_target - q_table[0] + q_table[0] += learning_rate * td_error + + state = next_state + return q_table + +# Example usage: +env = gym.make('FrozenLake-v1') +table = q_table(env) +Q_table = q_learning(env, table) + +# Using the Q-table for inference +state = env.reset() +done = False +while not done: + action = np.argmax(0) + next_state, reward, done, _, _ = env.step(action) + state = next_state + +class QLearningAgent: + def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1, exploration_prob=0.3, select_action=select_action): + self.num_states = num_states + self.num_actions = num_actions + self.learning_rate = learning_rate + self.discount_factor = discount_factor + self.exploration_rate = exploration_rate + self.q_table = np.zeros((4, 2)) + self.q_table = q_table(env) + self.select_action = select_action + + def select_action(self, state, num_actions): + return select_action(self.q_table, state, self.exploration_rate, self.num_actions) + + def run_q_learning(agent, env, num_episodes): + for episode in range(num_episodes): + state_tuple = env.reset() + state = np.ravel_multi_index(state_tuple, env.observation_space.shape) + done = False + +class SupervisedLearningModel: + def __init__(self): + self.model = DecisionTreeClassifier() + + def train(self, X_train, y_train): + self.model.fit(X_train, y_train) + + def predict(self, X_test): + return self.model.predict(X_test) + +def supervised_learning(X, y): + # Split the data into training and testing sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Create 
and train the model + model = SupervisedLearningModel() + model.train(X_train, y_train) + + # Make predictions on the testing set + y_pred = model.predict(X_test) + + # Calculate and print the accuracy + accuracy = accuracy_score(y_test, y_pred) + print(f"Accuracy: {accuracy}") + + return model + +env = gym.make('FrozenLake-v1') + +# Ensure that observation_space and action_space are valid gym.spaces objects +observation_space = env.observation_space +action_space = env.action_space + +num_states = env.observation_space.n +num_actions = env.action_space.n +learning_rate = 0.1 +discount_factor = 0.9 +exploration_rate = 0.1 +agent = QLearningAgent(num_states, num_actions, learning_rate, discount_factor, exploration_rate) + +# Run Q-learning +run_q_learning(agent, env, 1000) + +# After running Q-learning, we can use the learned Q-table to generate a dataset for supervised learning +states = np.arange(env.observation_space.n) +actions = np.argmax(agent.q_table, axis=1) + +# The states are the inputs and the actions are the outputs +X = states.reshape(-1, 1) +y = actions + +# Train a supervised learning model on the Q-learning data +supervised_model = supervised_learning(X, y) + +def q_learning(env, learning_rate=0.1, discount_factor=0.9, epsilon=0.9, episodes=1000): + # Initializing Q-table + Q = np.zeros((env.observation_space.n, env.action_space.n)) + + # Q-learning algorithm + for episode in range(10): + state = env.reset() + done = False + while not done: + # Selecting action using epsilon-greedy strategy + if np.random.uniform(0, 1) < epsilon: + action = env.action_space.sample() + else: + action = np.argmax(Q[0]) + + # Taking action and observing next state and reward + next_state, reward, done, _, _ = env.step(action) + + # Updating Q-value + if len(Q[1].shape) > 1: + Q[1] = Q[1].flatten() + +# Use the first maximum value if there are multiple +max_Q1 = np.max(Q[1]) +if isinstance(max_Q1, np.ndarray): + max_Q1 = max_Q1[0] + +# Update the Q-value +Q[3, 1] += learning_rate * (reward + discount_factor * max_Q1 - Q[3, 1]) + +state = next_state + +print (Q) + +# Initializing the environment +env = gym.make('FrozenLake-v1') +table = q_table(env) + +# Define num_actions and other parameters +num_actions = env.action_space.n +learning_rate = 0.1 +discount_factor = 0.9 +exploration_prob = 0.1 +num_states = env.observation_space.n + +# Initialize QLearningAgent with Q-table and parameters +agent = QLearningAgent(table, learning_rate, discount_factor, exploration_prob, select_action) + +# Run Q-learning +Q_table = q_learning(env, table, learning_rate, discount_factor, exploration_prob) + +# Use Q-table for inference +state = env.reset() +done = False +while not done: + action = agent.select_action(state, exploration_rate, num_actions, _) + next_state, reward, done, _, _ = env.step(action) + state = next_state + + def select_action(self, state): + if np.random.rand() < self.exploration_rate: + return np.random.choice(self.num_actions) # Exploration + else: + return np.argmax(self.q_table[state]) # Exploitation + + def update_q_table(self, state, action, reward, next_state): + best_next_action = np.argmax(self.q_table[next_state]) + td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action] + td_error = td_target - self.q_table[state][action] + self.q_table[state][action] += self.learning_rate * td_error + + def QLAgent(): + + def run_q_learning(agent, env, num_episodes): + for episode in range(num_episodes): + state_tuple = env.reset() # Reset the environment to get the 
initial state + state = np.ravel_multi_index(state_tuple, env.observation_space.n) # Convert the state to a single index using the observation space dimensions + done = False + + while not done: + action = agent.select_action(state) + next_state, reward, done, _ = env.step(action) + agent.update_q_table(state, action, reward, next_state) + state = next_state + + if (episode + 1) % 10 == 0: + print(f"Episode {episode + 1} completed") + + print("Training finished") + + def select_action(self, state): + if np.random.rand() < self.exploration_prob: + return np.random.choice(self.num_actions) # Exploration + else: + return np.argmax(self.q_table[state]) # Exploitation + + def update_q_table(self, state, action, reward, next_state): + best_next_action = np.argmax(self.q_table[next_state]) + td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action] + td_error = td_target - self.q_table[state][action] + self.q_table[state][action] += self.learning_rate * td_error + +if __name__ == "__main__": + # Create environment and Q-table + env = gym.make('FrozenLake-v1') + table = q_table(env) + + # Define num_actions + num_actions = env.action_space.n + + # Initialize QLearningAgent with Q-table and num_actions + agent = QLearningAgent(table, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1) + + # Run Q-learning + Q_table = q_learning(env, table, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1) + + # Use Q-table for inference + state = env.reset() + done = False + while not done: + action = agent.select_action(state) + next_state, reward, done, _ = env.step(action) + state = next_state \ No newline at end of file diff --git a/DQNAgent/QNetwork.py b/DQNAgent/QNetwork.py new file mode 100644 index 0000000..91f54ab --- /dev/null +++ b/DQNAgent/QNetwork.py @@ -0,0 +1,87 @@ +import QLAgent +import numpy as np +import tensorflow as tf +from tensorflow.keras.layers import Dense +from tensorflow.keras import models +import gym + +def QNetwork(): + pass + +def dtype(): + dtype(reshape, dtype=np.float32) + state = np.array(state, reshape, dtype=(object)) + +def convert_dtype(reshape_func, state): + return np.array(reshape_func(state), dtype=np.float32) + +# Reshape function to flatten the input state +reshape = lambda x: np.array(x).reshape(1, -1) + +class DQNAgent: + def __init__(self, observation_space, action_space): + self.observation_space = observation_space + self.action_space = action_space + self.q_network = self.build_q_network() + self.target_network = self.build_q_network() + self.update_target_network() + + def build_q_network(self): + model = models.Sequential([ + Dense(64, activation='relu', input_shape=self.observation_space.shape), + Dense(64, activation='relu'), + Dense(self.action_space.n, activation='linear') + ]) + model.compile(optimizer='adam', loss='mse') + return model + + def update_target_network(self): + self.target_network.set_weights(self.q_network.get_weights()) + + def act(self, state, epsilon=0.1): + if np.random.rand() < epsilon: + return np.random.choice(self.action_space.n) + + state = convert_dtype(reshape, state) + q_values = self.q_network.predict(state) + return np.argmax(q_values) + + def train(self, state, action, reward, next_state, done, gamma=0.99, batch_size=32): + state = convert_dtype(reshape, state) + next_state = convert_dtype(reshape, next_state) + + target = self.q_network.predict(state) + + if done: + target[0][action] = reward + else: + next_q_values = self.target_network.predict(next_state) + 
target[0][action] = reward + gamma * np.max(next_q_values) + + self.q_network.fit(state, target, epochs=1, verbose=0, batch_size=batch_size) + +env = gym.make('CartPole-v1') +observation_space = env.observation_space +action_space = env.action_space +agent = DQNAgent(observation_space, action_space) + +for episode in range(100): + state = env.reset() + total_reward = 0 + done = False + while not done: + action = agent.act([0, 2, 3, 0], 0.1) + next_state, reward, done, _, _ = env.step(action) + + agent.train([0.3, 0.3, 0.3, 0.3], action, reward, [0.3, 0.3, 0.3, 0.3], done) + total_reward += reward + state = next_state + + # Print the values of state and next_state + print("State:", state) + print("Next State:", next_state) + + print(f"Episode {episode + 1}, Total Reward: {total_reward}") + print(agent.q_network.summary()) + for layer in agent.q_network.layers: + print(layer.get_weights()) \ No newline at end of file diff --git a/DQNAgent/__pycache__/QLAgent.cpython-312.pyc b/DQNAgent/__pycache__/QLAgent.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1daa1f768705e3a7b9c3db9197ac70571a38ca78 GIT binary patch literal 16984 zcmeG@3v3%lcDv&8Pl}Q#iK4#L$C4=7iY0#%DgGvs<*)7(Cyv7?VQB8klB}P1DaYbb z7v|z?sM>{TxUK2t?o78uPe!gdwYcKkYk}s90!?sN9M{y9%+>``G(c;hC>-g4YY#=+ z_hz}vkEz5-dq4}^2zom^^JaGD&71d|;g76V69wsm*I(rRdP!s5`$VX0*j$0Qs_~+jK02jqgOVxv%e^Qv7n|+?=xM!z*f-pHg$?;HA7T|r8 zv)%wF1iZrh^kiVfpOGl)LgV`eF7FL$k-9^V+@ahWN$Wey%?2b5H+w;%y#nI6T&`bi ze|BK(1%cy*u@k;U-ZwTl=U(UF7LY;5#6dG^dNN~Qe5toOWkn)C6q zle1@e3-km!mD)gI7pBsTVFNOhKcgKnPvKz5sJ27q1{KKYE{dAU#X?L-Au2?VmnalH zN%7dbyF@CPotLyhj-M0Y#WcGx;}rrvpo3(_yw4v1atQ)<=yLI9ct{nKvy*{I-}GdV zgAoUOXQny65-N=FFH{2vQ}@l*t4}RI74^jH5|!JM=AH!8!cowE&v&zoHA`cZ-&*$ePdiD;@lL2AZ^1yjJWC|1TG~HB9}z!Vvj&+?qUP6iY#{k@kyG(g)T@_hJpfk zRTafdmXV-uV*KS}{}1}!AHdm>G==<7kc!V`s;mGFUW<>c7-lZh2ap9wVrgY7e2u2rjZ}F5fKck{8uQSj#0$%q>i_t}~pAofP~$7vNmubG&PG z8`%ji-~2p3=krhW+%ZWe5H_>W%`9+a`P>;02p4>~pE69UIlDL`=}51FWEjl!#@E0A zd7Ltje9EITmQ1I7(+k`X&(HBZnjS)yttGv(2~jUO;D2KtK$!YiUmk5uRdk3I9pABk zxAsnL%DqQ)?^)CD{j09`Po_@1BA$3<&3h*0ofN&3DK9U2`L$<+`?m6{7nd)_>?voD z=3w~Nm0tF5WNXT-i|QhkGB-(aF8}bIS*t9`AoYq!QA;{lWW+2;?`GbY^enw(2pM?# zO3i1mBuavU#Fnrch2=xcIoLvc-6l+>vTU{sbqvPY`hY4cin25v(u3baeZ6Fy*jvgR zmyGZ=T;PHfGAbBju!WaQOXelRlI1hMVJi9!YsehZgy@hdWR<_JSb0mOp;9Q3_kA<-(;P@m{_KqJB8bueH-D#$2yPy^S+Xh$(aVFZ zlx(==<`!lH-f`HIbG%f>U7VkugF4(*u<_1F25x>*m}5bK=`%UL0efQDxmk{Hge>nu z(1ZYmn^blNwLTHl8R=~du%OrYvs^$jK$gR8C*Y1G;yVY(3el9Kw}$`=KO+1b04PFx zO|0hn{&mW(ZHnmAW%e7!YsTy5h&FAjj4)C~^~bQ?7MB-OHkW8~#mnOdleYGRyZ@f8 zKkckfIo+buEpNf3v+tVjV`tqh=S^q)$(3Dy+`RIX*t#?6+!fV5)KT3%-|f89xw<>q zH6U6#B1dDoI1~5%Oh>s|ZY|ziTyd<{{$O;qL3Hm=HXTS<>eDri@ut-$?+zq}Urapx zqPXL<*nC>7IUPBYcDS(r)%KOWVr#$X=#LyuTkO%H*zV|wq@^W3vpONR?-MQi;N_#r z#>mh{Gi7%?)WZxPc2O34c;r)*R~V?0ue<`P@|sdr79eF(G3BM~THXRXsE8svxruUM zX%wX=!RQ$w?CRF>TcCjNKmay0#k&y@d8-vBeDfR_hFV!XXT>c)j7SJ}!@sZ%04y_z z+AZI(->SV?n{sXyomGd<_` zO$+;a^3CWTwLDLb9EL6w!`f0NvuoZ^;N>w#vlmCR|MQNfK#U!OPWTZ7XcK)=!tBGL z9QeFLDHNb&82{t3RNzP4k3Jv`&exUl!Ox{V&gv`10Ed>vRbLjdCD@GSYNM!wNV_0*i8gyB*bijLu5O_ z*@K>L2&^sDiqV1%qzkA#R7}r68>W~ArSyf&q(;Or4>n&A>_R0n76QEzQ0EVpCCWoc zU!&cOtZn!{cnvlknPs_)uDOMPYi`_i222@N$aNpwjI3YK*pM`66PZX0V)?uE5w_m^g z`pWs+!F!Fpt5fgw{`HeTeDeL?9}e8xeIjizMz%z&BioXOn%IezQ>$}7uJ}py2i1u$ zk0zcQyZ5R$@tjxu@@vT>z68r9#?K{Ygv7WYvH|f(K-|3`Zd(xRFNlT<&>UP_LKff>7?DP81UF-dm#B9B4{-zMcylVnf^B%m#T;z+i#OOQWa*KF;Ru+)b&W90@3pRu-yKi( z4XrhtO4goAFb?3tvew@}xl;XmFN?PA;lm%9?cp!UT!>#+66OgP{xM{L3zhPsp_eI; z4TVYJCRjmOO}^?LqvdJkJW9FA_P+UlaZ$v?AafWCYZEXbT(PUVL-$?XSN1)ui5$ 
zsO`F^@4}!0@p76(TT`4)+FBBZmW=BYbHuA|HYRPI2}9?jlt)?;%3p#x|8MXAV%`N( z@2napU1CC-H=A;z3q|NX1rnezg;WhdQ=iegNOU2@C~F6;K?2AryXx6`7NnA*my|Y3 zTGVAB?bk~{qiK}5gIcDMJ^m#<#8;RQoXHmAWy%yhBg9yAAp?mOFu+U9NU*g)%AJ-4 zT%K$r(&)i|L2?`T*I))j9LUiCi+^rD1t80m-nTCNrad}(z51TsL1fClWa<8dVL!+eVCuQ$ zx!C!5b8I|Wx+Ode(!@}i&^O&Tn64U^jaN&TOQWGRLmS{~^=}_}^GJ%R5}B&#*)^sS z%Iwu3tvaqx$9*gA+cRSI&Sb@|@G0(bX#-$YD{w7uOUmI{s+@o5v6NA;dv|Cb{r2fL~hRu=kyM5-#lN{qE^Ir&q_5 z+xCivWg zP=t~cP;G%J^neRe)`o*Ns*RVU;nD3QNzvuGc%XqPuX>U}LU3U8R{=g>4Zv+8F$ejS zGaHOR%Pi+K2_=pkv_RVpNJWAM6RoN24Sbp{^J-$s~v@zW@xO6#(pfTYc>LTVtTscdfRi z`i8~6;rA<3M_v+-yp*(g5(dws1i+0#2=EQar6Kf7FX^rv07n(0#)ly6z7vMYtjyVome?aTCrj?v1aAkxqe&D5j_ zwv9$8$`!02KuORZL@EUBL6|ad!NGBIVIEGdCk2jGg6=2afNeT2dV$iBuwV=6GV80n zoB1lpF6FaIcpsz}Hj%VI7H-;2lRcDkvdxE~Ht5>q^Qx5j0I&p~9Y2?;iAHV;z$ zV{BFl07Rw1a@DkKBHqL6as5vfw^&s`fQ7>(lX(g+?oDsV6y^h(Pm zL)LI$nULWf>YJ+Mrm2t)+-~hi=5oac`yZe^1nU%l2fqLyXgZh);tjY!ydtL)Qshwd zER;#wc^>_0BhMp-fPnBUt~Fs?{wtV6e&AUI;{XI4v7-4iu{gV+iiGTywciH3d)NS5 zf%Dw)SK;IrWrin zQv20cmS2hOPFh>ShXAjv5dzZVN!zyY;rom+ax{7|I+ZXruQ4qjnaje*C{dB5(uh_jeQNAx}*dIO!TddL< ztGM33PT92Wk9|XB#c|KJ{nIX}^u|YT?|i4}vwY1md$cz?dTr;8folVo--wsrv$doh zHL>z*fg7P~q3d6b($VH?%!U&#n0%s#0h0ahE_r~ek1SW7uxFr@D8+w;3Qz7PPKNxEpcmNMZ6YkV-(wo`1|nY8apSay9ZS7jWTEn>s=q`fy`=_L!8C!S&Z z%ZtXq7jrdqu|)%7;35~K5D^p#K_UDN?s_pn{AsO2S`;?w^*GS>n_!cpl~6V^1WtL; zF=g=7L+GOFBBU=oNY31Rxsy4m>4!ndp+XcL#5=!}@!5&EKm%UB%I3n{GYFnVFbLpL zpPg^UIt2T1eNpej`qnkv>c80^fBxHJx5rky#4UT1b^8+FqgU3(o=-LOiVeN1gWo^? zN5|i*6?Yy_z`fC(uZj(?ij}WMjA?s)EO0AyGqf@!HubIU7Mlh{`#{8iHmqgY5`955 z)Wr@Z4XyD&s-s`*=)Zed>=+acgKLIXnHOLPYJxGzynsSEbJs}r8uRWNp+-l3-WY=h zxhw;AI(7l+|AM>whE6yD%uz(AzoCY2;VvaTrJnG}V@C+4VP+=8EHx|{mvl=e=nakE zf`o=-$QUvxhAbG22G|5TI43^v2Do#$%B&^`R0Dn43F~jqxdsiJMaFoCVs+olIkj3u z!t3E?Ct6qhO$6wG1Pw#-{YqERs3;L}y18BlUmp{xi|bOxK0UsR~~zVcoCVIZpTXMl-Z?~2za>w3V& zw>Cue>Bi<;lQ$>dITtnGuc=MdY!z#^uDpD=H8t=hao|g-fl+Z_G+FapRGW6xUt=EX zq0uKM%2AhgHp(~tR$fRtcSLn*h^$WBnOMD$?Aa%lbw^FHLvUE$oGzdwZSfaY z4y_)#dvvXAI9<~c)vg<%lZO_nye8Is%^c&=R~o%%YrkJs6*I&gE4{bt;?@;UqWYP-Pvm&5LdZc+5EK9FJMjX9&Rm0aOg(Y!NXGKU}dI2T03c`SW-t2EQ?Jjl)Bj z;MiX&QJ@iA;MqipLfovV66L=I6~(pZX`noR3)`XY{*}H1B9OGHzTA)e;m^+b>y!bL5_A zPXK-&CdZ~6N)d@sdF7xjTs9|8wxorCska6ghCL*>NciG|;c6=-_Nr#`Lz}CL9#NE7zxA@AQIMvT{Iy;H7W-J z&^MPonJl4*afl)q@86Rf`#KU#g`4y8J$T5#6@I)3oedaBWjU@tp$UJAbOCqOyraG3 zMzy4$WqmvkzkEYEFJFg5c_asYP$0=N4CVMQ411s#2#W6GzYnjlz~IGz5&uv_)AZj{ zPy9W#^Y5sNzoSg+3{4v$-a|j}&_I>i!lpC;^M_1zG#Gz6$vhFJ5u*_?08LBJ(i^4K zw0(U$1BI4MTd1+q{?ywER-^ z{N=Imq4bu{Z=b$>dSyJhWtYgbgpWpbF(&4Vm!~b&;h}XsWw!m?R6#pGp#c26(?EB9 zLIGH>w9(}cTMCB(U#zN0Rl3DWcY-QU+bU8vyiXIhd}J+4SzAPFOUl|NTHBB|<*Cv} zv9vK&>Jm#`VKe;lAZ$%nRE2G6dv$^;`_Mq>wraimMgs2Z8GK>wCpwC@MHZs|*v>V| z1>Z8+BNbPj%g*SIl)Y25cc$!JqP;6=><;VFP}g$Rz3h$--8gaW#0tG;>LL`;n%^FI zb0pdqJ)dNpVLHv2!!&eU*%3d#(www!g->rVm2~^V9?Db(-B&MH%Rb)a`mpXpjWseJ zJ#%B~+SHn+Ic>E^4=%qHJ_JLdOTuk$=_7kVnR-_*fQ2g7?|qMcukD9=v3^)I9~P;@ zZ|OfWRD`wvKxxD0FBzfDW@`vlK)Fw)PP&kuFa@I{x)7UL@h0|-B)OcROB@_cnx5N~EhI@sgdcA_d--^H2qtc? 
zPF1vs6)mZXHnE~Dd<^Dgt&8o7ci%mkc zZi}_->XGFmmybc*4uWrkAC^|6%d1o6ZDKi?HOcbsh%Vjk{`SP}iIoe<_T8ePHDZFB YCxdaYerhAypXn)^{b4J`$R~LJ4fRoZw*UYD literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/QLAgent.cpython-38.pyc b/DQNAgent/__pycache__/QLAgent.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f69ff91996f2f870a31b63ff2462fda775468f3 GIT binary patch literal 9735 zcmc&)&2Jk?cJFUCDTmP@lr5+#b1 zs#{)LXkH>OusICiYqD8DI_k2A!R8PkK!Thyzy<*lAcwx@u#M6?y?XD}@4eEO!^1fPzwdtfBl$O%4CCKe=>26-xQQoxXd8wwgjqM1%)m66 zx9ZlC9oRZ=*PXycon7|=PdLJTZUhkw8y+5PNri@Z(~B7+BAbnIV>mF&0tCv*e^=K z0eRqs8IDPJk#W`{Q8!I>?-`zMXE{JoRRxmBk1n1;&IW5nD zn)BkOxY*``yKzAdiF13owu|zTyogcWTen`A>fhx>VeBm_xrWF`}LWVII7 znvDla%G>oy7}jQMQlVueQk7bx9LX>$hs*U^G*g8}c7M#9cy8kf9VEiIZ)}+}TSm!F z+{&D6M2RgMPZG1NTr}(CKhNL3`smA0s_@Ylm1nB*=uWe`x`eLbqg$VT_UO*NnOpqu z(Y??0dwTgdz%z2#7LSmSThNP?Nd%;<^)MZ_Ed&=HMhCLR1r0#;BZdQ~P=DPW(rWPw} zX6@dCd0B)!)l{=Vi8&b-HL1l)Llo1M6g2~j;e2zoE{cz3QPje!l94RVHdXQ7S-ok+ z%JQ;mR;u&UTe&0$(Qjaw2KYYOd2!)G+byv`158Sd>1=>rY$p*XbUOChE+5tEOiXI-~Pp)^Y_;`e*2HV;qO0gy!;*1 ztE#)PD}Ez*C=^<>{|*^&(Ooz17+-(e@?wbjrqRj>v*pKr=##e_jp{|m% z8y@;(8xxTQew%SdKxKbs%~~|ett`gy<1CIGrQsFEobJgqO`tbvXxC-RcXvgyksaN z&0a_*Hn+KyWL6tBY~hmTJT|1Vv|N{vUp2Q>c?zM96nf+X6CI4d(56N$ITIQ z%JMDs1GK-QUJ#B{5RM{4b&AXdY&n!`92@Q2r^30y+zX3L^(k31n$smu9mEIfFq0ya zBTTgXy5W3fS(ePe)#5%NzUmq}UdEG(u30cWOV?FV*Z2Eh>`%{_nkzJ)id^8}9dp9+ zTMtSFz0or$%T*0Kk>QWZOFMlVPe>&erm&tHEyz09W%r+SR6BOQ&(3GIJKMq*4n6|* z6dvBL$Os>APh>?7Z*96tLo=^kjvnk)8~s`yUBsNf$H_^BVXl32@ALL)n4Xf?>dk7U z9$ugBKPsoY^?mB_*5q6Q5r5D_&W$%EXFwg_nxOYU%uEkKH{X?2h?uN-y=?1+bGHpdg!|JrgY%|)?=13ilD1g;Y^A z6t_c>1B0_u13$qNYE`uT_TSox??**C@sQfXp^q`4u}Bw7Ec7}Ol3^eoS&iDLPD924 zvXVmo^dDoWRMTvA9r-Gm*7-3{dvP02_yh@#I-oh)S{$^+Agtfz;HH3UxY_3Md=4|mX zPC60xLY|VNZFPF4+B`CI`U{O#N}0sGrw_aInA0bmKI^u3bl?!%MuSfXdVWM+PzDV1 zw0YC=O+^;=0dpEU4x-t16B!;_>n2iQyhU40MP`cjB8#Zyxe?i9#JY6{z1Gc^6QZ8_)s{<6;HAq;@aCMsZVaF( z*(}vkpC$1-;}4Ql=rp7g|Cn@XEwn~V-=4DbmZC|oa-%vw=K1JNZW60bVntW~8t$7UzhWDU0ZU>T8{c6kPt zqCUcBi+H+F1hliRjiSH1(YFXLvb%_HQP^4XHnhym!BuQIu`P_nthx#$?QB3S zesy*hXxgqdqEap~A13CrB%6Bu@O6`XiosLA5D~UYQ~^E@hIB}vT%+YlG7q782o@w6 z3Mn%lYJiU^t$YveA$4yBPe@nE&@x!CeKT+8?Q#2rdBRfc(WkNmmfek{f#F8Jom;8) zV;DsU{yLXU3+A7pvBzE8*(Li`N>S>PZM>mNmZPgBD0tg$nA>r~-qaD}>61EPB0Xz% zaKO|_F1*C#0+W6tuN{vUsDF)%lEWs^Mai4vu=Z=icU5yuHa@Q`%Ck={+^yoHz8?p^ zi6=adL>d8baR9RlLvIRO+Ow{X-kO2aj^5hB7g@y6T#+JT&DABluewqMj4JUQag5$IuV>Q6}=&bq)$=l(TK? 
z1)EQwTQTBN%B6T_0A&z!rq+#+AT?wJ2-+M$2Q^U9K$1gcBKvf^~>LZO@>XJ5%0!XuTG^2>~-PF+x2TEaM)64~D!QHgrUYRX# z#cyxVT{yF+N!YtghL9wqs`5nE%S)^EsFoV06d=;Q?8U1|p6I&c@GH8t$&ijEm7~?= zx=g{)6^=_9(?r8(9wvW}r;T>#wvU)=2M3nj?}g2jJnm?tvLwspBv&pkHN|S3`Fy#& zvRbL9E$IdXoR72g%&ck9(^Ia0&FP6Kd~-bK+pj2_g{GVALxt&8HYilvVTGb;hZPEz zfr9zTt~HGE9vSPfz-8^_wj-;G9TFESl=YICO3wfb zxx(-8bY!Nu!`wTIboUS#!!QKc2=M~5ZCbn*>Ik%@9-}p|XKPVMjK4yKB3A9;E!mp5 z`pEhXvK^uh?nIRC*@tYo;;!K#TRIf=1e`T3Nu~|dU|6(TJw)dLQj|d5KjG;Ra(E{~ z+G9bu6tSEtGx-Sp$RW%MtxB#k#(Qku{Vvz284XhSnnnOeaA9 z#zC0SYS}HPj?*j?0qn$?b*tq^u=2703*(nibM}DXxijhy zG44yMF_`iIFZCsowcM>XO1@ge`p8{Pp4wg&v1;NjD`J+c`XlzBORV%BYsTu0{$tir z5Y%T(exFIdJSMxtuIzX382%akLRvt>nzB8hlAN703znMW_ZrmVu&O+*g^GOX3&GRc zce(bLD5N(KjLtzYoCQEU@Wll;;MO2``&1(ZPX}+oB?NB|0as@>e8!d#ytRR}f5pMY|ic9nwMDUbK)$yR+9=7zR9b79BO6mNyDO?r6DYtW2*Q(q`11c}ZiF z#$nTy?dmzu4xvp)2U8!L`;?m7Gt8Z3Lffr@ygJXEMxC^`L1wmIi6Yr34W}`Ji)?0i zfC1L9R&O@c`>eXfgeJJh>Q1uQA8sAkHjPgH3*EvABnE<}y&=>B@X452vR-P@;fD1| zHdVH3L`YZHufdrJesc#*`ZG9oQr`vv_63B@x4oOx-uWr7G&erSIBJH;7fdKUDPN8u zx7Aj85<9pOB7hG8Zrkq62n4N9qKIrLv$yetRPK&M#n@xY648y8E@&UZ?QT0L4~;}U z+*yX;H6r9COS`ON5Rj@k(+SOYP3KnIVWW4J0sA)mI|kVY>q3#A3$<-K)ZJT2z)w%maqYkp_Wzvq)&>)DKI+FttSswS)GU|Nzb0ps%KEz)5 z6I^w8G4>YDn2D>9%qA~A;B>(~%js(n*sM4k`U)Lh2>&^tkzX*`snQ6roZ^4nvFS5BZ~?oQpQSca zUqNZv*~KA;nP{7Ni#aXwG@RPbr@>3D8e^=x&g5ezgaLZH5@!kCr`C+X)z`d1=3aV9 zpPpFT`l0QbRuZjU1IJ8-BkFU0H*?$U)25vQ5#ZWUUsQV9We9^zdi{qCZZ3I=wv9)E zksfrRd7^kN6?lljmhWBAw{bzHAu3AYzaeHhqn1=EON#5Z?WoRayy{=WP;eoxW9NqrdwV!Vvw;YU)G4u$W&9_JJmyS1r@zCz!Zxr-+c@JN* NFK#u*x^s@1{{y41gC+m~ literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/QLAgent.cpython-39.pyc b/DQNAgent/__pycache__/QLAgent.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c0e65b84fbcc239a123d3222c7f3f98bf31f1b9 GIT binary patch literal 9738 zcmc&)OKcoRdhY7(>1j5H6iHDcWxYlZOBz`s^{^#NE9+s&b~Y<>SCH%|9ow^`sUkJh zaE7dIDiYCi3AMoHU_jSovjBP2B?lup1PG8Ir!25RfCR{)uQ}{0$St=Vl6+tF%y4ET z1{MS)Lv(djS9SGYfBlc||BIE8k-)+)xb%hk>+_cNuWa=G@@QPg72S6%OIlLYt&n%S zE|%=j7OXpUXUPp+Qy2AI=t)=Ro>^gDdNPl@Fa2;x4uyg8)sPC-?QmEIau_{*&(OoH11<^k9-IBak*FS!+k>Tmj`g)DG$mb?vwJ6 zJdFD;c~u^j?_vg59&68eO&-tYRMy6Bd16z9Q))y_t&4D4jjAa%4T|=tsf~9wt#Gd_ zh5OXL=OUU=-o}1uVe|p?9aIN4#aFqo7#<4ug@?n#su=EF`R!+xJV|=wKFl$J)_d|4 zTBp!DB2UX1P;|8Ct5b_(;h36Q91q7)jxD|$4zY!MAjiUEay&ea`w2BcKB;5sxH_?( z3ri}mN@_|@tUJO2RVTA|LD_rCTimZoiz9MYp83`ePpS7dPRp}$cGC`L)TwY*9aS@G z7Sx=P*X6l37rc$LYFN(h;o8or^XeRCd4Ju0F7&^tbJE&l;pwnEpF<5a90C>RCao!p z&-Pf;)}(dOa;z&BIiNmJ!|MW6y~^2F*L9)0<_R@zR(C<|GV3$DkUc?ra4+oWUsP#h zm)fgNfGdYL-R*ZozTcLD5885dL0-fjTvQ*(4@sxI)b78W^*^^)V)dc?!6W;*qwj6M z3hcQzsM6FSaYn6I! zO@W-a@}RExyhFA<=ZHlgWp2^WHY*;I zVqN@^F!E*3t=+lzP!%CqHQj7bVva{eLu#?oki~2zMZ>^i^su>Fm&FIFC~Hwwt5_B1 zo4R=CwAr*`WqDaQE7gZHFM>3H3N$cH1AHHEK7e38u5kC;Tj4A`T}nMOYn1x8+Oz2i zEJ_cvD6rUxA`QN*)K}FfS~s;Odj>g}Z_CUS`XUOkWf#S6TOUE~H-Gvk{QcFn-~7X` z`TO^4FMkU?tD0dPir*M63WX8vzeNRH%&m)C*4N*)yaZytX|?iFwEV=6eDZd~m9}&? 
z?Zka#>C5EohKDgjjoq;gev2e8p}Rk`=WW{O))40KlOgyXw-vO8TYjt19vAeD8%c%} z2X6<-2;Zc=ZjC0RoG%&TlWD89qtl{gPwZqz$C4*OE594^m$(*P^C`4am1^dN*fsWwW6)H%;$h zG0lQHoQ^%9fix=9)}x_V!Y=3q6{kL`%E&_324+?y*s^2?FiW%#g(V8Y7p`#a2~n_j zI}^@sF)7BxwC&sa2k3uAy&xQ!ARI=8>J*DbY&n!`5*zK_rNZT;*aOo{^(onNhSMca z@5c-JAd4c4LoAH^=Aws{WmOVk&WQVf`06VdcmY=?x?)s#ws}^?v%cT|Vt;z(G+d$i z)Z_vu@0b&|-+EFi=(V0n*{&M=i3)#IUfSuKxFRaC6w-cXwIJ(YmowMr@~Czke3yey zZ4Gv$BVD`%5Gp;~b22Y|+&wuY1Kf@2Dh35Yg-RFJk@Ydv<2N6H&A?Mngk~5%=Z%xqqAZD(IpzCi+(4aYZ zYl=od&7B^KF25;7TcD&*6dlY-4ckU~N{-Q|%t7tMxE^sb+D1{nhpQt==iihrIAAkp zDX;0BGMh_#s41kHqM^ARnj9GXIyLYUT#->l$8Z0w9s7P%WFH>ZyEyd`7Bm*wVu^)b zM?yLR#G|Tl8`T-eI6zi9+W+~FF;&JNM_)y~N~U#wtkYiH#1%b80aph!M_UUwy9j5; z7Tlm|xqW>c3wLXrVh&E!i^T@UCvLBUC0gaEEC8D720;x!hP@#mN@$|o%%bZPAYtv` z4;xZFDK=N*VspOu0G>`py^yEm8e5(DREI7zFmD*NQp%^|j&XLGn=>BH_`0nf9dKe> zXz(#X&+m~JlmSbe6xVHEXtJ;mnA6a462rFZsL*Mx3zWckj~qQ6iwy0>Hc`tnD|X0; zb^8`Zt&5hO*x4I7*i3sbeu@FCuxbUatSg$IB2cN>LE7?JUOO?3#u+NE7 zk9bAAC`F0&_3p3i7QY1cKeDwgtuLW!E4GQ7Y{TfeOah(Mo;j1c{8;GnJA(o>bOk{S zY>K`rG4+4oveph&s?}9psXi?OifFZEo4aF6Zt7q)`Y?v4PPDp|I%~D%k{f#2!cyG% zwQwgU(3EMmYFV_B`kna)i5cy5WFP(pvuFBej|tzIb_%w>jb~MEQRl}xFSA|rcroDl zhP??e2f2R7v>YiIQz62FlqZmXwmN8oVp+;%7@6; zBW5$f&cIUiM|kZKt}X}x?R?h8&%e6Xw+I@U?jpWJV|$s~Fw(YwAhG2pj$q;q-i9=Pk0Sa3A#7%NU+`XgP zPdcM~;BYVT4VDIsgPN}$8Ibb(nSl4nwvo=4|BYYC*PmgjMu-gi!$P%LUtK~(6IZH_ zG&dte6bLcOM-Nx$=YgZ0S|ct6skontr|D1@_an+p3mN*(B0?nDDsctKJRCMjfpU#D zC(Yl7(ji@t<|(ACcz6P2OljqPa1W_O0QROK z82wHbe#y+sY!iCv<6L-&#aR~p=G}xG&++^F3_&ToW3<_n1K*S*oDonhPj&kO6CV|ToYL>utq%Vh%M$5?n!ar=>uRe-* z^}8q*&YRyqU#l8tgD3p4PLa8ZE26)qM=mq$w8{cr)+!?eM(HFulx~bLn3fCme4LN{ zbyI`FS>>U&^(xt#(Y9u;r4(dHW&mIOnEhKBfFo=Y8W?i~9W)TdK%m#XXaEuo*8t~J zzYMji#pUu}qB9~9mhifN1rdm~seZ6B(|O|+nAmlL zxlJ2^m1ub@etUa9MHqXMguTatn~;v{%41b8FRj+&T4tCsXvn~_7pbNN;_6PQuNly$ z!zPPVj#roKDuY26IWK9<3Im+!O#Tj68|N@!9}{c)2bSF*f6bLV?r5X3q{`(qD3_O- za<$HSpZGY*46n zs0u|hp(+$C4+Znnsk^Jo$a&QwB|9PI=g5N8U$MWO+-O(Ik&0$(hiZuf-iPCP3Bq zS@e^X=8VhwHF`Qk?ca_lGqMlUa>Wj=E?YVj^`x2&Eor_D)L>YQTK)d3q-cd_|A?zY z$ieLhY0m}W(q+!`Qx+8#Mua45oUCW9X4OWH9-!mzakb@$5t!z)zJ+ll<;*PXC#cYR zBmn4y=HI$V5ZW!L<+gIMSO>64JVY_DvD^mK0ng#h@-e_pl3%x5ehe$0_`k4z2{q>k z2wrYZbKaL!V=(0l-1L_y)`A;tlzh2}^^v=VJiWE5+jy4dmNhZUkiN?i_gI)cHjFhJ z9r9U~#qYBC5sQJ%-x+mfzk9myPZ$@`0$TR8;{lZfP9R2Y{gCgUK!F6ATX|B8^c^(% zlJB(kU9SBZ8rj(c^KuXjcM%W|e93_u2xySDePWP;XVSLd64JH@fUENxKC?st9X9FOUbIj^zq`j;905FaAGvxaYj2DKxufTbwKB7Ez?jk8 z+)EmpECZXZY)6knV@PE3f?0IUeag%&@uNP?g0|ZLd3}a8gF0z%!~A@`6341h8p&K@ zH;iOzHLBH{4gCS%xWR%ZxX0>FvDu$!9oRO5PW}tSB047v>C)ch=_v5YgeciBjp%U0 z`Xrkw+chGjs~gwACxYMH0l|0%ZYPUvkXm0vvV1GL$?Tn<@yc-HbIhZS>zrea(v$J! 
z2x>3dDoVs?M%H0f2d%mNxTG)SLj!Q?iAt{HY0^j-U7nbyXiHr>Y9RW5ikK+sqoUXCpqSpLRvO|U9T2QPud1etS2}HT2PF#-u zip7@38DMC1ioDYcC;)}I=uDr!X3gl$H>{a0xyai0P#)_1XWR|Gm7P0`A(ig}Sx#VY zJj7#b2Rnza-SlxktT8(bAJ4f4c00|t-~EK&ptf1IZlYWpzsKMZ=eNaq+@gm{{vRo( z!P>7;OkIDKe1pX9lXP;M!utQLk$QqF9Z2>&92GEBHMD5 z)4a>V)Q+JB-%CF{w@q*FJLX06&Lwkx6c(aK=2&4S#y=1k_!mxf9vK)|&Vm3Ou^C4m z=77DIpJs;B$j%PZLbsXO%p0s3k*DD_c0Nm68r7KKv#TsVW^tPZH6?YI5PcfW0G#8g ze3-wJ9nWVTYfC?jT{B8zv}?f45C&y_ekW^N?E5jB6o?Q9j^>EcGa*A1=Cgx8RB&d= zEK+CXwHP7p=Fn&an#Y2P!oU4Gsl@jbNhEI`;2Alu?1rVJ3qd-mw$ zXyA_rz&aiRdUFbi-Z5ljMp+8i3G85M6bF93bJTV*JLWJm@N2#^hQDmSG0ca44}asJ S$18YvlVfq7Inn**g!mt^UWMNP literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/QNetwork.cpython-38.pyc b/DQNAgent/__pycache__/QNetwork.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..997810cd0f3e2b797c9677b48fef882d114422da GIT binary patch literal 3074 zcmZ`*&2Jn@6|d^A=^2m5df5SWvC5dhgZi>Q}$_QT6HU zY>lD)aqHKzKf~DHiMifr_8H!Mk25Bi;0X)Lkq4IH?ZClkC2r3PJWg|VQtA1DZ)l!W zgQ{?Z`#B40!V?vYGr|{DjCD~HGZ<%IGEo<^pEEIgY6T5h9nXozga=L8?9K=C=of+o z*^u*c0rQJ;Nz9${ZT6eL1k18HzBOUNieSO2Xa;L??F*hS$;I()*fB2_1nW3rabg8` zp!uG>GvRw~a5q>F)`Itg_o25TmddB&$UAcV8n-raIg_h${S&M$?*=RKo>-PErxv95 z#4UN3sG~oDUXjaUb%lM&#F||FlKsIs<)kAQyZ6W6nlQtG?tU9KF9aW8=7aHrQvL-$ z`q9yE1>a?@+h>IPPK#@ITc)`L;q@{hle|q6*Be~HUf|6gbb{@&ulUYaOu68$x_>=< z@p$jkT&jHU=h2CZ_O`S3peNHJ-+TV@r+eG4cAj^@?Y-KO#hXmM-t3=f|AO&pbqgKi zs)ByX1o}Pw%b0KQqhU_vi+`X7lcM$M1{QCm-6tjSpo4{Lcn-~8%f)k3^6sL2_R$U z5{QCz`G}49DFqc9tGXRfJ|& zq;Y{*6LgzUx@bgSnz8_`oX}2f$CLv^+hZ?ITi; zqax1IFz-igsq0bu8aKB;h!YVWg=rbBx_U)6ib!>2aXGa-=nI5QI4y43+R0^dpjPlp zwMyL$hx%a{r*RR6w=kKTy2w2|H9W)li_LFdXia&T5TH;pC~Siu4;KdI5$oDrN7(P| zk{8Z9s{lG(x9b6Qa2>1Y5j*`isW$7AWQlg}1;lbk^9!kiQEYDlMBA@Bnt~?r`0>n|G$b16~ z(v5P`;G=n}EA6b;j}xq;?$SrDDM>ewQ)`%_>AZoCafdenS>xZH+u+01OW0s5{WmGdZ08M#i1PB3HMKgu>ul0!+xHo2I@8*#o@ixjJ0G zH1B^#%{RaQ9ko9{or-yiTEftH`w0jtEhf^f*G#P&4iVpVy6yyiEWDzEvSY!=tm}`f zBMyJth>+R{p1;U~8B>I`r=nD3J@_1>_S%P8+%Em0KE$6C6&FRpS#G(~S5n07LVHMp zUi7x*l-s6yq${$Y$4Qn3{!w@wB?Fn?fJ57es1{Vu!@a?qR*l#Buzm>(6u$ZT<==)& zU;pFj*#=bG6z3O>450(UAfZ6eSy1lPP!4vDj3+OeUHcd?_NVbvkLXJZrxJY|5Ni7%E?Tu~IC^C*f^X!TNC{m_^$h(D#;`AXt$jKx|WyY#o4sTd>e%l&0EDRNMy-=RrKqCdSKp_OO4PMjFqP08 ztdx7pcuhd~+^_gm->&(JdF+UZ5FrM6{y ze04zJGD24*?IIOBlCsHI4_>15m`j1K7qo-03=H+0fG_}4RltE$$i8|;!l%UZ&;YO9 zzKYZ0Jc088MGme!E|aD^!XM#xCu`kk8k<5@8Gc-AJ)pl a3(@4GB0J!g_>y&>I2$=`t>V^H=l=z7R>5`v literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/integrationmodule.cpython-38.pyc b/DQNAgent/__pycache__/integrationmodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4204a1ae90dc38206665afa31b6e9a446e4fb669 GIT binary patch literal 675 zcmZWny-ve05Vn)F4H4x>^Z{72Ffby7T8e}i=u&lwB1LX&f~CfZYzKi9CXjduUVvB1 z%3B}?&Ti9yIP3nt^SLbFT~%?ptiv|6!~@~Juehnf<~w2ZKHL8eBu}d9=6Ke()j56V(taNnP^_LWtHqd@srv(nSHCkl9ZF&PVyLViW)yC@tq+(Mf&a!68y8b literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/learningmodule.cpython-38.pyc b/DQNAgent/__pycache__/learningmodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ab4234b5ac4c94cc968fde4da1c61210200c973 GIT binary patch literal 13058 zcmb_iYiu0Xb)NUm&MtRJiZ4;JL|eAywPjjz5_;c}a5?AnHO4EcU z^qS@;cilJeY}CxUwEk?oi--?32tN3pA<($R*c-x{V9A{kGs$qx+{HM5``CdEo zc=u^}SRRp2$$5En+r~KN(K_ByAL`mIeeEyhXXGQ*&u;1Bz*P&m&)|*Ei32oZls<#G z9~5(#`y56~PY;TR@I=6qJZ>c1wRlV{~?;M3>3GspYC`Wli*23S%+QXW}`Z1_MpF~#3;Y9jwj0_0H;3j zi`$F`7b>kn&d72ls5BezDk)FbN6 z!BZx3BAZmUl?tM^Pu! 
zm_j>aQ3|DiGu`d1l|dWhQHBJnm2Hi*yjHH2+cCO-2=@qmOK;^H6X71TG7+UaqcbQs z+!JMR&!A*d7($P1Va#aTCfZI3%Lep_^k^h9P(Fnmd$9irZCXQ~vsatZCbVZYQ+rX1 zvaR&m^|dQe3X7jwP++QdhsfE2!4HZUe#ZboMh-X5C``v5mbwvyrAArC*-|;gx?^oi zcrsTu>aoM=Aa>7Gg0hmKjP*ur2Ukj~s8ExQc&xb;NOi3=@S@RJjjdJH6tTT2RWpcf zRcfqA++-lb*uj%%*5lNAqq5vo)JLhlT9dI;QY-b+_1FqRxf+|5Mi`H&(luEt*4Jxc zrC4bQiPCbdf>Km0jK-#HT#H9)vZNZ7#!69@Fu=UPP@C(Gu(*u*G*z6Et3jpKY{afy ztppJFAkH+_>&0ZYLF{4rp)3ZKP0a2BRp(lmCgsPLXf|kY%$wM2$m@`PE^(Cnprk~s z7yZ$sjZ%P7hH^!!+nV|u*!=TnPanU05gZCGzg5~$rORiU<@Gw$Ft~i`wbw77xv+3* z1y7f|Q-C(E*W~=_M(nK>yP0`36g&;0>7%-5WQ|chXV5>ZySjz{xSlnpj0xm>pe~+0 z{o!A@rfaBY@m87Ez59bULS#DHN0LC=7wKC@3-EnWyRDPeaJvI5nCb}X#F?&W5R=kk ztx+t|xOt8Vc?Q+g^B^?sfP2*Wxs2zbAIT-_zJ&yvD`Vk~Kzg7xYNX$8Jh~#oBCT0> z1)oECx09EUE>ol3AEkR!dW=35`y0)G3HsDSIgDYP9~jCD_cxTTp7+*?`Z z-+!D~K_~eA*NK&1oA};q|Fn7I#LAEVWc4NQ$Nza^<$rGf_us#@bp8a^l(2(3PK5Y? zR%E@YHe!3FRIis*f$}mPbXKfa8u3W?LC7-T5{W>ZEiSEBYNE)FN^J8Df=bIe(EhNX z#%>WntP&QBB~%M&BiD2X|148VIr`>U7v;=%YwwPfX3;{VSD`}iMZnj#9>KQ&rx-P< zmr;yCP2#5BCPLP@J4({2I8$k?t^+v|bRLi?lB75fT#WJd%4^8%sFPEgGrOW(kNO1H!y%lIz^fLymP5v*_i(3mMJgW@rYMlC^cXlj&p@qEe4N(yqiYxUH-AQ0vY~s_N7#4eZZ=@BzSeNtEh%&#E

Ys?j#L z3FKN=b@ezuL*J{;Ho^nAr`j%Qddt9D>BtZqs09o*7j7GgulNi6*mW#;ikCPA_l<@3 z0d$51hZmP7cYE?;5DTitfPu;a5X80r&#cQS7x(s$Q!9-DnIsqR* zKvUu6fNo}c%Y_Ot;N8${ zTM@zvsD&T_*kL0FhLG6;7A5Q;N4w>sBppF*+7{~TEpp?Dd1<7*$c;Rz8(DC{kze&7 z9R!<6jc^9CHjZ3`3#!?xmik@<)Xh&L&ufNO%&>{I7ZfRL-$}oXn2cO)mahbH>Y7wb zO@wK3ig>OL5+OO`1yReCBK!_5f_YB#phC}fNvYCCY%W(qULr*>RV{$PCX?Zey`@rE zzQR7QpN8eF@Uz-YU}GnQb1p+a)A4h;eK*N0F7I4QIL51pCvBmpfMkdKp>d!E3+p)t zFExw*6uj5Xd~a=&H-?s%whW3J@$x=|#IS_7VTG%-kXV+k{Hijl!FEum34i7&#TJ4% z1L5XS%ytJPZaDk_euN_m8 zTHuJzPvNvweg;*&Glx8y{m{?E(@9*x6i5$!IE*`V;13%nJf&Vop(3qQq)}xueBIvx zBzlJMyXY)I7NH)Qh;JEcDl)4^!v%%VLlJ;3BDmR|v<*`Uj6o_^Od%czj3M?i*g;ed z2P@b~Zs<@k#O{Pg%0*E*VYM>2XGNwp0tHJ)UX+Ql(TK?2v|-sY7(u$4#Wj+YY&oqQ z)eUn|4z))pcpc{3qg1O(@xOeSuZ~4I?A`EFZyK%9DBm6zc`*X@AB#p?`9IdTNBB#VQUh{hH3U}C&d)*Q)m~FJhW?yY0MU5Kx$^M#&O2!{yd_Ywv7nlUfeC* zXZ!E8_JMbJkGO}Uxl7!J(eV0hwf67mWk0t*G;?_AGnn%OnDY$g`~c0F&B?xn1h-?Q zc3VIc&e#9gJ^Rz)h(pdSZ zVjubM#Q8h4l2qvjxH@qaN8hh+7KZVD554t0{VtwUS-d(Fe5MhT=J4$Q6^Tz8e1^@} zs_ClT2`W3G*9j_r7yF9V&S05c_*UsC9h?kP)odF}EX+<&S>av1okJZDLFH6C4`o2O zWFV-F;59@RN^lwCkFatY!jC$Uw`_F6!C8cZQwT>p5q6ILn;Z`A)Eo*4XIk#s`L&ag zldKo5(%B^HJD$Y2Mv=2?per$<1bvdF+DEDVM0hJAyO8WhbCi0J$Ri-wsvf2EVIu6j z6)5#Mktc{eNo1Y~+h9gRjF1={eTE9|5d`8e=3k*OppLs?(D5As&~fu+!!%>dl%qw;t}Y2ItkXfrhX}<1!dCV}oE!<K<+tYx)fSxbz8^zUTty13TTfYR_t)gv2 zX?C@P=V<^v6Bt`?d1c}y_HE!%s>6m*Q7RxZtlw-BDYPDt5@@=iPwBb;b*vdSil9c; z9M@;e8Jq=W^>M>8EZsHkvR*wTkO1sRA2=t``J4ntvHf$B0?tWz(?5Yeu{OM+Gh2s8 z7w5YtHi^mwb2L&i8p+wn=FyMUo*fOSSMe_Q4TrJ^RyhUJ-OnHh%iDfoGO-a6W$>c2 z4Dc>QaAd&Tf0Z7`Kw_(2f$ z&7!1m;FF-83)IlFL`Yox{7@vmxaN=Th=wC;d}k0EdAY%}vEmv9Sou`NcazvMs-!y$ zfuwnf$Oh_-4vd}yYwBw>mK#KVn+R{$rzpiNW9A*9$KxPzcCg!>@<~(sb6mmaK_~>X zA9Bc%H^Z~NeV$4FApYjKIfZoA%)#T&;@LA?2md+OIRM#(AB5b8Al;*=y*a&$+$`Kn zDz2ep^#PIVM3~uqIpq{PUbNnShKoZluFmU!moH_Y!bz(1Fi6jrg6oOzBXLObkC&TqVFy4IK z=XUA22bS+TDX{P!4`Cb$Y!{t)7k!`8-+_(9mtIAcPFyEWT`KZ-A#rLWN$e1W*yiHT z4Givq-3G@s?`flY%;}Dd@Kl#M=P?|%y&a1bI~Wr-{T(CWD$fm!59s*l)W&=Sw`rpg=~(N4M8c|`RQU# zl8?|sqrjf*I@{n=0*t=Pz=&LPeAI(jpc~8rp*!sqalPZKor8p#Z5MUY@YN_h3LX#P zS^`OM*n1$!ZDkoirF*^_K_mEt$ffU_d!G)muSQS{HHUmPK%uqA*Jc?4Sqo?T(ZE7> zrz18>Oz4A?U+#^K?2ba7q;`5JBne9&qr5%{Vf?Yf&d^@e=l?0kb_@HZ&1 zM!ZCXE z-3tpc{Gh3>#;FVE_=5s`2e+d+WfS8_o#Sk9mG|)Z5D-z3)%K^mS=?zc@vAU6)-ylf zE0v+DlmkVY#!faqb!uWxj1DL?s9qv6No1M`NkZ&2m#Ua6zViz=07QO=3J6LhM`H|o zXscGIh>*9TULdlc2z%hmlwx`M2BoS*SPmYfR9_g5(j!a65~Zeyyh$V^@;VVRa;&>x zRp)?rj&uY$6>K2h#i2N=Hz6K)+LGZhSSJX#T^c^CQN;^UF{{EjSOF;3JBtQ!0tAbK3A8<+E-Fbbszyi~RtXDDYNN)8hlNQI5?MGUM;c zzN#5H&-Dm-W$COOsLKKRnt=B(OCE(WtB-c^8nhpHZTHqt-m6R44&SG7yMzM`obK%Y zCfO6zXH=N52aN|HQ6khHnb@gIC4ew$~DI?Ray G?D>BJC0*43 literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/lpmodule.cpython-38.pyc b/DQNAgent/__pycache__/lpmodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b24635a763e997a49bf5df80fe886ab562ea04d GIT binary patch literal 562 zcmYjOO-sW-5Zz7M*cKH8JqR9F5F{YI3L<`0uszsIDMAU9HM_ROBpY@&NJG7O^XA!u z9{o$Xdh#y_f-|YoPMAsNWq335roXn)Pt zGO;E&zv=`R+{Qd%=bfO%N0T5*VOLU9RE zd37J@jJ}Z>nNd;QChrce(#gQRkoXNAeIp3yb zUNwwOKGxMWA#~}lRMsxIxbeX)RGgWCQ+iRZikW7CS%ocwm6K*&@DlQd;{{P0aJ0Wa z+r2+}rUBN`V}7FeXfKauDL|`755N3swD-;a!wJkrg;bF&Oq^$FE@p`w7AIu~&pBS- ztA)s-Al3#A?a=PpK2=?qpY9wyk98mZyszW5NMs* zg(tivz0w=Y+QXUpPztv2&+9zEIVLj9CoW_InUiTCnphMQDmKn-EIY`D&i-W|b{$xk zHVmodwmHf~pfheCx5nvgeT-arh1B)qUUFjsS zrfj6rfR7%8%ER5Ng!O=BnP`PYqwpTg+s(5V_x4!pF+YBi=Lz6`Z?9}TO1LpHE8RVr zX<0U+JT2lxV%b(9swAsL2amoPr@-Y{$ta&>G2Z)aFl$7KSU%mREgI4;?f&BhJzISU z)>n!c4;R2&k^3zu*W?1RbLbrB)HqYGCK!lA-9@smbPp&6ec}LtQ1Vmgbx0}@*hB*7 
z*m+vebkuE#T{E?X+FcZzATBYzfPi?C=CM$(!;EeH7-w9ayn?xU13lgZG4R*8s&~+O z4XSzjE6t75FJE&{$lA`hZyrfB$-Z&r%o$nNT^0g4 zI#-8D{`2}6th=@jtL|EHTpbLu=_rw0Wy3>>6)4A0p^!UB)ewtTxe&SZ1@t&ApGdN4~&RIo4W2ZcukoA)Uz}N zbXoY~y1|!i?3b`drVn$Kmh^`ka8S}TlQUJ+CaLUXXvN6HoAi0u zy7CrG+_`ESx^U9{_bcK7 iQb$v69JrP>O+T}lN;|tb%ZdN6TR_*cO^o=i@BRW=9dnNW literal 0 HcmV?d00001 diff --git a/DQNAgent/__pycache__/rlmodule.cpython-38.pyc b/DQNAgent/__pycache__/rlmodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5299d1f6064d45430686446456c1fe9cc8dd3991 GIT binary patch literal 2324 zcmZWq&2QaA5a0FpbKY0dKpN4OstAO8=z#;3P(=wvRFxnSih8l0v~s+ghf{yNuI-XW zj)c@+`v-tHG{^o4oVmh3uvZRMB@Uc{12b#;y(HbIH=fxY@6P<@x8sMSkxyX!`(jW0 zHX!6rG|mnaj4$9{{s@8-PE(T6kY+|`P_&KI%&gGL?9kS>nL3#px|tVx+P2a`=7+x4 z?Q|_1a)-OmNjSPsxW@<22_HN&LYw=14Q9u}=0iSuLBk0j^9jsM`IN82bDi6JB-nTZ zQINn;>q!>P1(PB!=6O;jMXn~5II7q}7O^PHB%g!VrLtZK`6MX?XQ_x}4%2G<-8Y@O zGZ%RUZkw`B1>29%O^T>2wBLk_I4NOgmPLwsZf%?8~UiqWt>KgW0`k zp3G#L6&&KbwRoz$-f-~@0|cRR2=c8!mSy`0Rs{(uC!ql#o7{R%LzCOw0Z=XOau1$1 z9{{*((gx?(KmUZ|*SoL(_#MYzcVnFH2kN-F&hjQG4Q&V`jRBH2CP<^PK$?~H(r)Mh zJ;ikgv5>$o61ZwDuQL|KShP}&`noU~RYFa8QpQD{SL^^h3aO^zXpt7MhJ}FQ?kgi- zDEnB*qEzl@%2yA7QB0Z23I6+QPtfSaKu>yY$Oyx0_0AmG0sh8J+4zJ%Zw7!^6BFO5^ukVEqkk=L{hAROAKbPrZvI-t9#uVC=xC3sUgGRk?8 zDGQ%EUge^iuStxl<}6ILv6b#n3w=#aIh2f!nKh$L5jj(1z9t)}~;teBqYyu6At zA!Q*iLNcxw9FSriO)kUM9VSD1jUHdUfXR99-N2M1jB|_vwZK941bdz}S_z zsJDi;m}0o4?H>&){_3|I^2&A}hW-*>w+ zCGL3fz^mdLgE)`bC&*<4i6Q?ZC2!W(@IH$jgvhJIiZTaxY*Fe=FQ89gy8+E92} z6%Y%ac4g|P14OhMyKy-E#X4~t7oxOw11t2Q;m2Zu?IQI0-VzR0ZQV!^0CCO+@jnT9dpx_4~EKbKT|-$ zNDcJ&NvnPh-5=V}qOQXARluyME1a72arYC`W%&Kfq|c(8P{V^HkJ8>6#kG{n5Y+%9 zY;P2*Z9U|53{-Pc3(c8zXgp$$t+-=%8Gu)L0|cR7`?CzHfB!kIWfI!`3}VPn!7JEM z(^X3C6>0IrdaH4tTeqS?#>%V7GRba9Lyz4^dFi5?!%8R{J|K&yZ6`tE=+a2+#8QKg kVVp^9m7RB|-R5(oc)1B;3f*x^htxAXbG+% self.epsilon_min: + self.epsilon *= self.epsilon_decay + + def predict(self, state): + state = np.reshape(state, (1, -1)) + return np.argmax(self.model.predict(state)[0]) + +dtype = object +env = gym.make('CartPole-v1') +state_size = env.observation_space.shape[0] +action_size = env.action_space.n +agent = DQNAgent(state_size, action_size) +agent.set_input_shape(env.observation_space) + +# Create an instance of the DQNAgent +dqn_agent = DQNAgent(env.observation_space.shape[0], env.action_space.n) + +# Training the DQN +state = env.reset() +state = np.reshape(state, [-1, 1]) +for time in range(500): + action = dqn_agent.act(state) + next_state, reward, done, _, _ = env.step(action) + reward = reward if not done else -10 + next_state = np.reshape(next_state, [-1, 1]) + dqn_agent.remember(state, action, reward, next_state, done) + state = next_state + if done: + break + if len(dqn_agent.memory) > 32: + dqn_agent.replay(32) + +class QLearningAgent: + def __init__(self, q_table, observation_space, action_space, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1): + self.q_table = q_table + self.num_actions = action_space.n if hasattr(action_space, 'n') else action_space.shape[0] # Use shape[0] for continuous action space + self.num_states = observation_space.shape[0] # Use shape[0] for the number of dimensions + self.learning_rate = learning_rate + self.discount_factor = discount_factor + self.exploration_rate = exploration_prob + + def __init__(self, q_table, observation_space, action_space, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1): + self.q_table = q_table + self.num_actions = action_space.n if hasattr(action_space, 'n') else action_space.shape[2] + self.num_states = 
observation_space.shape[0] + + def q_learning(env, learning_rate=0.1, discount_factor=0.9, epsilon=0.9, episodes=1000): + if isinstance(env.action_space, gym.spaces.Discrete): + num_actions = env.action_space.n + else: + num_actions = env.action_space.shape[0] + + Q = np.zeros((env.observation_space.n, num_actions)) # Corrected size of Q-table + + for episode in range(episodes): + state = env.reset() + done = False + while not done: + if np.random.uniform(0, 1) < epsilon: + action = env.action_space.sample() + else: + action = np.argmax(Q[state, :]) + + next_state, reward, done, _ = env.step(action) + + # Update Q-value + Q[state, action] += learning_rate * (reward + discount_factor * np.max(Q[next_state, :]) - Q[state, action]) + + state = next_state + + return Q + + # Main part of the code + env = gym.make('CartPole-v1') + + # Q-learning parameters + learning_rate_q = 0.1 + discount_factor_q = 0.9 + exploration_prob_q = 0.1 + num_episodes_q = 100 + + # Initialize Q-table for Q-learning + q_table = q_learning(env, learning_rate=learning_rate_q, discount_factor=discount_factor_q, epsilon=exploration_prob_q, episodes=num_episodes_q) + + # Create Q-learning agent + q_agent = QLearningAgent(q_table, env.observation_space, env.action_space, learning_rate_q, discount_factor_q, exploration_prob_q) + + # Run Q-learning + num_episodes_q = 100 + run_q_learning(q_agent, env, num_episodes_q) + + # Use Q-learning data to train a supervised learning model + states_q = np.arange(env.observation_space.n) + actions_q = np.argmax(q_agent.q_table, axis=1) + X_q = states_q.reshape(-1, 1) + y_q = actions_q + + # Train supervised learning model + supervised_model = supervised_learning(X_q, y_q) + + def select_action(self, state): + if np.random.rand() < self.exploration_rate: + return np.random.choice(self.num_actions) + else: + return np.argmax(self.q_table[state, :]) + + def update_q_table(self, state, action, reward, next_state): + best_next_action = np.argmax(self.q_table[next_state, :]) + td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action] + td_error = td_target - self.q_table[state, action] + self.q_table[state, action] += self.learning_rate * td_error + + def q_learning(env, learning_rate, discount_factor, epsilon, episodes): + model = Sequential([ + Dense(64, input_shape=(env.observation_space.shape[0],), activation='relu'), + Dense(env.action_space.n, activation='linear') + ]) + model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss='mse') + + for episode in range(episodes): + state = env.reset() + state = np.reshape(state, [1, env.observation_space.shape[0]]) + + for time in range(500): # Adjust the maximum time steps as needed + # Choose action using epsilon-greedy policy or exploration strategy + action = epsilon_greedy_policy(model, state, epsilon) + + # Take the chosen action and observe the next state and reward + next_state, reward, done, _ = env.step(action) + next_state = np.reshape(next_state, [1, env.observation_space.shape[0]]) + + # Update Q-values using the Bellman equation and backpropagation + target = reward + discount_factor * np.max(model.predict(next_state)) + target_f = model.predict(state) + target_f[0][action] = target + model.fit(state, target_f, epochs=1, verbose=0) + + state = next_state + + if done: + break + + return model + + def get_num_actions(self, action_space): + if isinstance(action_space, gym.spaces.Discrete): + return action_space.n + else: + return action_space.shape[2] + + Q = 
np.zeros((env.observation_space.shape[4], env.action_space.shape[2])) + env = gym.make('CartPole-v1') + num_states = agent.num_states() + num_actions = agent.num_actions() + state_size = env.observation_space.shape[4] + action_size = env.action_space.n if hasattr(env.action_space, 'n') else env.action_space.shape[2] + q_table = np.zeros((env.observation_space.shape[4], action_size)) # Initialize q_table + agent = QLearningAgent(q_table, env.observation_space, env.action_space) + num_episodes = 100 + run_q_learning(agent, env, num_episodes) + +class SupervisedLearningModel: + def __init__(self): + self.model = DecisionTreeClassifier() + + def train(self, X_train, y_train): + self.model.fit(X_train, y_train) + + def predict(self, X_test): + return self.model.predict(X_test) + +def supervised_learning(X, y): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + model = SupervisedLearningModel() + model.train(X_train, y_train) + y_pred = model.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + print(f"Accuracy: {accuracy}") + return model + +# Initializing the environment +env = gym.make('CartPole-v1') +state = env.reset() +state = np.reshape(state, (-1, 1)) + +# Running the Q-learning algorithm +Q_table = q_learning(env) + +# Using the Q-table for inference +state = env.reset() +done = False +while not done: + action = np.argmax(Q_table[0, :]) + next_state, reward, done, _, _ = env.step(action) + state = next_state + +Q = np.zeros((env.observation_space.shape[0], env.action_space.n)) + +# Instantiate QLearningAgent +if isinstance(env.observation_space, gym.spaces.Discrete): + q_table = np.zeros((env.observation_space.n, env.action_space.n)) +else: + q_table = np.zeros((env.observation_space.shape[0], env.action_space.n)) # Initialize q_table +agent = QLearningAgent(q_table, env.observation_space, env.action_space) + +# Call the run_q_learning function +num_episodes = 100 +agent.run_q_learning(env, num_episodes) + +# After running Q-learning, we can use the learned Q-table to generate a dataset for supervised learning +states = np.arange(env.observation_space.n) +actions = np.argmax(agent.q_table, axis=1) + +# The states are the inputs and the actions are the outputs +X = states.reshape(-1, 1) +y = actions + +# Train a supervised learning model on the Q-learning data +supervised_model = supervised_learning(X, y) + +# Training the DQN +state = env.reset() +state = np.reshape(state, [-1, 1]) +for time in range(500): + action = dqn_agent.act(state) + next_state, reward, done, _, _ = env.step(action) + reward = reward if not done else -10 + next_state = np.reshape(next_state, [-1, 1]) + dqn_agent.remember(state, action, reward, next_state, done) + state = next_state + if done: + break + if len(dqn_agent.memory) > 32: + dqn_agent.replay(32) + +# Instantiate QLearningAgent +q_table = np.zeros([0]) # Initialize q_table +agent = QLearningAgent(q_table, env.observation_space, env.action_space) + +# Call the run_q_learning function +num_episodes = 100 +run_q_learning(agent, env, num_episodes) + +# Using the Q-learning data to train a supervised learning model +states = np.arange(env.observation_space.n) +actions = np.argmax(agent.q_table, axis=1) +X = states.reshape(-1, 1) +y = actions +supervised_model = supervised_learning(X, y) + +env = gym.make('CartPole-v1') +state = env.reset() +state = np.reshape(state, (1, -1)) # Reshape the state +observation_space = env.observation_space +action_space = env.action_space + +dqn_agent = DQNAgent(state_size, 
env.action_space.n) # Pass state.shape[1] as state_size +agent.set_input_shape(env.observation_space) + +# Training the DQN +state = env.reset() +state = np.reshape(state, [-1, 1]) +for time in range(500): + action = agent.act(state) + next_state, reward, done, _, _ = env.step(action) + reward = reward if not done else -10 + next_state = np.reshape(next_state, [-1, 1]) + agent.remember(state, action, reward, next_state, done) + state = next_state + if done: + break + if len(agent.memory) > 32: + agent.replay(32) + +q_table = np.zeros((observation_space.n, action_space.n)) # Initialize q_table +agent = QLearningAgent(q_table, observation_space, action_space) # Instantiate QLearningAgent +num_episodes = 100 +run_q_learning(agent, env, num_episodes) # Call the run_q_learning function + +# After running Q-learning, we can use the learned Q-table to generate a dataset for supervised learning +states = np.arange(env.observation_space.n) +actions = np.argmax(agent.q_table, axis=1) + +# The states are the inputs and the actions are the outputs +X = states.reshape(-1, 1) +y = actions + +# Train a supervised learning model on the Q-learning data +supervised_model = supervised_learning(X, y) + +def q_learning(env, learning_rate=0.1, discount_factor=0.9, epsilon=0.9, episodes=1000): + # Initializing Q-table + Q = np.zeros((env.observation_space, env.action_space)) + + # Q-learning algorithm + for episode in range(episodes): + state = env.reset() + done = False + while not done: + # Selecting action using epsilon-greedy strategy + if np.random.uniform(0, 1) < epsilon: + action = env.action_space.sample() + else: + action = np.argmax(Q[state, :]) + + # Taking action and observing next state and reward + next_state, reward, done, _ = env.step(action) + + # Updating Q-value + Q[state, action] += learning_rate * (reward + discount_factor * np.max(Q[next_state, :]) - Q[state, action]) + + state = next_state + + return Q + +# Initializing the environment +env = gym.make('CartPole-v1') +state = env.reset() +state = np.reshape(state, (-1, 1)) +# Running the Q-learning algorithm +Q_table = q_learning(env) + +# Using the Q-table for inference +state = env.reset() +done = False +while not done: + action = np.argmax(Q_table[state, :]) + next_state, reward, done, _ = env.step(action) + state = next_state + +env = gym.make('FrozenLake-v1') +agent = QLearningAgent(q_table, observation_space, action_space) +run_q_learning(agent, env, 100) + +env = gym.make('CartPole-v1') +state_size = env.observation_space.shape[0] +action_size = env.action_space.n +agent = DQNAgent(state_size, action_size) +agent.set_input_shape(env.observation_space) + +if __name__ == "__main__": + # Example usage for Q-learning + env_q = gym.make('CartPole-v1') + q_table = q_learning(env_q) + q_agent = QLearningAgent(q_table, env_q.observation_space, env_q.action_space) + q_agent.run_q_learning(env_q, 100) + + # Example usage for DQN + env_dqn = gym.make('CartPole-v1') + state_size_dqn = env_dqn.observation_space.shape[0] + action_size_dqn = env_dqn.action_space.n + agent_dqn = DQNAgent(state_size_dqn, action_size_dqn) + agent_dqn.set_input_shape(env_dqn.observation_space) + + state_dqn = env_dqn.reset() + state_dqn = np.reshape(state_dqn, (1, -1)) + for time in range(500): + action_dqn = agent_dqn.act(state_dqn) + next_state_dqn, reward_dqn, done_dqn, _, _ = env_dqn.step(action_dqn) + reward_dqn = reward_dqn if not done_dqn else -10 + next_state_dqn = np.reshape(next_state_dqn, (1, -1)) + agent_dqn.remember(state_dqn, action_dqn, reward_dqn, 
next_state_dqn, done_dqn) + state_dqn = next_state_dqn + if done_dqn: + break + if len(agent_dqn.memory) > 32: + agent_dqn.replay(32) + + # Example usage for Q-learning with Supervised Learning + env_q_sl = gym.make('CartPole-v1') + q_table_sl = q_learning(env_q_sl) + q_agent_sl = QLearningAgent(q_table_sl, env_q_sl.observation_space, env_q_sl.action_space) + q_agent_sl.run_q_learning(env_q_sl, 100) + + # Use Q-learning data to train a supervised learning model + states_q_sl = np.arange(env_q_sl.observation_space.n) + actions_q_sl = np.argmax(q_agent_sl.q_table, axis=1) + X_q_sl = states_q_sl.reshape(-1, 1) + y_q_sl = actions_q_sl + + # Train supervised learning model + supervised_model_sl = supervised_learning(X_q_sl, y_q_sl) + +print(f"State: {state}, Action: {action}, Next State: {next_state}") \ No newline at end of file diff --git a/DQNAgent/lpmodule.py b/DQNAgent/lpmodule.py new file mode 100644 index 0000000..6478291 --- /dev/null +++ b/DQNAgent/lpmodule.py @@ -0,0 +1,13 @@ +import random + +def lpmodule(): + pass + +def simple_chatbot(user_input): + responses = { + "How are you?": "I'm good, thank you!", + "What's your name?": "I'm a simple chatbot.", + "Default": "I'm not sure how to respond to that." + } + + return responses.get(user_input, responses["Default"]) \ No newline at end of file diff --git a/DQNAgent/perceptionmodule.py b/DQNAgent/perceptionmodule.py new file mode 100644 index 0000000..ea952df --- /dev/null +++ b/DQNAgent/perceptionmodule.py @@ -0,0 +1,85 @@ +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import Sequential +from sklearn.feature_extraction.text import CountVectorizer + +mnist = tf.keras.datasets.mnist + +(x_train, y_train), (x_test, y_test) = mnist.load_data() +x_train, x_test = x_train / 255.0, x_test / 255.0 + +def text_processing(corpus): + # Tokenize the text data + tokenizer = tf.keras.preprocessing.text.Tokenizer() + tokenizer.fit_on_texts(corpus) + + # Convert text to sequences of integers + sequences = tokenizer.texts_to_sequences(corpus) + + # Pad sequences to have consistent length + padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences) + + # Print the processed data + print("Processed Text Data:") + print(padded_sequences) + +# Sample text data +corpus = ["This is a simple text.", "Text processing example.", "Natural Language Processing is interesting."] + +# Call the text_processing function +text_processing(corpus) + +def image_recognition(): + # Build a simple CNN model for image recognition + model = tf.keras.models.Sequential([ + tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)), + tf.keras.layers.MaxPooling2D((2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(64, activation='relu'), + tf.keras.layers.Dense(10, activation='softmax') + ]) + + # Compile the model + model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) + + # Your image recognition logic goes here + pass + +def perception_module(input_shape): + model = tf.keras.models.Sequential([ + layers.Flatten(input_shape=input_shape), # Flatten the input + layers.Dense(64, activation='relu'), # Dense layer with ReLU activation + layers.Dense(32, activation='relu'), # Additional Dense layer + layers.Dense(1, activation='sigmoid') # Output layer with Sigmoid activation for binary classification + ]) + + # Compile the model + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + + return model 
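+
+# --- Illustrative smoke test (not part of the original upload) ---
+# Quick shape check for perception_module using random placeholder arrays;
+# the dummy data below is an assumption and stands in for real preprocessed
+# 64x64 RGB images with binary labels.
+import numpy as np
+
+_X_dummy = np.random.rand(8, 64, 64, 3).astype("float32")
+_y_dummy = np.random.randint(0, 2, size=(8, 1))
+_smoke_model = perception_module((64, 64, 3))
+_smoke_model.fit(_X_dummy, _y_dummy, epochs=1, verbose=0)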
+ +# Example usage: +input_shape = (64, 64, 3) # Adjust the input shape based on your data +perception_model = perception_module(input_shape) + +# Display the model summary +perception_model.summary() + +# Example usage: +input_shape = (64, 64, 3) # Adjust the input shape based on your data +perception_model = perception_module(input_shape) + +# Display the model summary +perception_model.summary() +# Sample text data +corpus = ["This is a simple text.", "Text processing example.", "Natural Language Processing is interesting."] + +# Create a bag-of-words model using CountVectorizer +vectorizer = CountVectorizer() +X = vectorizer.fit_transform(corpus) + +# Call the respective functions based on your workflow +text_processing(corpus) +image_recognition() +perception_module(input_shape) \ No newline at end of file diff --git a/DQNAgent/reasoningmodule.py b/DQNAgent/reasoningmodule.py new file mode 100644 index 0000000..5521dac --- /dev/null +++ b/DQNAgent/reasoningmodule.py @@ -0,0 +1,13 @@ +def reasoningmodule(): + pass + +def decision_making(): + pass + +def rule_based_reasoning(input_data): + if "condition1" in input_data: + return "Result A" + elif "condition2" in input_data: + return "Result B" + else: + return "Default Result" \ No newline at end of file diff --git a/DQNAgent/rlmodule.py b/DQNAgent/rlmodule.py new file mode 100644 index 0000000..42618b4 --- /dev/null +++ b/DQNAgent/rlmodule.py @@ -0,0 +1,66 @@ +from perceptionmodule import image_recognition, text_processing +from learningmodule import supervised_learning, QLearningAgent +from reasoningmodule import rule_based_reasoning, decision_making +from lpmodule import simple_chatbot +import numpy as np + +def rlmodule(): + pass + +class QLearningAgent: + def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1): + self.num_actions = num_actions + self.learning_rate = learning_rate + self.discount_factor = discount_factor + self.exploration_prob = exploration_prob + self.q_table = np.zeros((num_actions,)) + + def select_action(self, state): + if np.random.rand() < self.exploration_prob: + return np.random.randint(self.num_actions) # Exploration + else: + return np.argmax(self.q_table) # Exploitation + + def update_q_table(self, state, action, reward, next_state): + best_next_action = np.argmax(self.q_table) + td_error = reward + self.discount_factor * self.q_table[best_next_action] - self.q_table[action] + self.q_table[action] += self.learning_rate * td_error + + +def cagi_agent(states): + # Placeholder function, replace with actual state representation logic + return states[0] + +# RL Agent +rl_agent = QLearningAgent(num_actions=3) # Assuming 3 possible actions + +def execute_action_and_get_reward(action): + # Placeholder function, replace with actual action execution and reward logic + return 1.0 # Placeholder reward + +def integrate_modules(image_data, text_data, user_input): + perception_output = image_recognition(image_data) + learning_output = supervised_learning(text_data) + reasoning_output = rule_based_reasoning(user_input) + language_output = simple_chatbot(user_input) + + # Combine or use the outputs as needed + final_output = { + "perception": perception_output, + "learning": learning_output, + "reasoning": reasoning_output, + "language": language_output + } + + return final_output + + # RL Module + current_state = cagi_agent(environment_states) + rl_action = rl_agent.select_action(current_state) + rl_reward = execute_action_and_get_reward(rl_action) + next_state = 
cagi_agent(environment_states)
+    rl_agent.update_q_table(current_state, rl_action, rl_reward, next_state)
+
+    final_output["rl_learning"] = {"action": rl_action, "reward": rl_reward}
+
+    return final_output
\ No newline at end of file
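Note on rlmodule.integrate_modules above: everything after its first return final_output (the "# RL Module" block and the second return) is unreachable as written, so the Q-table is never updated through that path. The loop below is a minimal illustrative sketch of how the module-level pieces appear intended to interact. It assumes it runs inside rlmodule.py, where rl_agent, cagi_agent and execute_action_and_get_reward are defined; the environment_states list mirrors the example states used in DQNAgent.py, and the ten-step count is arbitrary.

# Hedged sketch: one select/reward/update cycle per iteration, using the
# placeholder reward function defined in this module.
environment_states = ["State1", "State2", "State3"]  # illustrative states

for _ in range(10):
    current_state = cagi_agent(environment_states)
    action = rl_agent.select_action(current_state)
    reward = execute_action_and_get_reward(action)
    next_state = cagi_agent(environment_states)
    rl_agent.update_q_table(current_state, action, reward, next_state)

print("Learned action values:", rl_agent.q_table)

Because this QLearningAgent keeps a one-dimensional q_table, the update behaves like a bandit-style value estimate: select_action and update_q_table accept a state argument but do not use it to index the table.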