tests

Limmen · Feb 4, 2024 · 04c6f11 · 04c6f11
1 parent d4fed0b
commit 04c6f11
Show file tree

Hide file tree

Showing 7 changed files with 185 additions and 28 deletions.
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
@@ -32,24 +32,24 @@
         decoy_state=True, decoy_optimization=False, cache_visited_states=True, save_trace=False)
     simulation_env_config.simulation_env_input_config = CSLECyborgWrapperConfig(
         gym_env_name="csle-cyborg-scenario-two-wrapper-v1", maximum_steps=100, save_trace=False, scenario=2,
-        reward_shaping=True)
+        reward_shaping=False)
     simulation_env_config.gym_env_name = "csle-cyborg-scenario-two-wrapper-v1"
     csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
     A = csle_cyborg_env.get_action_space()
     initial_particles = csle_cyborg_env.initial_particles
-    # rollout_policy = MetastoreFacade.get_ppo_policy(id=58)
+    rollout_policy = MetastoreFacade.get_ppo_policy(id=1)
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
     #                             "ppo_test_1706439955.8221297/ppo_model2900_1706522984.6982665.zip")
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
     #                             "ppo_test_1706439955.8221297/ppo_model50_1706441287.1284034.zip")
     # ppo_model50_1706441287.1284034.zip
     # rollout_policy.load()
-    rollout_policy = None
+    # rollout_policy = None
     value_function = lambda x: 0
     # value_function = rollout_policy.value
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test",
-        random_seeds=[555512, 98912, 999, 555],
+        random_seeds=[44145, 98912, 999, 555],
         agent_type=AgentType.POMCP,
         log_every=1,
         hparams={
@@ -65,7 +65,7 @@
                 value=value_function, name=agents_constants.POMCP.VALUE_FUNCTION,
                 descr="the value function to use for truncated rollouts"),
             agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
-            agents_constants.POMCP.GAMMA: HParam(value=1, name=agents_constants.POMCP.GAMMA,
+            agents_constants.POMCP.GAMMA: HParam(value=0.75, name=agents_constants.POMCP.GAMMA,
                                                  descr="the discount factor"),
             agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION,
                                                           descr="whether reinvigoration should be used"),
@@ -75,27 +75,32 @@
             agents_constants.POMCP.INITIAL_PARTICLES: HParam(value=initial_particles,
                                                              name=agents_constants.POMCP.INITIAL_PARTICLES,
                                                              descr="the initial belief"),
-            agents_constants.POMCP.PLANNING_TIME: HParam(value=10, name=agents_constants.POMCP.PLANNING_TIME,
+            agents_constants.POMCP.PLANNING_TIME: HParam(value=30, name=agents_constants.POMCP.PLANNING_TIME,
                                                          descr="the planning time"),
+            agents_constants.POMCP.PRUNE_ACTION_SPACE: HParam(
+                value=False, name=agents_constants.POMCP.PRUNE_ACTION_SPACE,
+                descr="boolean flag indicating whether the action space should be pruned or not"),
+            agents_constants.POMCP.PRUNE_SIZE: HParam(
+                value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
             agents_constants.POMCP.MAX_PARTICLES: HParam(value=5000, name=agents_constants.POMCP.MAX_PARTICLES,
                                                          descr="the maximum number of belief particles"),
             agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
-                value=4, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
+                value=100, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
             agents_constants.POMCP.MAX_ROLLOUT_DEPTH: HParam(value=5, name=agents_constants.POMCP.MAX_ROLLOUT_DEPTH,
                                                              descr="the maximum depth for rollout"),
             agents_constants.POMCP.C: HParam(value=1, name=agents_constants.POMCP.C,
                                              descr="the weighting factor for UCB exploration"),
             agents_constants.POMCP.C2: HParam(value=15000, name=agents_constants.POMCP.C2,
                                               descr="the weighting factor for AlphaGo exploration"),
             agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
-                value=False, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
+                value=True, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
                 descr="boolean flag indicating whether rollout policy should be used"),
-            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=5, name=agents_constants.POMCP.PRIOR_WEIGHT,
+            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=1, name=agents_constants.POMCP.PRIOR_WEIGHT,
                                                         descr="the weight on the prior"),
             agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
                                                             descr="the prior confidence"),
             agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE: HParam(
-                value=POMCPAcquisitionFunctionType.UCB, name=agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE,
+                value=POMCPAcquisitionFunctionType.ALPHA_GO, name=agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE,
                 descr="the type of acquisition function"),
             agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
                 value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),

diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line_ucb.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line_ucb.py
@@ -0,0 +1,138 @@
+import csle_common.constants.constants as constants
+from csle_common.dao.training.experiment_config import ExperimentConfig
+from csle_common.metastore.metastore_facade import MetastoreFacade
+from csle_common.dao.training.agent_type import AgentType
+from csle_common.dao.training.hparam import HParam
+from csle_common.dao.training.player_type import PlayerType
+from csle_agents.agents.pomcp.pomcp_agent import POMCPAgent
+from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
+import csle_agents.constants.constants as agents_constants
+from csle_agents.common.objective_type import ObjectiveType
+from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
+from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
+from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
+from gym_csle_cyborg.dao.red_agent_type import RedAgentType
+from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
+
+if __name__ == '__main__':
+    emulation_name = "csle-level9-040"
+    emulation_env_config = None
+    simulation_name = "csle-cyborg-001"
+    simulation_env_config = SimulationEnvConfig(name="", version="", gym_env_name="", simulation_env_input_config="",
+                                                players_config="", joint_action_space_config="",
+                                                joint_observation_space_config="", time_step_type=None,
+                                                reward_function_config=None, transition_operator_config=None,
+                                                observation_function_config=None,
+                                                initial_state_distribution_config=None, env_parameters_config=None,
+                                                plot_transition_probabilities=False, plot_observation_function=False,
+                                                plot_reward_function=False, descr="", state_space_config=None)
+    eval_env_config = CSLECyborgConfig(
+        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
+        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
+        decoy_state=True, decoy_optimization=False, cache_visited_states=True, save_trace=False)
+    simulation_env_config.simulation_env_input_config = CSLECyborgWrapperConfig(
+        gym_env_name="csle-cyborg-scenario-two-wrapper-v1", maximum_steps=100, save_trace=False, scenario=2,
+        reward_shaping=True)
+    simulation_env_config.gym_env_name = "csle-cyborg-scenario-two-wrapper-v1"
+    csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
+    A = csle_cyborg_env.get_action_space()
+    initial_particles = csle_cyborg_env.initial_particles
+    # rollout_policy = MetastoreFacade.get_ppo_policy(id=58)
+    # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
+    #                             "ppo_test_1706439955.8221297/ppo_model2900_1706522984.6982665.zip")
+    # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
+    #                             "ppo_test_1706439955.8221297/ppo_model50_1706441287.1284034.zip")
+    # ppo_model50_1706441287.1284034.zip
+    # rollout_policy.load()
+    rollout_policy = None
+    value_function = lambda x: 0
+    # value_function = rollout_policy.value
+    experiment_config = ExperimentConfig(
+        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test",
+        random_seeds=[555512, 98912, 999, 555],
+        agent_type=AgentType.POMCP,
+        log_every=1,
+        hparams={
+            agents_constants.POMCP.N: HParam(value=5000, name=agents_constants.POMCP.N,
+                                             descr="the number of episodes"),
+            agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
+                value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
+                descr="the type of objective (max or min)"),
+            agents_constants.POMCP.ROLLOUT_POLICY: HParam(
+                value=rollout_policy, name=agents_constants.POMCP.ROLLOUT_POLICY,
+                descr="the policy to use for rollouts"),
+            agents_constants.POMCP.VALUE_FUNCTION: HParam(
+                value=value_function, name=agents_constants.POMCP.VALUE_FUNCTION,
+                descr="the value function to use for truncated rollouts"),
+            agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
+            agents_constants.POMCP.GAMMA: HParam(value=1, name=agents_constants.POMCP.GAMMA,
+                                                 descr="the discount factor"),
+            agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION,
+                                                          descr="whether reinvigoration should be used"),
+            agents_constants.POMCP.REINVIGORATED_PARTICLES_RATIO: HParam(
+                value=0.01, name=agents_constants.POMCP.REINVIGORATED_PARTICLES_RATIO,
+                descr="the ratio of reinvigorated particles in the particle filter"),
+            agents_constants.POMCP.INITIAL_PARTICLES: HParam(value=initial_particles,
+                                                             name=agents_constants.POMCP.INITIAL_PARTICLES,
+                                                             descr="the initial belief"),
+            agents_constants.POMCP.PLANNING_TIME: HParam(value=60, name=agents_constants.POMCP.PLANNING_TIME,
+                                                         descr="the planning time"),
+            agents_constants.POMCP.PRUNE_ACTION_SPACE: HParam(
+                value=False, name=agents_constants.POMCP.PRUNE_ACTION_SPACE,
+                descr="boolean flag indicating whether the action space should be pruned or not"),
+            agents_constants.POMCP.PRUNE_SIZE: HParam(
+                value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
+            agents_constants.POMCP.MAX_PARTICLES: HParam(value=5000, name=agents_constants.POMCP.MAX_PARTICLES,
+                                                         descr="the maximum number of belief particles"),
+            agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
+                value=100, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
+            agents_constants.POMCP.MAX_ROLLOUT_DEPTH: HParam(value=4, name=agents_constants.POMCP.MAX_ROLLOUT_DEPTH,
+                                                             descr="the maximum depth for rollout"),
+            agents_constants.POMCP.C: HParam(value=1, name=agents_constants.POMCP.C,
+                                             descr="the weighting factor for UCB exploration"),
+            agents_constants.POMCP.C2: HParam(value=15000, name=agents_constants.POMCP.C2,
+                                              descr="the weighting factor for AlphaGo exploration"),
+            agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
+                value=False, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
+                descr="boolean flag indicating whether rollout policy should be used"),
+            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=5, name=agents_constants.POMCP.PRIOR_WEIGHT,
+                                                        descr="the weight on the prior"),
+            agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
+                                                            descr="the prior confidence"),
+            agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE: HParam(
+                value=POMCPAcquisitionFunctionType.UCB, name=agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE,
+                descr="the type of acquisition function"),
+            agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
+                value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
+            agents_constants.POMCP.MAX_NEGATIVE_SAMPLES: HParam(
+                value=20, name=agents_constants.POMCP.MAX_NEGATIVE_SAMPLES,
+                descr="maximum number of negative samples when filling belief particles"),
+            agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
+                value=0, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in "
+                                                                               "the search tree"),
+            agents_constants.POMCP.VERBOSE: HParam(value=True, name=agents_constants.POMCP.VERBOSE,
+                                                   descr="verbose logging flag"),
+            agents_constants.POMCP.EVAL_ENV_NAME: HParam(value="csle-cyborg-scenario-two-v1",
+                                                         name=agents_constants.POMCP.EVAL_ENV_NAME,
+                                                         descr="the name of the evaluation environment"),
+            agents_constants.POMCP.EVAL_ENV_CONFIG: HParam(value=eval_env_config,
+                                                           name=agents_constants.POMCP.EVAL_ENV_CONFIG,
+                                                           descr="the configuration of the evaluation environment"),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
+                                                            descr="number of evaluation episodes"),
+            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
+                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
+                descr="confidence interval"),
+            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
+                value=100, name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
+                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg")
+        },
+        player_type=PlayerType.DEFENDER, player_idx=0
+    )
+    agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
+                       experiment_config=experiment_config, save_to_metastore=False)
+    experiment_execution = agent.train()
+    MetastoreFacade.save_experiment_execution(experiment_execution)
diff --git a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
@@ -99,6 +99,11 @@
             agents_constants.POMCP.PARALLEL_ROLLOUT: HParam(
                 value=False, name=agents_constants.POMCP.PARALLEL_ROLLOUT, descr="boolean flag indicating whether "
                                                                                  "parallel rollout should be used"),
+            agents_constants.POMCP.PRUNE_ACTION_SPACE: HParam(
+                value=False, name=agents_constants.POMCP.PRUNE_ACTION_SPACE,
+                descr="boolean flag indicating whether the action space should be pruned or not"),
+            agents_constants.POMCP.PRUNE_SIZE: HParam(
+                value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
             agents_constants.POMCP.NUM_PARALLEL_PROCESSES: HParam(
                 value=50, name=agents_constants.POMCP.NUM_PARALLEL_PROCESSES, descr="number of parallel processes"),
             agents_constants.POMCP.NUM_EVALS_PER_PROCESS: HParam(