diff --git a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab index f22f007f3f..85159c2314 100644 --- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab +++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab @@ -89,7 +89,7 @@ MonoBehaviour: VectorActionDescriptions: [] VectorActionSpaceType: 0 hasUpgradedBrainParametersWithActionSpec: 1 - m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3} + m_Model: {fileID: 11400000, guid: 28ccdfd7cb3d941ce8af0ab89e06130a, type: 3} m_InferenceDevice: 2 m_BehaviorType: 0 m_BehaviorName: Match3VisualObs diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx index 6c008c28c4..7d32504590 100644 Binary files a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx and b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx differ diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn deleted file mode 100644 index 216bb6dd84..0000000000 Binary files a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn and /dev/null differ diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta deleted file mode 100644 index 15fe43af9a..0000000000 --- a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 48d14da88fea74d0693c691c6e3f2e34 -ScriptedImporter: - fileIDToRecycleName: - 11400000: main obj - 11400002: model data - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: - script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3} diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx new file mode 100644 index 0000000000..d8ec297d15 Binary files /dev/null and b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx differ diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta new file mode 100644 index 0000000000..4b2b528221 --- /dev/null +++ b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta @@ -0,0 +1,15 @@ +fileFormatVersion: 2 +guid: 28ccdfd7cb3d941ce8af0ab89e06130a +ScriptedImporter: + fileIDToRecycleName: + 11400000: main obj + 11400002: model data + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: + script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3} + optimizeModel: 1 + forceArbitraryBatchSize: 1 + treatErrorsAsWarnings: 0 + importMode: 1 diff --git a/config/ppo/Match3.yaml b/config/ppo/Match3.yaml index e60a138cfa..88bb5bc288 100644 --- a/config/ppo/Match3.yaml +++ b/config/ppo/Match3.yaml @@ -1,72 +1,48 @@ +default_settings: + trainer_type: ppo + hyperparameters: + batch_size: 16 + buffer_size: 120 + learning_rate: 0.0003 + beta: 0.005 + epsilon: 0.2 + lambd: 0.99 + num_epoch: 3 + learning_rate_schedule: constant + network_settings: + normalize: true + hidden_units: 256 + num_layers: 4 + vis_encode_type: match3 + reward_signals: + extrinsic: + gamma: 0.99 + strength: 1.0 + keep_checkpoints: 5 + max_steps: 5000000 + time_horizon: 128 + summary_freq: 10000 + threaded: true + behaviors: - Match3VectorObs: - trainer_type: ppo - hyperparameters: - batch_size: 64 - buffer_size: 12000 - learning_rate: 0.0003 - beta: 0.001 - epsilon: 0.2 - lambd: 0.99 - num_epoch: 3 - learning_rate_schedule: constant - network_settings: - normalize: true - hidden_units: 128 - num_layers: 2 - vis_encode_type: match3 - reward_signals: - extrinsic: - gamma: 0.99 - strength: 1.0 - keep_checkpoints: 5 - max_steps: 5000000 - time_horizon: 1000 - summary_freq: 10000 - threaded: true - Match3VisualObs: - trainer_type: ppo - hyperparameters: - batch_size: 64 - buffer_size: 12000 - learning_rate: 0.0003 - beta: 0.001 - epsilon: 0.2 - lambd: 0.99 - num_epoch: 3 - learning_rate_schedule: constant - network_settings: - normalize: true - hidden_units: 128 - num_layers: 2 - vis_encode_type: match3 - reward_signals: - extrinsic: - gamma: 0.99 - strength: 1.0 - keep_checkpoints: 5 - max_steps: 5000000 - time_horizon: 1000 - summary_freq: 10000 - threaded: true Match3SimpleHeuristic: # Settings can be very simple since we don't care about actually training the model trainer_type: ppo hyperparameters: - batch_size: 64 - buffer_size: 128 + batch_size: 16 + buffer_size: 120 network_settings: hidden_units: 4 num_layers: 1 max_steps: 5000000 summary_freq: 10000 threaded: true - Match3GreedyHeuristic: + Match3SmartHeuristic: # Settings can be very simple since we don't care about actually training the model trainer_type: ppo hyperparameters: - batch_size: 64 - buffer_size: 128 + batch_size: 16 + buffer_size: 120 network_settings: hidden_units: 4 num_layers: 1 diff --git a/docs/Learning-Environment-Examples.md b/docs/Learning-Environment-Examples.md index 97fb771ddb..5b343b05a4 100644 --- a/docs/Learning-Environment-Examples.md +++ b/docs/Learning-Environment-Examples.md @@ -551,7 +551,7 @@ drop down. New pieces are spawned randomly at the top, with a chance of being - Observations and actions are defined with a sensor and actuator respectively. - Float Properties: None - Benchmark Mean Reward: - - 37.2 for visual observations - - 37.6 for vector observations + - 39.5 for visual observations + - 38.5 for vector observations - 34.2 for simple heuristic (pick a random valid move) - 37.0 for greedy heuristic (pick the highest-scoring valid move)