diff --git a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
index f22f007f3f..85159c2314 100644
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
@@ -89,7 +89,7 @@ MonoBehaviour:
     VectorActionDescriptions: []
     VectorActionSpaceType: 0
     hasUpgradedBrainParametersWithActionSpec: 1
-  m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
+  m_Model: {fileID: 11400000, guid: 28ccdfd7cb3d941ce8af0ab89e06130a, type: 3}
   m_InferenceDevice: 2
   m_BehaviorType: 0
   m_BehaviorName: Match3VisualObs
diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx
index 6c008c28c4..7d32504590 100644
Binary files a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx and b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx differ
diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn
deleted file mode 100644
index 216bb6dd84..0000000000
Binary files a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn and /dev/null differ
diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta
deleted file mode 100644
index 15fe43af9a..0000000000
--- a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.nn.meta
+++ /dev/null
@@ -1,11 +0,0 @@
-fileFormatVersion: 2
-guid: 48d14da88fea74d0693c691c6e3f2e34
-ScriptedImporter:
-  fileIDToRecycleName:
-    11400000: main obj
-    11400002: model data
-  externalObjects: {}
-  userData: 
-  assetBundleName: 
-  assetBundleVariant: 
-  script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}
diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx
new file mode 100644
index 0000000000..d8ec297d15
Binary files /dev/null and b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx differ
diff --git a/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta
new file mode 100644
index 0000000000..4b2b528221
--- /dev/null
+++ b/Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VisualObs.onnx.meta
@@ -0,0 +1,15 @@
+fileFormatVersion: 2
+guid: 28ccdfd7cb3d941ce8af0ab89e06130a
+ScriptedImporter:
+  fileIDToRecycleName:
+    11400000: main obj
+    11400002: model data
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
+  script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
+  optimizeModel: 1
+  forceArbitraryBatchSize: 1
+  treatErrorsAsWarnings: 0
+  importMode: 1
diff --git a/config/ppo/Match3.yaml b/config/ppo/Match3.yaml
index e60a138cfa..88bb5bc288 100644
--- a/config/ppo/Match3.yaml
+++ b/config/ppo/Match3.yaml
@@ -1,72 +1,48 @@
+default_settings:
+  trainer_type: ppo
+  hyperparameters:
+    batch_size: 16
+    buffer_size: 120
+    learning_rate: 0.0003
+    beta: 0.005
+    epsilon: 0.2
+    lambd: 0.99
+    num_epoch: 3
+    learning_rate_schedule: constant
+  network_settings:
+    normalize: true
+    hidden_units: 256
+    num_layers: 4
+    vis_encode_type: match3
+  reward_signals:
+    extrinsic:
+      gamma: 0.99
+      strength: 1.0
+  keep_checkpoints: 5
+  max_steps: 5000000
+  time_horizon: 128
+  summary_freq: 10000
+  threaded: true
+
 behaviors:
-  Match3VectorObs:
-    trainer_type: ppo
-    hyperparameters:
-      batch_size: 64
-      buffer_size: 12000
-      learning_rate: 0.0003
-      beta: 0.001
-      epsilon: 0.2
-      lambd: 0.99
-      num_epoch: 3
-      learning_rate_schedule: constant
-    network_settings:
-      normalize: true
-      hidden_units: 128
-      num_layers: 2
-      vis_encode_type: match3
-    reward_signals:
-      extrinsic:
-        gamma: 0.99
-        strength: 1.0
-    keep_checkpoints: 5
-    max_steps: 5000000
-    time_horizon: 1000
-    summary_freq: 10000
-    threaded: true
-  Match3VisualObs:
-    trainer_type: ppo
-    hyperparameters:
-      batch_size: 64
-      buffer_size: 12000
-      learning_rate: 0.0003
-      beta: 0.001
-      epsilon: 0.2
-      lambd: 0.99
-      num_epoch: 3
-      learning_rate_schedule: constant
-    network_settings:
-      normalize: true
-      hidden_units: 128
-      num_layers: 2
-      vis_encode_type: match3
-    reward_signals:
-      extrinsic:
-        gamma: 0.99
-        strength: 1.0
-    keep_checkpoints: 5
-    max_steps: 5000000
-    time_horizon: 1000
-    summary_freq: 10000
-    threaded: true
   Match3SimpleHeuristic:
     # Settings can be very simple since we don't care about actually training the model
     trainer_type: ppo
     hyperparameters:
-      batch_size: 64
-      buffer_size: 128
+      batch_size: 16
+      buffer_size: 120
     network_settings:
       hidden_units: 4
       num_layers: 1
     max_steps: 5000000
     summary_freq: 10000
     threaded: true
-  Match3GreedyHeuristic:
+  Match3SmartHeuristic:
     # Settings can be very simple since we don't care about actually training the model
     trainer_type: ppo
     hyperparameters:
-      batch_size: 64
-      buffer_size: 128
+      batch_size: 16
+      buffer_size: 120
     network_settings:
       hidden_units: 4
       num_layers: 1
diff --git a/docs/Learning-Environment-Examples.md b/docs/Learning-Environment-Examples.md
index 97fb771ddb..5b343b05a4 100644
--- a/docs/Learning-Environment-Examples.md
+++ b/docs/Learning-Environment-Examples.md
@@ -551,7 +551,7 @@ drop down. New pieces are spawned randomly at the top, with a chance of being
   - Observations and actions are defined with a sensor and actuator respectively.
 - Float Properties: None
 - Benchmark Mean Reward:
-  - 37.2 for visual observations
-  - 37.6 for vector observations
+  - 39.5 for visual observations
+  - 38.5 for vector observations
   - 34.2 for simple heuristic (pick a random valid move)
   - 37.0 for greedy heuristic (pick the highest-scoring valid move)