diff --git a/deepspeed/runtime/zero/config.py b/deepspeed/runtime/zero/config.py index fe81fceebd33..a48dd4e620b4 100755 --- a/deepspeed/runtime/zero/config.py +++ b/deepspeed/runtime/zero/config.py @@ -189,4 +189,4 @@ def _initialize(self, zero_config_dict): self.round_robin_gradients = get_scalar_param( zero_config_dict, ZERO_OPTIMIZATION_ROUND_ROBIN_GRADIENTS, - ZERO3_OPTIMIZATION_CONTIGUOUS_GRADIENTS_DEFAULT) + ZERO_OPTIMIZATION_ROUND_ROBIN_GRADIENTS_DEFAULT) diff --git a/deepspeed/runtime/zero/stage2.py b/deepspeed/runtime/zero/stage2.py index 064d59629d87..faa6443bfef4 100755 --- a/deepspeed/runtime/zero/stage2.py +++ b/deepspeed/runtime/zero/stage2.py @@ -106,6 +106,7 @@ def __init__(self, logger.info(f"Reduce bucket size {reduce_bucket_size}") logger.info(f"Allgather bucket size {allgather_bucket_size}") logger.info(f"CPU Offload: {cpu_offload}") + logger.info(f'Round robin gradient partitioning: {round_robin_gradients}') # The fused optimizer does all the work. We need this layer for two reason: # 1. maintain same user API from apex.fp16_utils # 2. keep common stuff here in case we need to add new fused optimizer later