From aeb34f2fe62eb5711e325c648f149b2c622a5593 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Wed, 25 Jan 2023 16:28:16 -0800
Subject: [PATCH 1/3] Cap batch_size=auto at 128 for CPU training

---
 ludwig/schema/trainer.py   | 4 +++-
 ludwig/trainers/trainer.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py
index 695cfc23dd9..98c47bd4f2b 100644
--- a/ludwig/schema/trainer.py
+++ b/ludwig/schema/trainer.py
@@ -101,7 +101,9 @@ class ECDTrainerConfig(BaseTrainerConfig):
         allow_none=False,
         description=(
             "The number of training examples utilized in one training step of the model. If ’auto’, the "
-            "biggest batch size (power of 2) that can fit in memory will be used."
+            "batch size that maximized training throughput (samples / sec) will be used. For CPU training, the "
+            "tuned batch size is capped at 128 as throughput benefits of large batch sizes are less noticeable without "
+            "a GPU."
         ),
         parameter_metadata=TRAINER_METADATA["batch_size"],
         field_options=[
diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py
index 9cfc97623b0..e717d0b0685 100644
--- a/ludwig/trainers/trainer.py
+++ b/ludwig/trainers/trainer.py
@@ -69,6 +69,8 @@
     ProgressTracker,
 )

+MAX_CPU_BATCH_SIZE = 128
+
 logger = logging.getLogger(__name__)


@@ -372,11 +374,16 @@ def tune_batch_size(
         self.skip_save_progress = True
         self.skip_save_log = True

+        # When training on CPU, larger batch sizes offer limited benefits due to lack of effective
+        # parallelization within a batch. As such, to increase changes of stable training, we cap the maximum
+        # batch size at MAX_CPU_BATCH_SIZE
+        max_batch_size = self.max_batch_size if torch.cuda.is_available() else MAX_CPU_BATCH_SIZE
+
         self.dist_model.train()  # Sets model training mode.
         evaluator = self._create_batch_size_evaluator()
         try:
-            return evaluator.select_best_batch_size(len(training_set), self.max_batch_size, max_trials)
+            return evaluator.select_best_batch_size(len(training_set), max_batch_size, max_trials)
         finally:
             # Restore original parameters to defaults
             self.skip_save_model = skip_save_model

From 13aeece594630ff7eac0725549b68449e6d8a157 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Wed, 25 Jan 2023 20:07:04 -0800
Subject: [PATCH 2/3] Account for max_batch_size

---
 ludwig/trainers/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py
index e717d0b0685..dd16a98de34 100644
--- a/ludwig/trainers/trainer.py
+++ b/ludwig/trainers/trainer.py
@@ -377,7 +377,9 @@ def tune_batch_size(
         # When training on CPU, larger batch sizes offer limited benefits due to lack of effective
         # parallelization within a batch. As such, to increase changes of stable training, we cap the maximum
         # batch size at MAX_CPU_BATCH_SIZE
-        max_batch_size = self.max_batch_size if torch.cuda.is_available() else MAX_CPU_BATCH_SIZE
+        max_batch_size = (
+            self.max_batch_size if torch.cuda.is_available() else min(self.max_batch_size, MAX_CPU_BATCH_SIZE)
+        )

         self.dist_model.train()  # Sets model training mode.
From 05105b8a1eee0654b34783817d86f3ff6350b158 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Thu, 26 Jan 2023 09:39:21 -0800
Subject: [PATCH 3/3] Update ludwig/trainers/trainer.py

Co-authored-by: Joppe Geluykens
---
 ludwig/trainers/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py
index dd16a98de34..c12cea34561 100644
--- a/ludwig/trainers/trainer.py
+++ b/ludwig/trainers/trainer.py
@@ -375,7 +375,7 @@ def tune_batch_size(
         self.skip_save_log = True

         # When training on CPU, larger batch sizes offer limited benefits due to lack of effective
-        # parallelization within a batch. As such, to increase changes of stable training, we cap the maximum
+        # parallelization within a batch. As such, to increase chances of stable training, we cap the maximum
         # batch size at MAX_CPU_BATCH_SIZE
         max_batch_size = (
             self.max_batch_size if torch.cuda.is_available() else min(self.max_batch_size, MAX_CPU_BATCH_SIZE)
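Taken together, these three patches cap the upper bound used by `batch_size=auto` tuning at 128 when no GPU is available, while still respecting a smaller user-configured `trainer.max_batch_size`. The standalone sketch below illustrates that selection logic outside the `Trainer` class; the function name `effective_max_batch_size` and the example values are illustrative and not part of the patches, while `MAX_CPU_BATCH_SIZE` and the `min(...)` expression mirror the diff:

```python
import torch

# Cap applied to batch_size="auto" tuning when no GPU is available (PATCH 1/3).
MAX_CPU_BATCH_SIZE = 128


def effective_max_batch_size(configured_max_batch_size: int) -> int:
    """Upper bound handed to the batch size evaluator by tune_batch_size().

    On GPU, the configured trainer.max_batch_size is used unchanged. On CPU,
    it is additionally capped at MAX_CPU_BATCH_SIZE (PATCH 2/3), since large
    batches offer little throughput benefit without GPU parallelism.
    """
    if torch.cuda.is_available():
        return configured_max_batch_size
    return min(configured_max_batch_size, MAX_CPU_BATCH_SIZE)


# Illustrative values: on a CPU-only machine, even a very large configured
# max_batch_size is reduced to 128 before batch size tuning begins.
print(effective_max_batch_size(1024))  # -> 128 without CUDA, 1024 with CUDA
```

The `min(...)` introduced in PATCH 2/3 matters for the CPU branch: a `max_batch_size` configured below 128 is still honored rather than being overridden by the cap.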