From 07fcb818cb0d94a37e943b63cf79cc7426576916 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:19:48 -0800
Subject: [PATCH 01/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml               | 559 +++++++++++----------
 .pre-commit-config.yaml                    |  18 +-
 setup.cfg                                  |   8 +-
 tests/conftest.py                          |   3 +
 tests/integration_tests/tests_f_control.py | 210 ++++++++
 5 files changed, 510 insertions(+), 288 deletions(-)
 create mode 100644 tests/integration_tests/tests_f_control.py
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index ad487232b46..33ba9b0a67c 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -16,186 +16,188 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  pytest:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        test-markers: ["not distributed", "distributed"]
-        include:
-          - python-version: "3.8"
-            pytorch-version: 2.0.0
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
-          - python-version: "3.9"
-            pytorch-version: 2.1.1
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
-          - python-version: "3.10"
-            # pytorch-version: nightly
-            pytorch-version: 2.2.1
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
-    env:
-      PYTORCH: ${{ matrix.pytorch-version }}
-      MARKERS: ${{ matrix.test-markers }}
-      NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
-      NEUROPOD_VERISON: "0.3.0-rc6"
-      TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
-      RAY_VERSION: ${{ matrix.ray-version }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
-      KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
-      KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
-      IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
+  # TODO(Alex): The "pytest" job below is commented out while troubleshooting test failures; re-enable it afterwards.
+  # pytest:
+  #   runs-on: ${{ matrix.os }}
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       os: [ubuntu-latest]
+  #       python-version: ["3.8", "3.9", "3.10"]
+  #       test-markers: ["not distributed", "distributed"]
+  #       include:
+  #         - python-version: "3.8"
+  #           pytorch-version: 2.0.0
+  #           torchscript-version: 1.10.2
+  #           ray-version: 2.3.1
+  #         - python-version: "3.9"
+  #           pytorch-version: 2.1.1
+  #           torchscript-version: 1.10.2
+  #           ray-version: 2.3.1
+  #         - python-version: "3.10"
+  #           # pytorch-version: nightly
+  #           pytorch-version: 2.2.1
+  #           torchscript-version: 1.10.2
+  #           ray-version: 2.3.1
+  #   env:
+  #     PYTORCH: ${{ matrix.pytorch-version }}
+  #     MARKERS: ${{ matrix.test-markers }}
+  #     NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
+  #     NEUROPOD_VERISON: "0.3.0-rc6"
+  #     TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
+  #     RAY_VERSION: ${{ matrix.ray-version }}
+  #     AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
+  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
+  #     KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+  #     KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+  #     IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
+
+  #   name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
+  #   services:
+  #     minio:
+  #       image: fclairamb/minio-github-actions
+  #       env:
+  #         MINIO_ACCESS_KEY: minio
+  #         MINIO_SECRET_KEY: minio123
+  #       ports:
+  #         - 9000:9000
 
-    name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
-    services:
-      minio:
-        image: fclairamb/minio-github-actions
-        env:
-          MINIO_ACCESS_KEY: minio
-          MINIO_SECRET_KEY: minio123
-        ports:
-          - 9000:9000
+  #   timeout-minutes: 150
+  #   steps:
+  #     - name: Setup ludwigai/ludwig-ray container for local testing with act.
+  #       if: ${{ env.ACT }}
+  #       run: |
+  #         curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
+  #         sudo apt-get install -y nodejs
+  #         sudo mkdir -p /opt/hostedtoolcache/
+  #         sudo chmod 777 -R /opt/hostedtoolcache/
+  #     - uses: actions/checkout@v2
+  #     - name: Set up Python ${{ matrix.python-version }}
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: ${{ matrix.python-version }}
 
-    timeout-minutes: 150
-    steps:
-      - name: Setup ludwigai/ludwig-ray container for local testing with act.
-        if: ${{ env.ACT }}
-        run: |
-          curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
-          sudo apt-get install -y nodejs
-          sudo mkdir -p /opt/hostedtoolcache/
-          sudo chmod 777 -R /opt/hostedtoolcache/
-      - uses: actions/checkout@v2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
+  #     - name: Setup Linux
+  #       if: runner.os == 'linux'
+  #       run: |
+  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
 
-      - name: Setup Linux
-        if: runner.os == 'linux'
-        run: |
-          sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
+  #     - name: Setup macOS
+  #       if: runner.os == 'macOS'
+  #       run: |
+  #         brew install libuv
 
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libuv
+  #     - name: pip cache
+  #       if: ${{ !env.ACT }}
+  #       uses: actions/cache@v2
+  #       with:
+  #         path: ~/.cache/pip
+  #         key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
 
-      - name: pip cache
-        if: ${{ !env.ACT }}
-        uses: actions/cache@v2
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
+  #     - name: Debug out of space
+  #       run: |
+  #         du -h -d 1 ~
+  #         df -h
 
-      - name: Debug out of space
-        run: |
-          du -h -d 1 ~
-          df -h
+  #     - name: Install dependencies
+  #       run: |
+  #         python --version
+  #         pip --version
+  #         python -m pip install -U pip
+  #         cmake --version
 
-      - name: Install dependencies
-        run: |
-          python --version
-          pip --version
-          python -m pip install -U pip
-          cmake --version
+  #         # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
+  #         cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
+  #         cat requirements_distributed.txt | sed '/^ray[\[]/d'
 
-          # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
-          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
-          cat requirements_distributed.txt | sed '/^ray[\[]/d'
+  #         if [ "$MARKERS" != "distributed" ]; then
+  #           # Skip distributed and hyperopt requirements to test optional imports
+  #           echo > requirements-temp && mv requirements-temp requirements_distributed.txt
+  #           echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
 
-          if [ "$MARKERS" != "distributed" ]; then
-            # Skip distributed and hyperopt requirements to test optional imports
-            echo > requirements-temp && mv requirements-temp requirements_distributed.txt
-            echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
-
-            # Skip distributed tree requirement (lightgbm-ray)
-            cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
-          else
-            if [ "$RAY_VERSION" == "nightly" ]; then
-              # NOTE: hardcoded for python 3.10 on Linux
-              echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
-            else
-              echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
-            fi
-          fi
-
-          if [ "$PYTORCH" == "nightly" ]; then
-            extra_index_url=https://download.pytorch.org/whl/nightly/cpu
-            pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
-
-          else
-            extra_index_url=https://download.pytorch.org/whl/cpu
-            pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
-          fi
-
-          pip install '.[test]' --extra-index-url $extra_index_url
-          pip list
+  #           # Skip distributed tree requirement (lightgbm-ray)
+  #           cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
+  #         else
+  #           if [ "$RAY_VERSION" == "nightly" ]; then
+  #             # NOTE: hardcoded for python 3.10 on Linux
+  #             echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
+  #           else
+  #             echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
+  #           fi
+  #         fi
 
-          if [ "$PYTORCH" == "nightly" ]; then
-            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
-          else
-            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
-          fi
-
-          if [ "$MARKERS" == "distributed" ]; then
-            python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
-          else
-            python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
-          fi
-        shell: bash
+  #         if [ "$PYTORCH" == "nightly" ]; then
+  #           extra_index_url=https://download.pytorch.org/whl/nightly/cpu
+  #           pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
 
-      - name: Install Neuropod backend
-        run: |
-          sudo mkdir -p "$NEUROPOD_BASE_DIR"
-          curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
-        shell: bash
+  #         else
+  #           extra_index_url=https://download.pytorch.org/whl/cpu
+  #           pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
+  #         fi
 
-      - name: Unit Tests
-        run: |
-          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
+  #         pip install '.[test]' --extra-index-url $extra_index_url
+  #         pip list
 
-      - name: Regression Tests
-        run: |
-          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
-
-      # Skip Horovod and replace with DDP.
-      # https://github.com/ludwig-ai/ludwig/issues/3468
-      # - name: Install Horovod if necessary
-      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-      #   env:
-      #     HOROVOD_WITH_PYTORCH: 1
-      #     HOROVOD_WITHOUT_MPI: 1
-      #     HOROVOD_WITHOUT_TENSORFLOW: 1
-      #     HOROVOD_WITHOUT_MXNET: 1
-      #   run: |
-      #     pip install -r requirements_extra.txt
-      #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
-      #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
-      #       pip uninstall -y horovod
-      #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
-      #     fi
-      #     horovodrun --check-build
-      #   shell: bash
-
-      # Skip Horovod tests and replace with DDP.
-      # https://github.com/ludwig-ai/ludwig/issues/3468
-      # - name: Horovod Tests
-      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-      #   run: |
-      #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
-
-      - name: Upload Unit Test Results
-        if: ${{ always() && !env.ACT }}
-        uses: actions/upload-artifact@v2
-        with:
-          name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
-          path: pytest.xml
+  #         if [ "$PYTORCH" == "nightly" ]; then
+  #           python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
+  #         else
+  #           python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
+  #         fi
+
+  #         if [ "$MARKERS" == "distributed" ]; then
+  #           python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
+  #         else
+  #           python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
+  #         fi
+  #       shell: bash
+
+  #     - name: Install Neuropod backend
+  #       run: |
+  #         sudo mkdir -p "$NEUROPOD_BASE_DIR"
+  #         curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
+  #       shell: bash
+
+  #     - name: Unit Tests
+  #       run: |
+  #         RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
+
+  #     - name: Regression Tests
+  #       run: |
+  #         RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
+
+  #     # Skip Horovod and replace with DDP.
+  #     # https://github.com/ludwig-ai/ludwig/issues/3468
+  #     # - name: Install Horovod if necessary
+  #     #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+  #     #   env:
+  #     #     HOROVOD_WITH_PYTORCH: 1
+  #     #     HOROVOD_WITHOUT_MPI: 1
+  #     #     HOROVOD_WITHOUT_TENSORFLOW: 1
+  #     #     HOROVOD_WITHOUT_MXNET: 1
+  #     #   run: |
+  #     #     pip install -r requirements_extra.txt
+  #     #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
+  #     #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
+  #     #       pip uninstall -y horovod
+  #     #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
+  #     #     fi
+  #     #     horovodrun --check-build
+  #     #   shell: bash
+
+  #     # Skip Horovod tests and replace with DDP.
+  #     # https://github.com/ludwig-ai/ludwig/issues/3468
+  #     # - name: Horovod Tests
+  #     #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+  #     #   run: |
+  #     #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
+
+  #     - name: Upload Unit Test Results
+  #       if: ${{ always() && !env.ACT }}
+  #       uses: actions/upload-artifact@v2
+  #       with:
+  #         name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
+  #         path: pytest.xml
+  # TODO(Alex): End of the commented-out "pytest" job.
 
   integration-tests:
     name: ${{ matrix.test-markers }}
@@ -204,12 +206,17 @@ jobs:
       fail-fast: false
       matrix:
         test-markers:
-          - "integration_tests_a"
-          - "integration_tests_b"
-          - "integration_tests_c"
-          - "integration_tests_d"
-          - "integration_tests_e"
-          - "integration_tests_f"
+          # TODO(Alex): The standard integration test groups are commented out while troubleshooting test failures.
+          # - "integration_tests_a"
+          # - "integration_tests_b"
+          # - "integration_tests_c"
+          # - "integration_tests_d"
+          # - "integration_tests_e"
+          # - "integration_tests_f"
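+          # TODO(Alex): End of the commented-out standard integration test groups.
+          # TODO(Alex): "integration_tests_x" is the only group in the matrix while troubleshooting test failures.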
+          - "integration_tests_x"
+          # TODO: <Alex>ALEX</Alex>
 
     env:
       AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
@@ -265,127 +272,129 @@ jobs:
         run: |
           RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
 
-  llm-tests:
-    name: LLM Tests
-    runs-on: ubuntu-latest
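+  # TODO(Alex): The "llm-tests", "combinatorial-tests", and "test-minimal-install" jobs below are commented out while troubleshooting test failures.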
+  # llm-tests:
+  #   name: LLM Tests
+  #   runs-on: ubuntu-latest
 
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
+  #   timeout-minutes: 60
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Set up Python 3.9
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: 3.9
 
-      - name: Setup Linux
-        if: runner.os == 'linux'
-        run: |
-          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+  #     - name: Setup Linux
+  #       if: runner.os == 'linux'
+  #       run: |
+  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
 
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libuv
+  #     - name: Setup macOS
+  #       if: runner.os == 'macOS'
+  #       run: |
+  #         brew install libuv
 
-      - name: Install dependencies
-        run: |
-          python --version
-          pip --version
-          python -m pip install -U pip
+  #     - name: Install dependencies
+  #       run: |
+  #         python --version
+  #         pip --version
+  #         python -m pip install -U pip
 
-          # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
-          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
-          cat requirements_distributed.txt | sed '/^ray[\[]/d'
-          pip install torch==2.0.0 torchtext torchvision torchaudio
-          pip install ray==2.3.0
-          pip install '.[test]'
-          pip list
-        shell: bash
+  #         # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
+  #         cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
+  #         cat requirements_distributed.txt | sed '/^ray[\[]/d'
+  #         pip install torch==2.0.0 torchtext torchvision torchaudio
+  #         pip install ray==2.3.0
+  #         pip install '.[test]'
+  #         pip list
+  #       shell: bash
 
-      - name: LLM Tests
-        run: |
-          pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
+  #     - name: LLM Tests
+  #       run: |
+  #         pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
 
-  combinatorial-tests:
-    name: Combinatorial Tests
-    runs-on: ubuntu-latest
+  # combinatorial-tests:
+  #   name: Combinatorial Tests
+  #   runs-on: ubuntu-latest
 
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.8
+  #   timeout-minutes: 60
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Set up Python 3.8
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: 3.8
 
-      - name: Setup Linux
-        if: runner.os == 'linux'
-        run: |
-          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+  #     - name: Setup Linux
+  #       if: runner.os == 'linux'
+  #       run: |
+  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
 
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libuv
+  #     - name: Setup macOS
+  #       if: runner.os == 'macOS'
+  #       run: |
+  #         brew install libuv
 
-      - name: Install dependencies
-        run: |
-          python --version
-          pip --version
-          python -m pip install -U pip
-          pip install '.[test]'
-          pip list
-        shell: bash
+  #     - name: Install dependencies
+  #       run: |
+  #         python --version
+  #         pip --version
+  #         python -m pip install -U pip
+  #         pip install '.[test]'
+  #         pip list
+  #       shell: bash
 
-      - name: Testing combinatorial config generation code
-        run: |
-          pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
+  #     - name: Testing combinatorial config generation code
+  #       run: |
+  #         pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
 
-      - name: Combinatorial Tests
-        run: |
-          pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
+  #     - name: Combinatorial Tests
+  #       run: |
+  #         pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
 
-  test-minimal-install:
-    name: Test Minimal Install
-    runs-on: ubuntu-latest
+  # test-minimal-install:
+  #   name: Test Minimal Install
+  #   runs-on: ubuntu-latest
 
-    timeout-minutes: 15
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.8
+  #   timeout-minutes: 15
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Set up Python 3.8
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: 3.8
 
-      - name: Setup Linux
-        if: runner.os == 'linux'
-        run: |
-          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+  #     - name: Setup Linux
+  #       if: runner.os == 'linux'
+  #       run: |
+  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
 
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libuv
+  #     - name: Setup macOS
+  #       if: runner.os == 'macOS'
+  #       run: |
+  #         brew install libuv
 
-      - name: Install dependencies
-        run: |
-          python --version
-          pip --version
-          python -m pip install -U pip
-          pip install torch==2.0.0 torchtext
-          pip install ray==2.3.0
-          pip install '.'
-          pip list
-        shell: bash
-      - name: Check Install
-        run: |
-          ludwig check_install
-        shell: bash
+  #     - name: Install dependencies
+  #       run: |
+  #         python --version
+  #         pip --version
+  #         python -m pip install -U pip
+  #         pip install torch==2.0.0 torchtext
+  #         pip install ray==2.3.0
+  #         pip install '.'
+  #         pip list
+  #       shell: bash
+  #     - name: Check Install
+  #       run: |
+  #         ludwig check_install
+  #       shell: bash
 
-      - name: Test Getting Started
-        run: |
-          cd examples/getting_started && sh ./run.sh
-        shell: bash
+  #     - name: Test Getting Started
+  #       run: |
+  #         cd examples/getting_started && sh ./run.sh
+  #       shell: bash
+  # TODO(Alex): End of the commented-out "llm-tests", "combinatorial-tests", and "test-minimal-install" jobs.
 
   # start-runner:
   #   name: Start self-hosted EC2 runner
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1c6390db514..27fd6a51fde 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,15 +38,15 @@ repos:
     hooks:
       - id: docformatter
         args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120]
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        name: Format imports
-  - repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
-    hooks:
-      - id: flake8
+  #- repo: https://github.com/PyCQA/isort
+  #  rev: 5.12.0
+  #  hooks:
+  #    - id: isort
+  #      name: Format imports
+  #- repo: https://github.com/pycqa/flake8
+  #  rev: 6.0.0
+  #  hooks:
+  #    - id: flake8
   - repo: https://github.com/psf/black
     rev: 23.3.0
     hooks:
diff --git a/setup.cfg b/setup.cfg
index 421f3a791ee..1090ea3197b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-[flake8]
+#[flake8]
 max-line-length = 120
 exclude =
     .tox,
@@ -7,9 +7,9 @@ exclude =
     build,
     temp
 
-select = E,W,F
-doctests = True
-verbose = 2
+#select = E,W,F
+#doctests = True
+#verbose = 2
 # https://pep8.readthedocs.io/en/latest/intro.html#error-codes
 format = pylint
 ignore =
diff --git a/tests/conftest.py b/tests/conftest.py
index 9dae92e2e65..3b2abe99622 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -44,6 +44,9 @@
     "integration_tests_c",
     "integration_tests_d",
     "integration_tests_e",
+    # TODO(Alex): Troubleshooting-only marker; applied in tests/integration_tests/tests_f_control.py.
+    "integration_tests_x",
+    # TODO: <Alex>ALEX</Alex>
 }
 
 
diff --git a/tests/integration_tests/tests_f_control.py b/tests/integration_tests/tests_f_control.py
new file mode 100644
index 00000000000..6fb059bd136
--- /dev/null
+++ b/tests/integration_tests/tests_f_control.py
@@ -0,0 +1,210 @@
+import asyncio
+import contextlib
+import copy
+import logging
+import os
+import platform
+import random
+import string
+from typing import List, Union
+from unittest import mock
+
+import numpy as np
+import pandas as pd
+import pytest
+import torch
+from PIL import Image
+from transformers import AutoTokenizer
+
+import ludwig
+from ludwig.api import LudwigModel
+from ludwig.backend import initialize_backend
+from ludwig.callbacks import Callback
+from ludwig.constants import (
+    BASE_MODEL,
+    BATCH_SIZE,
+    COLUMN,
+    DECODER,
+    EPOCHS,
+    FULL,
+    INPUT_FEATURES,
+    MODEL_ECD,
+    MODEL_LLM,
+    MODEL_TYPE,
+    NAME,
+    OUTPUT_FEATURES,
+    PREDICTIONS,
+    PREPROCESSING,
+    PROC_COLUMN,
+    PROMPT,
+    SPLIT,
+    TRAINER,
+    TYPE,
+)
+from ludwig.data.concatenate_datasets import concatenate_df
+from ludwig.data.preprocessing import handle_features_with_prompt_config, preprocess_for_prediction
+from ludwig.schema.llms.prompt import PromptConfig
+from ludwig.schema.model_types.base import ModelConfig
+from ludwig.utils.carton_utils import export_carton
+from tests.integration_tests.utils import (
+    assert_preprocessed_dataset_shape_and_dtype_for_feature,
+    audio_feature,
+    binary_feature,
+    category_feature,
+    generate_data,
+    generate_data_as_dataframe,
+    image_feature,
+    LocalTestBackend,
+    number_feature,
+    sequence_feature,
+    text_feature,
+)
+
+NUM_EXAMPLES = 20
+
+# TODO(Alex): The module-wide marker is disabled; the marker is applied to the test function directly below.
+# pytestmark = pytest.mark.integration_tests_x
+# TODO: <Alex>ALEX</Alex>
+
+
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.integration_tests_x
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.skipif(platform.system() == "Windows", reason="Carton is not supported on Windows")
+def test_carton_torchscript(csv_filename, tmpdir):
+    data_csv_path = os.path.join(tmpdir, csv_filename)
+
+    # Configure features to be tested:
+    bin_str_feature = binary_feature()
+    input_features = [
+        bin_str_feature,
+        # binary_feature(),
+        number_feature(),
+        category_feature(encoder={"vocab_size": 3}),
+        # TODO: future support
+        # sequence_feature(vocab_size=3),
+        # text_feature(vocab_size=3),
+        # vector_feature(),
+        # image_feature(image_dest_folder),
+        # audio_feature(audio_dest_folder),
+        # timeseries_feature(),
+        # date_feature(),
+        # h3_feature(),
+        # set_feature(vocab_size=3),
+        # bag_feature(vocab_size=3),
+    ]
+    output_features = [
+        bin_str_feature,
+        # binary_feature(),
+        number_feature(),
+        category_feature(decoder={"vocab_size": 3}, output_feature=True),
+        # TODO: future support
+        # sequence_feature(vocab_size=3),
+        # text_feature(vocab_size=3),
+        # set_feature(vocab_size=3),
+        # vector_feature()
+    ]
+    backend = LocalTestBackend()
+    config = {
+        "input_features": input_features,
+        "output_features": output_features,
+        TRAINER: {"epochs": 2, BATCH_SIZE: 128},
+    }
+
+    # Generate training data
+    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
+
+    # Convert bool values to strings, e.g., {'Yes', 'No'}
+    df = pd.read_csv(training_data_csv_path)
+    false_value, true_value = "No", "Yes"
+    df[bin_str_feature[NAME]] = df[bin_str_feature[NAME]].map(lambda x: true_value if x else false_value)
+    df.to_csv(training_data_csv_path)
+
+    # Train Ludwig (Pythonic) model:
+    ludwig_model = LudwigModel(config, backend=backend)
+    ludwig_model.train(
+        dataset=training_data_csv_path,
+        skip_save_training_description=True,
+        skip_save_training_statistics=True,
+        skip_save_model=True,
+        skip_save_progress=True,
+        skip_save_log=True,
+        skip_save_processed_input=True,
+    )
+
+    # Obtain predictions from Python model
+    preds_dict, _ = ludwig_model.predict(dataset=training_data_csv_path, return_type=dict)
+
+    # Create graph inference model (Torchscript) from trained Ludwig model.
+    carton_path = os.path.join(tmpdir, "carton")
+    export_carton(ludwig_model, carton_path)
+
+    import cartonml as carton
+
+    # Load the carton model
+    # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
+    # in another function
+    async def load():
+        return await carton.load(carton_path)
+
+    loop = asyncio.get_event_loop()
+    carton_model = loop.run_until_complete(load())
+
+    def to_input(s: pd.Series) -> Union[List[str], torch.Tensor]:
+        if s.dtype == "object":
+            return np.array(s.to_list())
+        return s.to_numpy().astype(np.float32)
+
+    df = pd.read_csv(training_data_csv_path)
+    inputs = {name: to_input(df[feature.column]) for name, feature in ludwig_model.model.input_features.items()}
+
+    # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
+    # in another function
+    async def infer(inputs):
+        return await carton_model.infer(inputs)
+
+    outputs = loop.run_until_complete(infer(inputs))
+
+    # Compare results from Python trained model against Carton
+    assert len(preds_dict) == len(outputs)
+    for feature_name, feature_outputs_expected in preds_dict.items():
+        assert feature_name in outputs
+
+        output_values_expected = feature_outputs_expected[PREDICTIONS]
+        output_values = outputs[feature_name]
+        if output_values.dtype.type in {np.string_, np.str_}:
+            # Strings should match exactly
+            assert np.all(output_values == output_values_expected), f"feature: {feature_name}, output: predictions"
+        else:
+            assert np.allclose(output_values, output_values_expected), f"feature: {feature_name}, output: predictions"
+
+
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.integration_tests_x
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
+def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
+    input_features = [
+        image_feature(
+            os.path.join(tmpdir, "generated_output"),
+            preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
+            encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
+        )
+    ]
+    output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
+
+    data_csv = generate_data(
+        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
+    )
+
+    config = {
+        INPUT_FEATURES: input_features,
+        OUTPUT_FEATURES: output_features,
+        TRAINER: {"train_steps": 1},
+    }
+
+    model = LudwigModel(config)
+
+    # Failure happens post preprocessing but before training during the ECD model creation phase
+    # so make sure the model can be created properly and training can proceed
+    model.train(dataset=data_csv)

From e62581e8bf4b42f4a11f402e41a7fa5864916ff8 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:26:19 -0800
Subject: [PATCH 02/36] Troubleshooting test failures.

---
 setup.cfg | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 1090ea3197b..421f3a791ee 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-#[flake8]
+[flake8]
 max-line-length = 120
 exclude =
     .tox,
@@ -7,9 +7,9 @@ exclude =
     build,
     temp
 
-#select = E,W,F
-#doctests = True
-#verbose = 2
+select = E,W,F
+doctests = True
+verbose = 2
 # https://pep8.readthedocs.io/en/latest/intro.html#error-codes
 format = pylint
 ignore =

From 9017c3dfdf58bc9daa6bbe92a6e5619ce7f0240d Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:36:19 -0800
Subject: [PATCH 03/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 334 +++++++++++++++++------------------
 1 file changed, 167 insertions(+), 167 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 33ba9b0a67c..ca9d70f5dd3 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -17,186 +17,186 @@ concurrency:
 
 jobs:
   # TODO: <Alex>ALEX</Alex>
-  # pytest:
-  #   runs-on: ${{ matrix.os }}
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       os: [ubuntu-latest]
-  #       python-version: ["3.8", "3.9", "3.10"]
-  #       test-markers: ["not distributed", "distributed"]
-  #       include:
-  #         - python-version: "3.8"
-  #           pytorch-version: 2.0.0
-  #           torchscript-version: 1.10.2
-  #           ray-version: 2.3.1
-  #         - python-version: "3.9"
-  #           pytorch-version: 2.1.1
-  #           torchscript-version: 1.10.2
-  #           ray-version: 2.3.1
-  #         - python-version: "3.10"
-  #           # pytorch-version: nightly
-  #           pytorch-version: 2.2.1
-  #           torchscript-version: 1.10.2
-  #           ray-version: 2.3.1
-  #   env:
-  #     PYTORCH: ${{ matrix.pytorch-version }}
-  #     MARKERS: ${{ matrix.test-markers }}
-  #     NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
-  #     NEUROPOD_VERISON: "0.3.0-rc6"
-  #     TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
-  #     RAY_VERSION: ${{ matrix.ray-version }}
-  #     AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
-  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
-  #     KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
-  #     KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
-  #     IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
-
-  #   name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
-  #   services:
-  #     minio:
-  #       image: fclairamb/minio-github-actions
-  #       env:
-  #         MINIO_ACCESS_KEY: minio
-  #         MINIO_SECRET_KEY: minio123
-  #       ports:
-  #         - 9000:9000
-
-  #   timeout-minutes: 150
-  #   steps:
-  #     - name: Setup ludwigai/ludwig-ray container for local testing with act.
-  #       if: ${{ env.ACT }}
-  #       run: |
-  #         curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
-  #         sudo apt-get install -y nodejs
-  #         sudo mkdir -p /opt/hostedtoolcache/
-  #         sudo chmod 777 -R /opt/hostedtoolcache/
-  #     - uses: actions/checkout@v2
-  #     - name: Set up Python ${{ matrix.python-version }}
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: ${{ matrix.python-version }}
-
-  #     - name: Setup Linux
-  #       if: runner.os == 'linux'
-  #       run: |
-  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
-
-  #     - name: Setup macOS
-  #       if: runner.os == 'macOS'
-  #       run: |
-  #         brew install libuv
-
-  #     - name: pip cache
-  #       if: ${{ !env.ACT }}
-  #       uses: actions/cache@v2
-  #       with:
-  #         path: ~/.cache/pip
-  #         key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
-
-  #     - name: Debug out of space
-  #       run: |
-  #         du -h -d 1 ~
-  #         df -h
+  pytest:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.8", "3.9", "3.10"]
+        test-markers: ["not distributed", "distributed"]
+        include:
+          - python-version: "3.8"
+            pytorch-version: 2.0.0
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+          - python-version: "3.9"
+            pytorch-version: 2.1.1
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+          - python-version: "3.10"
+            # pytorch-version: nightly
+            pytorch-version: 2.2.1
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+    env:
+      PYTORCH: ${{ matrix.pytorch-version }}
+      MARKERS: ${{ matrix.test-markers }}
+      NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
+      NEUROPOD_VERISON: "0.3.0-rc6"
+      TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
+      RAY_VERSION: ${{ matrix.ray-version }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
+      KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+      KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+      IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
 
-  #     - name: Install dependencies
-  #       run: |
-  #         python --version
-  #         pip --version
-  #         python -m pip install -U pip
-  #         cmake --version
+    name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
+    services:
+      minio:
+        image: fclairamb/minio-github-actions
+        env:
+          MINIO_ACCESS_KEY: minio
+          MINIO_SECRET_KEY: minio123
+        ports:
+          - 9000:9000
 
-  #         # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
-  #         cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
-  #         cat requirements_distributed.txt | sed '/^ray[\[]/d'
+    timeout-minutes: 150
+    steps:
+      - name: Setup ludwigai/ludwig-ray container for local testing with act.
+        if: ${{ env.ACT }}
+        run: |
+          curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
+          sudo apt-get install -y nodejs
+          sudo mkdir -p /opt/hostedtoolcache/
+          sudo chmod 777 -R /opt/hostedtoolcache/
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
 
-  #         if [ "$MARKERS" != "distributed" ]; then
-  #           # Skip distributed and hyperopt requirements to test optional imports
-  #           echo > requirements-temp && mv requirements-temp requirements_distributed.txt
-  #           echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
 
-  #           # Skip distributed tree requirement (lightgbm-ray)
-  #           cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
-  #         else
-  #           if [ "$RAY_VERSION" == "nightly" ]; then
-  #             # NOTE: hardcoded for python 3.10 on Linux
-  #             echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
-  #           else
-  #             echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
-  #           fi
-  #         fi
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
 
-  #         if [ "$PYTORCH" == "nightly" ]; then
-  #           extra_index_url=https://download.pytorch.org/whl/nightly/cpu
-  #           pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
+      - name: pip cache
+        if: ${{ !env.ACT }}
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
 
-  #         else
-  #           extra_index_url=https://download.pytorch.org/whl/cpu
-  #           pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
-  #         fi
+      - name: Debug out of space
+        run: |
+          du -h -d 1 ~
+          df -h
 
-  #         pip install '.[test]' --extra-index-url $extra_index_url
-  #         pip list
+      - name: Install dependencies
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+          cmake --version
 
-  #         if [ "$PYTORCH" == "nightly" ]; then
-  #           python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
-  #         else
-  #           python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
-  #         fi
+          # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
+          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
+          cat requirements_distributed.txt | sed '/^ray[\[]/d'
 
-  #         if [ "$MARKERS" == "distributed" ]; then
-  #           python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
-  #         else
-  #           python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
-  #         fi
-  #       shell: bash
+          if [ "$MARKERS" != "distributed" ]; then
+            # Skip distributed and hyperopt requirements to test optional imports
+            echo > requirements-temp && mv requirements-temp requirements_distributed.txt
+            echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
+
+            # Skip distributed tree requirement (lightgbm-ray)
+            cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
+          else
+            if [ "$RAY_VERSION" == "nightly" ]; then
+              # NOTE: hardcoded for python 3.10 on Linux
+              echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
+            else
+              echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
+            fi
+          fi
+
+          if [ "$PYTORCH" == "nightly" ]; then
+            extra_index_url=https://download.pytorch.org/whl/nightly/cpu
+            pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
+
+          else
+            extra_index_url=https://download.pytorch.org/whl/cpu
+            pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
+          fi
+
+          pip install '.[test]' --extra-index-url $extra_index_url
+          pip list
 
-  #     - name: Install Neuropod backend
-  #       run: |
-  #         sudo mkdir -p "$NEUROPOD_BASE_DIR"
-  #         curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
-  #       shell: bash
+          if [ "$PYTORCH" == "nightly" ]; then
+            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
+          else
+            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
+          fi
+
+          if [ "$MARKERS" == "distributed" ]; then
+            python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
+          else
+            python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
+          fi
+        shell: bash
 
-  #     - name: Unit Tests
-  #       run: |
-  #         RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
+      - name: Install Neuropod backend
+        run: |
+          sudo mkdir -p "$NEUROPOD_BASE_DIR"
+          curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
+        shell: bash
 
-  #     - name: Regression Tests
-  #       run: |
-  #         RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
-
-  #     # Skip Horovod and replace with DDP.
-  #     # https://github.com/ludwig-ai/ludwig/issues/3468
-  #     # - name: Install Horovod if necessary
-  #     #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-  #     #   env:
-  #     #     HOROVOD_WITH_PYTORCH: 1
-  #     #     HOROVOD_WITHOUT_MPI: 1
-  #     #     HOROVOD_WITHOUT_TENSORFLOW: 1
-  #     #     HOROVOD_WITHOUT_MXNET: 1
-  #     #   run: |
-  #     #     pip install -r requirements_extra.txt
-  #     #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
-  #     #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
-  #     #       pip uninstall -y horovod
-  #     #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
-  #     #     fi
-  #     #     horovodrun --check-build
-  #     #   shell: bash
-
-  #     # Skip Horovod tests and replace with DDP.
-  #     # https://github.com/ludwig-ai/ludwig/issues/3468
-  #     # - name: Horovod Tests
-  #     #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-  #     #   run: |
-  #     #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
+      - name: Unit Tests
+        run: |
+          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
 
-  #     - name: Upload Unit Test Results
-  #       if: ${{ always() && !env.ACT }}
-  #       uses: actions/upload-artifact@v2
-  #       with:
-  #         name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
-  #         path: pytest.xml
+      - name: Regression Tests
+        run: |
+          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
+
+      # Skip Horovod and replace with DDP.
+      # https://github.com/ludwig-ai/ludwig/issues/3468
+      # - name: Install Horovod if necessary
+      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+      #   env:
+      #     HOROVOD_WITH_PYTORCH: 1
+      #     HOROVOD_WITHOUT_MPI: 1
+      #     HOROVOD_WITHOUT_TENSORFLOW: 1
+      #     HOROVOD_WITHOUT_MXNET: 1
+      #   run: |
+      #     pip install -r requirements_extra.txt
+      #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
+      #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
+      #       pip uninstall -y horovod
+      #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
+      #     fi
+      #     horovodrun --check-build
+      #   shell: bash
+
+      # Skip Horovod tests and replace with DDP.
+      # https://github.com/ludwig-ai/ludwig/issues/3468
+      # - name: Horovod Tests
+      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+      #   run: |
+      #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
+
+      - name: Upload Unit Test Results
+        if: ${{ always() && !env.ACT }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
+          path: pytest.xml
   # TODO: <Alex>ALEX</Alex>
 
   integration-tests:

From 1c99e27edddb4d0cf11a0361edab8e6ee19cff75 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:38:28 -0800
Subject: [PATCH 04/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index ca9d70f5dd3..24f882f5cde 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -24,21 +24,30 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         python-version: ["3.8", "3.9", "3.10"]
-        test-markers: ["not distributed", "distributed"]
+        # TODO: <Alex>ALEX</Alex>
+        # test-markers: ["not distributed", "distributed"]
+        # TODO: <Alex>ALEX</Alex>
+        # TODO: <Alex>ALEX</Alex>
+        test-markers: ["not distributed"]
+        # TODO: <Alex>ALEX</Alex>
         include:
-          - python-version: "3.8"
-            pytorch-version: 2.0.0
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
-          - python-version: "3.9"
-            pytorch-version: 2.1.1
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
+          # TODO: <Alex>ALEX</Alex>
+          # - python-version: "3.8"
+          #   pytorch-version: 2.0.0
+          #   torchscript-version: 1.10.2
+          #   ray-version: 2.3.1
+          # - python-version: "3.9"
+          #   pytorch-version: 2.1.1
+          #   torchscript-version: 1.10.2
+          #   ray-version: 2.3.1
+          # TODO: <Alex>ALEX</Alex>
+          # TODO: <Alex>ALEX</Alex>
           - python-version: "3.10"
             # pytorch-version: nightly
             pytorch-version: 2.2.1
             torchscript-version: 1.10.2
             ray-version: 2.3.1
+          # TODO: <Alex>ALEX</Alex>
     env:
       PYTORCH: ${{ matrix.pytorch-version }}
       MARKERS: ${{ matrix.test-markers }}

From 9e79c4b44c422e68224559589679c764c38782ea Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:40:45 -0800
Subject: [PATCH 05/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 24f882f5cde..a6253349590 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -23,7 +23,11 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        # TODO: <Alex>ALEX</Alex>
+        # python-version: ["3.8", "3.9", "3.10"]
+        # TODO: <Alex>ALEX</Alex>
+        # TODO: <Alex>ALEX</Alex>
+        python-version: [3.10"]
         # TODO: <Alex>ALEX</Alex>
         # test-markers: ["not distributed", "distributed"]
         # TODO: <Alex>ALEX</Alex>

From 56828f259fcaaff3cf2068c53acbfab9df987710 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 12:52:27 -0800
Subject: [PATCH 06/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index a6253349590..ef59e8e7079 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -27,7 +27,7 @@ jobs:
         # python-version: ["3.8", "3.9", "3.10"]
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
-        python-version: [3.10"]
+        python-version: ["3.10"]
         # TODO: <Alex>ALEX</Alex>
         # test-markers: ["not distributed", "distributed"]
         # TODO: <Alex>ALEX</Alex>

From e44afc825927e8af7d4a9d2a3273f5e797b30796 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 13:09:32 -0800
Subject: [PATCH 07/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 506 -----------------------------------
 1 file changed, 506 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index ef59e8e7079..af187eb9806 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -16,202 +16,6 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # TODO: <Alex>ALEX</Alex>
-  pytest:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest]
-        # TODO: <Alex>ALEX</Alex>
-        # python-version: ["3.8", "3.9", "3.10"]
-        # TODO: <Alex>ALEX</Alex>
-        # TODO: <Alex>ALEX</Alex>
-        python-version: ["3.10"]
-        # TODO: <Alex>ALEX</Alex>
-        # test-markers: ["not distributed", "distributed"]
-        # TODO: <Alex>ALEX</Alex>
-        # TODO: <Alex>ALEX</Alex>
-        test-markers: ["not distributed"]
-        # TODO: <Alex>ALEX</Alex>
-        include:
-          # TODO: <Alex>ALEX</Alex>
-          # - python-version: "3.8"
-          #   pytorch-version: 2.0.0
-          #   torchscript-version: 1.10.2
-          #   ray-version: 2.3.1
-          # - python-version: "3.9"
-          #   pytorch-version: 2.1.1
-          #   torchscript-version: 1.10.2
-          #   ray-version: 2.3.1
-          # TODO: <Alex>ALEX</Alex>
-          # TODO: <Alex>ALEX</Alex>
-          - python-version: "3.10"
-            # pytorch-version: nightly
-            pytorch-version: 2.2.1
-            torchscript-version: 1.10.2
-            ray-version: 2.3.1
-          # TODO: <Alex>ALEX</Alex>
-    env:
-      PYTORCH: ${{ matrix.pytorch-version }}
-      MARKERS: ${{ matrix.test-markers }}
-      NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
-      NEUROPOD_VERISON: "0.3.0-rc6"
-      TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
-      RAY_VERSION: ${{ matrix.ray-version }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
-      KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
-      KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
-      IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
-
-    name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
-    services:
-      minio:
-        image: fclairamb/minio-github-actions
-        env:
-          MINIO_ACCESS_KEY: minio
-          MINIO_SECRET_KEY: minio123
-        ports:
-          - 9000:9000
-
-    timeout-minutes: 150
-    steps:
-      - name: Setup ludwigai/ludwig-ray container for local testing with act.
-        if: ${{ env.ACT }}
-        run: |
-          curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
-          sudo apt-get install -y nodejs
-          sudo mkdir -p /opt/hostedtoolcache/
-          sudo chmod 777 -R /opt/hostedtoolcache/
-      - uses: actions/checkout@v2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Setup Linux
-        if: runner.os == 'linux'
-        run: |
-          sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
-
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libuv
-
-      - name: pip cache
-        if: ${{ !env.ACT }}
-        uses: actions/cache@v2
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
-
-      - name: Debug out of space
-        run: |
-          du -h -d 1 ~
-          df -h
-
-      - name: Install dependencies
-        run: |
-          python --version
-          pip --version
-          python -m pip install -U pip
-          cmake --version
-
-          # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
-          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
-          cat requirements_distributed.txt | sed '/^ray[\[]/d'
-
-          if [ "$MARKERS" != "distributed" ]; then
-            # Skip distributed and hyperopt requirements to test optional imports
-            echo > requirements-temp && mv requirements-temp requirements_distributed.txt
-            echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
-
-            # Skip distributed tree requirement (lightgbm-ray)
-            cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
-          else
-            if [ "$RAY_VERSION" == "nightly" ]; then
-              # NOTE: hardcoded for python 3.10 on Linux
-              echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
-            else
-              echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
-            fi
-          fi
-
-          if [ "$PYTORCH" == "nightly" ]; then
-            extra_index_url=https://download.pytorch.org/whl/nightly/cpu
-            pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
-
-          else
-            extra_index_url=https://download.pytorch.org/whl/cpu
-            pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
-          fi
-
-          pip install '.[test]' --extra-index-url $extra_index_url
-          pip list
-
-          if [ "$PYTORCH" == "nightly" ]; then
-            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
-          else
-            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
-          fi
-
-          if [ "$MARKERS" == "distributed" ]; then
-            python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
-          else
-            python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
-          fi
-        shell: bash
-
-      - name: Install Neuropod backend
-        run: |
-          sudo mkdir -p "$NEUROPOD_BASE_DIR"
-          curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
-        shell: bash
-
-      - name: Unit Tests
-        run: |
-          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
-
-      - name: Regression Tests
-        run: |
-          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
-
-      # Skip Horovod and replace with DDP.
-      # https://github.com/ludwig-ai/ludwig/issues/3468
-      # - name: Install Horovod if necessary
-      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-      #   env:
-      #     HOROVOD_WITH_PYTORCH: 1
-      #     HOROVOD_WITHOUT_MPI: 1
-      #     HOROVOD_WITHOUT_TENSORFLOW: 1
-      #     HOROVOD_WITHOUT_MXNET: 1
-      #   run: |
-      #     pip install -r requirements_extra.txt
-      #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
-      #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
-      #       pip uninstall -y horovod
-      #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
-      #     fi
-      #     horovodrun --check-build
-      #   shell: bash
-
-      # Skip Horovod tests and replace with DDP.
-      # https://github.com/ludwig-ai/ludwig/issues/3468
-      # - name: Horovod Tests
-      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
-      #   run: |
-      #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
-
-      - name: Upload Unit Test Results
-        if: ${{ always() && !env.ACT }}
-        uses: actions/upload-artifact@v2
-        with:
-          name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
-          path: pytest.xml
-  # TODO: <Alex>ALEX</Alex>
-
   integration-tests:
     name: ${{ matrix.test-markers }}
     runs-on: ubuntu-latest
@@ -219,17 +23,7 @@ jobs:
       fail-fast: false
       matrix:
         test-markers:
-          # TODO: <Alex>ALEX</Alex>
-          # - "integration_tests_a"
-          # - "integration_tests_b"
-          # - "integration_tests_c"
-          # - "integration_tests_d"
-          # - "integration_tests_e"
-          # - "integration_tests_f"
-          # TODO: <Alex>ALEX</Alex>
-          # TODO: <Alex>ALEX</Alex>
           - "integration_tests_x"
-          # TODO: <Alex>ALEX</Alex>
 
     env:
       AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
@@ -285,280 +79,6 @@ jobs:
         run: |
           RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
 
-  # TODO: <Alex>ALEX</Alex>
-  # llm-tests:
-  #   name: LLM Tests
-  #   runs-on: ubuntu-latest
-
-  #   timeout-minutes: 60
-  #   steps:
-  #     - uses: actions/checkout@v2
-  #     - name: Set up Python 3.9
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: 3.9
-
-  #     - name: Setup Linux
-  #       if: runner.os == 'linux'
-  #       run: |
-  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
-
-  #     - name: Setup macOS
-  #       if: runner.os == 'macOS'
-  #       run: |
-  #         brew install libuv
-
-  #     - name: Install dependencies
-  #       run: |
-  #         python --version
-  #         pip --version
-  #         python -m pip install -U pip
-
-  #         # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
-  #         cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
-  #         cat requirements_distributed.txt | sed '/^ray[\[]/d'
-  #         pip install torch==2.0.0 torchtext torchvision torchaudio
-  #         pip install ray==2.3.0
-  #         pip install '.[test]'
-  #         pip list
-  #       shell: bash
-
-  #     - name: LLM Tests
-  #       run: |
-  #         pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
-
-  # combinatorial-tests:
-  #   name: Combinatorial Tests
-  #   runs-on: ubuntu-latest
-
-  #   timeout-minutes: 60
-  #   steps:
-  #     - uses: actions/checkout@v2
-  #     - name: Set up Python 3.8
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: 3.8
-
-  #     - name: Setup Linux
-  #       if: runner.os == 'linux'
-  #       run: |
-  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
-
-  #     - name: Setup macOS
-  #       if: runner.os == 'macOS'
-  #       run: |
-  #         brew install libuv
-
-  #     - name: Install dependencies
-  #       run: |
-  #         python --version
-  #         pip --version
-  #         python -m pip install -U pip
-  #         pip install '.[test]'
-  #         pip list
-  #       shell: bash
-
-  #     - name: Testing combinatorial config generation code
-  #       run: |
-  #         pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
-
-  #     - name: Combinatorial Tests
-  #       run: |
-  #         pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
-
-  # test-minimal-install:
-  #   name: Test Minimal Install
-  #   runs-on: ubuntu-latest
-
-  #   timeout-minutes: 15
-  #   steps:
-  #     - uses: actions/checkout@v2
-  #     - name: Set up Python 3.8
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: 3.8
-
-  #     - name: Setup Linux
-  #       if: runner.os == 'linux'
-  #       run: |
-  #         sudo apt-get update && sudo apt-get install -y cmake libsndfile1
-
-  #     - name: Setup macOS
-  #       if: runner.os == 'macOS'
-  #       run: |
-  #         brew install libuv
-
-  #     - name: Install dependencies
-  #       run: |
-  #         python --version
-  #         pip --version
-  #         python -m pip install -U pip
-  #         pip install torch==2.0.0 torchtext
-  #         pip install ray==2.3.0
-  #         pip install '.'
-  #         pip list
-  #       shell: bash
-  #     - name: Check Install
-  #       run: |
-  #         ludwig check_install
-  #       shell: bash
-
-  #     - name: Test Getting Started
-  #       run: |
-  #         cd examples/getting_started && sh ./run.sh
-  #       shell: bash
-  # TODO: <Alex>ALEX</Alex>
-
-  # start-runner:
-  #   name: Start self-hosted EC2 runner
-  #   if: >
-  #     always() && needs.pytest.result != 'failure' && (
-  #     github.event_name == 'schedule' && github.repository == 'ludwig-ai/ludwig' ||
-  #     github.event_name == 'push' && github.repository == 'ludwig-ai/ludwig' ||
-  #     github.event_name == 'pull_request' && github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && !github.event.pull_request.head.repo.fork)
-  #   needs: pytest
-  #   runs-on: ubuntu-latest
-  #   outputs:
-  #     label: ${{ steps.start-ec2-runner.outputs.label }}
-  #     ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
-
-  #   steps:
-  #     - name: Configure AWS credentials
-  #       uses: aws-actions/configure-aws-credentials@v1
-  #       with:
-  #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  #         aws-region: ${{ secrets.AWS_REGION }}
-
-  #     - name: Start EC2 runner
-  #       id: start-ec2-runner
-  #       uses: machulav/ec2-github-runner@v2.3.2
-  #       with:
-  #         mode: start
-  #         github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-  #         ec2-image-id: ami-0759580dedc953d1f
-  #         ec2-instance-type: g4dn.xlarge
-  #         subnet-id: subnet-0983be43
-  #         security-group-id: sg-4cba0d08
-  #         aws-resource-tags: >
-  #           [
-  #             {"Key": "Name", "Value": "ludwig-github-${{ github.head_ref || github.sha }}"},
-  #             {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
-  #             {"Key": "GitHubHeadRef", "Value": "${{ github.head_ref }}"},
-  #             {"Key": "GitHubSHA", "Value": "${{ github.sha }}"}
-  #           ]
-
-  # pytest-gpu:
-  #   if: needs.start-runner.result != 'skipped'
-  #   needs: start-runner # required to start the main job when the runner is ready
-  #   runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runners
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       python-version: [3.7]
-  #       include:
-  #         - python-version: 3.7
-  #           pytorch-version: 1.10.0
-  #           torchscript-version: 1.10.2
-  #   env:
-  #     PYTORCH: ${{ matrix.pytorch-version }}
-  #     NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
-  #     NEUROPOD_VERISON: "0.3.0-rc6"
-  #     TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
-
-  #   name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, gpu
-
-  #   timeout-minutes: 70
-  #   steps:
-  #     - uses: actions/checkout@v2
-  #     - name: Set up Python ${{ matrix.python-version }}
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: ${{ matrix.python-version }}
-
-  #     - name: Setup Linux
-  #       if: runner.os == 'linux'
-  #       run: |
-  #         sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8
-  #         cmake --version
-
-  #     - name: Install CUDA drivers
-  #       run: |
-  #         wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
-  #         sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
-  #         wget https://developer.download.nvidia.com/compute/cuda/11.5.1/local_installers/cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
-  #         sudo dpkg -i cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
-  #         sudo apt-key add /var/cuda-repo-ubuntu2004-11-5-local/7fa2af80.pub
-  #         sudo apt-get update
-  #         sudo apt-get -y install cuda
-  #       shell: bash
-
-  #     - name: pip cache
-  #       uses: actions/cache@v2
-  #       with:
-  #         path: ~/.cache/pip
-  #         key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }}
-  #         restore-keys: |
-  #           ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-
-
-  #     - name: Install dependencies
-  #       env:
-  #         HOROVOD_WITH_PYTORCH: 1
-  #         HOROVOD_WITHOUT_MPI: 1
-  #         HOROVOD_WITHOUT_TENSORFLOW: 1
-  #         HOROVOD_WITHOUT_MXNET: 1
-  #       run: |
-  #         python --version
-  #         pip --version
-  #         python -m pip install -U pip
-  #         if [ $PYTORCH == "nightly" ]; then
-  #           cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
-  #           pip install --pre torch torchvision -f https://download.pytorch.org/whl/torch_stable.html
-  #         else
-  #           pip install torch==${PYTORCH}+cu111 -f https://download.pytorch.org/whl/torch_stable.html
-  #         fi
-  #         # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
-  #         pip install dulwich==0.20.26 # workaround for `/usr/bin/ld: cannot find -lpython3.7m`
-  #         pip install '.[test]'
-  #         pip list
-  #       shell: bash
-
-  #     - name: Install Neuropod backend
-  #       run: |
-  #         sudo mkdir -p "$NEUROPOD_BASE_DIR"
-  #         curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
-  #       shell: bash
-
-  #     - name: Reinstall Horovod if necessary
-  #       env:
-  #         HOROVOD_WITH_PYTORCH: 1
-  #         HOROVOD_WITHOUT_MPI: 1
-  #         HOROVOD_WITHOUT_TENSORFLOW: 1
-  #         HOROVOD_WITHOUT_MXNET: 1
-  #       run: |
-  #         HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
-  #         if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
-  #           pip uninstall -y horovod
-  #           pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
-  #         fi
-  #         horovodrun --check-build
-  #       shell: bash
-
-  #     - name: Check CUDA is available
-  #       run: |
-  #         python -c "import torch; assert torch.cuda.is_available()"
-
-  #     - name: Tests
-  #       run: |
-  #         pytest -v --timeout 300 --durations 10 --junitxml pytest.xml tests
-
-  #     - name: Upload Unit Test Results
-  #       if: always()
-  #       uses: actions/upload-artifact@v2
-  #       with:
-  #         name: Unit Test Results (Python ${{ matrix.python-version }} gpu
-  #         path: pytest.xml
-
   event_file:
     name: "Event File"
     runs-on: ubuntu-latest
@@ -570,29 +90,3 @@ jobs:
         with:
           name: Event File
           path: ${{ github.event_path }}
-
-  # stop-runner:
-  #   name: Stop self-hosted EC2 runner
-
-  #   # required to stop the runner even if the error happened in the previous job
-  #   if: always() && needs.start-runner.result != 'skipped'
-  #   needs:
-  #     - start-runner # required to get output from the start-runner job
-  #     - pytest-gpu # required to wait when the main job is done
-  #   runs-on: ubuntu-latest
-
-  #   steps:
-  #     - name: Configure AWS credentials
-  #       uses: aws-actions/configure-aws-credentials@v1
-  #       with:
-  #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  #         aws-region: ${{ secrets.AWS_REGION }}
-
-  #     - name: Stop EC2 runner
-  #       uses: machulav/ec2-github-runner@v2.3.1
-  #       with:
-  #         mode: stop
-  #         github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-  #         label: ${{ needs.start-runner.outputs.label }}
-  #         ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

From 4f1c50b02f2276406203102f1c9e17e5339a0500 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 13:29:29 -0800
Subject: [PATCH 08/36] Troubleshooting test failures.

---
 pytest.ini | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pytest.ini b/pytest.ini
index 539a53b1674..61478d9705e 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -7,11 +7,6 @@ markers =
     combinatorial: mark a test as combinatorial.
     horovod: mark a test as a Horovod test.
     llm: mark a test as an LLM test.
-    integration_tests_a: mark a test to be run as part of integration tests, group A.
-    integration_tests_b: mark a test to be run as part of integration tests, group B.
-    integration_tests_c: mark a test to be run as part of integration tests, group C.
-    integration_tests_d: mark a test to be run as part of integration tests, group D.
-    integration_tests_e: mark a test to be run as part of integration tests, group E.
-    integration_tests_f: mark a test to be run as part of integration tests, group F.
+    integration_tests_x: mark a test to be run as part of integration tests, group X.
 filterwarnings =
     ignore::DeprecationWarning

From 5848459515af9f6bc612afe5145d77aba9d7885c Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 20:56:46 -0800
Subject: [PATCH 09/36] Troubleshooting test failures.

---
 pytest.ini | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/pytest.ini b/pytest.ini
index 61478d9705e..a82421e060c 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,12 +1,5 @@
 [pytest]
 markers =
-    benchmark: mark a test as a benchmarking test.
-    distributed: mark a test as a distributed test.
-    filesystem: mark to test operating system systems.
-    slow: mark test as slow.
-    combinatorial: mark a test as combinatorial.
-    horovod: mark a test as a Horovod test.
-    llm: mark a test as an LLM test.
     integration_tests_x: mark a test to be run as part of integration tests, group X.
 filterwarnings =
     ignore::DeprecationWarning

From 2aefef1671f3d3a8e797f03841bee3ff731dbc26 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Fri, 8 Mar 2024 22:39:28 -0800
Subject: [PATCH 10/36] Troubleshooting test failures.

---
 tests/integration_tests/{tests_f_control.py => test_f_control.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/integration_tests/{tests_f_control.py => test_f_control.py} (100%)

diff --git a/tests/integration_tests/tests_f_control.py b/tests/integration_tests/test_f_control.py
similarity index 100%
rename from tests/integration_tests/tests_f_control.py
rename to tests/integration_tests/test_f_control.py

From f0608a1e8cca108c95505890ce1a63771540ee96 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 09:36:24 -0800
Subject: [PATCH 11/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index f19ad84dae2..b969f949ddc 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -114,6 +114,7 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
         # carton.pack is an async function so we run it and wait until it's complete
         # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
         # in another function
+        # TODO: <Alex>ALEX</Alex>
         async def pack():
             return await carton.pack(
                 input_model_path,
@@ -126,8 +127,26 @@ async def pack():
                 outputs=_get_output_spec(model),
             )
 
+        # TODO: <Alex>ALEX</Alex>
+
         loop = asyncio.get_event_loop()
-        tmp_out_path = loop.run_until_complete(pack())
+        # TODO: <Alex>ALEX</Alex>
+        # tmp_out_path = loop.run_until_complete(pack())
+        # TODO: <Alex>ALEX</Alex>
+        # TODO: <Alex>ALEX</Alex>
+        import sys
+        import traceback
+
+        try:
+            tmp_out_path = loop.run_until_complete(pack())
+        except Exception as e:
+            exception_message: str = "A Sub-Process call Exception occurred.\n"
+            exception_traceback: str = traceback.format_exc()
+            exception_message += f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
+            sys.stderr.write(exception_message)
+            sys.stderr.flush()
+            raise SystemExit(exception_message) from e  # Make sure error is fatal.
+        # TODO: <Alex>ALEX</Alex>
 
         # Move it to the output path
         shutil.move(tmp_out_path, carton_path)

From 5a61f973ec25fb1ef1e8f471e206a0c34e21199e Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 11:34:50 -0800
Subject: [PATCH 12/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index b969f949ddc..18af6dd40be 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -140,7 +140,7 @@ async def pack():
         try:
             tmp_out_path = loop.run_until_complete(pack())
         except Exception as e:
-            exception_message: str = "A Sub-Process call Exception occurred.\n"
+            exception_message: str = "A general Exception occurred.\n"
             exception_traceback: str = traceback.format_exc()
             exception_message += f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
             sys.stderr.write(exception_message)

From f11edff49c4411f925496f0c433194771c355fd7 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 11:37:26 -0800
Subject: [PATCH 13/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index af187eb9806..570271f950e 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -45,10 +45,10 @@ jobs:
     timeout-minutes: 90
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.9
+      - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: 3.9
+          python-version: 3.10
 
       - name: Setup Linux
         if: runner.os == 'linux'

From b385ceb5ba975bc3ce1817a72a47d6f12246f146 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 11:48:58 -0800
Subject: [PATCH 14/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 570271f950e..3397cb29dd0 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -48,7 +48,7 @@ jobs:
       - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10
+          python-version: "3.10"
 
       - name: Setup Linux
         if: runner.os == 'linux'

From c28ac8cc7f5af1f08533b324fddc28391fedaad1 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 15:13:15 -0800
Subject: [PATCH 15/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 18af6dd40be..7f4b1852251 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -115,7 +115,21 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
         # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
         # in another function
         # TODO: <Alex>ALEX</Alex>
-        async def pack():
+        # async def pack():
+        #     return await carton.pack(
+        #         input_model_path,
+        #         runner_name="torchscript",
+        #         # Any 2.x.x version is okay
+        #         # TODO: improve this
+        #         required_framework_version="=2",
+        #         model_name=carton_model_name,
+        #         inputs=_get_input_spec(model),
+        #         outputs=_get_output_spec(model),
+        #     )
+
+        # TODO: <Alex>ALEX</Alex>
+        # TODO: <Alex>ALEX</Alex>
+        async def packster():
             return await carton.pack(
                 input_model_path,
                 runner_name="torchscript",
@@ -129,16 +143,23 @@ async def pack():
 
         # TODO: <Alex>ALEX</Alex>
 
-        loop = asyncio.get_event_loop()
         # TODO: <Alex>ALEX</Alex>
+        loop = asyncio.get_event_loop()
         # tmp_out_path = loop.run_until_complete(pack())
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
+        import time
         import sys
         import traceback
 
         try:
-            tmp_out_path = loop.run_until_complete(pack())
+            # TODO: <Alex>ALEX</Alex>
+            # tmp_out_path = loop.run_until_complete(pack())
+            # TODO: <Alex>ALEX</Alex>
+            # TODO: <Alex>ALEX</Alex>
+            time.sleep(1)
+            tmp_out_path = loop.run_until_complete(packster())
+            # TODO: <Alex>ALEX</Alex>
         except Exception as e:
             exception_message: str = "A general Exception occurred.\n"
             exception_traceback: str = traceback.format_exc()
@@ -149,4 +170,5 @@ async def pack():
         # TODO: <Alex>ALEX</Alex>
 
         # Move it to the output path
+        time.sleep(1)
         shutil.move(tmp_out_path, carton_path)

From 3b0e6c1c58ae7fabc5401469a1513e6f3848a4b5 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 15:18:20 -0800
Subject: [PATCH 16/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 7f4b1852251..e5bc928aa01 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -105,10 +105,11 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
 
     # Generate a torchscript model
     model_ts = generate_carton_torchscript(model)
+    print(f"\n[ALEX_TEST] [WOUTPUT] MODEL_TORCH_SCRIPT:\n{model_ts} ; TYPE: {str(type(model_ts))}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         # Save the model to a temp dir
-        input_model_path = os.path.join(tmpdir, "model.pt")
+        input_model_path: str = os.path.join(tmpdir, "model.pt")
         torch.jit.save(model_ts, input_model_path)
 
         # carton.pack is an async function so we run it and wait until it's complete
@@ -129,8 +130,9 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
 
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
-        async def packster():
-            return await carton.pack(
+        async def packster() -> str:
+            time.sleep(1)
+            a: str = await carton.pack(
                 input_model_path,
                 runner_name="torchscript",
                 # Any 2.x.x version is okay
@@ -140,11 +142,16 @@ async def packster():
                 inputs=_get_input_spec(model),
                 outputs=_get_output_spec(model),
             )
+            time.sleep(1)
+            print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+            time.sleep(1)
+            return a
 
         # TODO: <Alex>ALEX</Alex>
 
         # TODO: <Alex>ALEX</Alex>
         loop = asyncio.get_event_loop()
+        print(f"\n[ALEX_TEST] [WOUTPUT] LOOP:\n{loop} ; TYPE: {str(type(loop))}")
         # tmp_out_path = loop.run_until_complete(pack())
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
@@ -158,7 +165,7 @@ async def packster():
             # TODO: <Alex>ALEX</Alex>
             # TODO: <Alex>ALEX</Alex>
             time.sleep(1)
-            tmp_out_path = loop.run_until_complete(packster())
+            tmp_out_path: str = loop.run_until_complete(packster())
             # TODO: <Alex>ALEX</Alex>
         except Exception as e:
             exception_message: str = "A general Exception occurred.\n"

From c6ea057951011bf816da9e0f0b63fedde722a260 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 15:29:48 -0800
Subject: [PATCH 17/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index e5bc928aa01..4d1735d47fc 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -132,20 +132,28 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
         # TODO: <Alex>ALEX</Alex>
         async def packster() -> str:
             time.sleep(1)
-            a: str = await carton.pack(
-                input_model_path,
-                runner_name="torchscript",
-                # Any 2.x.x version is okay
-                # TODO: improve this
-                required_framework_version="=2",
-                model_name=carton_model_name,
-                inputs=_get_input_spec(model),
-                outputs=_get_output_spec(model),
-            )
-            time.sleep(1)
-            print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-            time.sleep(1)
-            return a
+            try:
+                a: str = await carton.pack(
+                    input_model_path,
+                    runner_name="torchscript",
+                    # Any 2.x.x version is okay
+                    # TODO: improve this
+                    required_framework_version="=2",
+                    model_name=carton_model_name,
+                    inputs=_get_input_spec(model),
+                    outputs=_get_output_spec(model),
+                )
+                time.sleep(1)
+                print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+                time.sleep(1)
+                return a
+            except Exception as ie:
+                exception_message: str = "A Packster-Inside Exception occurred.\n"
+                exception_traceback: str = traceback.format_exc()
+                exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+                sys.stderr.write(exception_message)
+                sys.stderr.flush()
+                raise ValueError(exception_message) from ie
 
         # TODO: <Alex>ALEX</Alex>
 

From 61e778ae2dc1026f56baf0533ce677e39668601d Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 15:43:30 -0800
Subject: [PATCH 18/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py              | 73 ++++++++++++++++-------
 tests/integration_tests/test_f_control.py | 56 ++++++++---------
 2 files changed, 80 insertions(+), 49 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 4d1735d47fc..17b1f2b3cbc 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -132,28 +132,57 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
         # TODO: <Alex>ALEX</Alex>
         async def packster() -> str:
             time.sleep(1)
-            try:
-                a: str = await carton.pack(
-                    input_model_path,
-                    runner_name="torchscript",
-                    # Any 2.x.x version is okay
-                    # TODO: improve this
-                    required_framework_version="=2",
-                    model_name=carton_model_name,
-                    inputs=_get_input_spec(model),
-                    outputs=_get_output_spec(model),
-                )
-                time.sleep(1)
-                print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-                time.sleep(1)
-                return a
-            except Exception as ie:
-                exception_message: str = "A Packster-Inside Exception occurred.\n"
-                exception_traceback: str = traceback.format_exc()
-                exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-                sys.stderr.write(exception_message)
-                sys.stderr.flush()
-                raise ValueError(exception_message) from ie
+            # TODO: <Alex>ALEX</Alex>
+            # try:
+            #     a: str = await carton.pack(
+            #         input_model_path,
+            #         runner_name="torchscript",
+            #         # Any 2.x.x version is okay
+            #         # TODO: improve this
+            #         required_framework_version="=2",
+            #         model_name=carton_model_name,
+            #         inputs=_get_input_spec(model),
+            #         outputs=_get_output_spec(model),
+            #     )
+            #     time.sleep(1)
+            #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+            #     time.sleep(1)
+            #     return a
+            # except Exception as ie:
+            #     exception_message: str = "A Packster-Inside Exception occurred.\n"
+            #     exception_traceback: str = traceback.format_exc()
+            #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+            #     sys.stderr.write(exception_message)
+            #     sys.stderr.flush()
+            #     raise ValueError(exception_message) from ie
+            # TODO: <Alex>ALEX</Alex>
+            # TODO: <Alex>ALEX</Alex>
+            idx: int
+            for idx in range(5):
+                print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
+                try:
+                    a: str = await carton.pack(
+                        input_model_path,
+                        runner_name="torchscript",
+                        # Any 2.x.x version is okay
+                        # TODO: improve this
+                        required_framework_version="=2",
+                        model_name=carton_model_name,
+                        inputs=_get_input_spec(model),
+                        outputs=_get_output_spec(model),
+                    )
+                    time.sleep(1)
+                    print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+                    time.sleep(1)
+                    return a
+                except Exception as ie:
+                    exception_message: str = "A Packster-Inside Exception occurred.\n"
+                    exception_traceback: str = traceback.format_exc()
+                    exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+                    sys.stderr.write(exception_message)
+                    sys.stderr.flush()
+                    raise ValueError(exception_message) from ie
+            # TODO: <Alex>ALEX</Alex>
 
         # TODO: <Alex>ALEX</Alex>
 
diff --git a/tests/integration_tests/test_f_control.py b/tests/integration_tests/test_f_control.py
index 6fb059bd136..8724732fc8f 100644
--- a/tests/integration_tests/test_f_control.py
+++ b/tests/integration_tests/test_f_control.py
@@ -180,31 +180,33 @@ async def infer(inputs):
 
 
 # TODO: <Alex>ALEX</Alex>
-@pytest.mark.integration_tests_x
+# # TODO: <Alex>ALEX</Alex>
+# @pytest.mark.integration_tests_x
+# # TODO: <Alex>ALEX</Alex>
+# @pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
+# def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
+#     input_features = [
+#         image_feature(
+#             os.path.join(tmpdir, "generated_output"),
+#             preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
+#             encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
+#         )
+#     ]
+#     output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
+
+#     data_csv = generate_data(
+#         input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
+#     )
+
+#     config = {
+#         INPUT_FEATURES: input_features,
+#         OUTPUT_FEATURES: output_features,
+#         TRAINER: {"train_steps": 1},
+#     }
+
+#     model = LudwigModel(config)
+
+#     # Failure happens post preprocessing but before training during the ECD model creation phase
+#     # so make sure the model can be created properly and training can proceed
+#     model.train(dataset=data_csv)
 # TODO: <Alex>ALEX</Alex>
-@pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
-def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
-    input_features = [
-        image_feature(
-            os.path.join(tmpdir, "generated_output"),
-            preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
-            encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
-        )
-    ]
-    output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
-
-    data_csv = generate_data(
-        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
-    )
-
-    config = {
-        INPUT_FEATURES: input_features,
-        OUTPUT_FEATURES: output_features,
-        TRAINER: {"train_steps": 1},
-    }
-
-    model = LudwigModel(config)
-
-    # Failure happens post preprocessing but before training during the ECD model creation phase
-    # so make sure the model can be created properly and training can proceed
-    model.train(dataset=data_csv)

From b20b64a2518aa85abf8e977c850de384adbb96ed Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 15:54:16 -0800
Subject: [PATCH 19/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 17b1f2b3cbc..7dea3d7c8d5 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -157,8 +157,9 @@ async def packster() -> str:
             #     raise ValueError(exception_message) from ie
             # TODO: <Alex>ALEX</Alex>
             # TODO: <Alex>ALEX</Alex>
+            max_tries: int = 5
             idx: int
-            for idx in range(5):
+            for idx in range(max_tries):
                 print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
                 try:
                     a: str = await carton.pack(
@@ -181,6 +182,8 @@ async def packster() -> str:
                     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
                     sys.stderr.write(exception_message)
                     sys.stderr.flush()
+                    # raise ValueError(exception_message) from ie
+                if idx >= max_tries - 1:
                     raise ValueError(exception_message) from ie
             # TODO: <Alex>ALEX</Alex>
 

From 4b03b61292a0a7673859f53fa2807ac37228b06c Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:02:28 -0800
Subject: [PATCH 20/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 7dea3d7c8d5..194e0b942b7 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -161,6 +161,7 @@ async def packster() -> str:
             idx: int
             for idx in range(max_tries):
                 print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
+                time.sleep(1)
                 try:
                     a: str = await carton.pack(
                         input_model_path,
@@ -185,6 +186,7 @@ async def packster() -> str:
                     # raise ValueError(exception_message) from ie
                 if idx >= max_tries - 1:
                     raise ValueError(exception_message) from ie
+                time.sleep(1)
             # TODO: <Alex>ALEX</Alex>
 
         # TODO: <Alex>ALEX</Alex>

From b7b40f39a361fb9138aae0c2abc7b850de3b6a35 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:03:50 -0800
Subject: [PATCH 21/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 194e0b942b7..38cfb768bad 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -185,7 +185,7 @@ async def packster() -> str:
                     sys.stderr.flush()
                     # raise ValueError(exception_message) from ie
                 if idx >= max_tries - 1:
-                    raise ValueError(exception_message) from ie
+                    raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
                 time.sleep(1)
             # TODO: <Alex>ALEX</Alex>
 

From 13d96e1c59288c50be7b919ea6c8bad1e3adef18 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sun, 10 Mar 2024 08:36:53 -0700
Subject: [PATCH 22/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 150 ++++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 73 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 38cfb768bad..cb9daeb1185 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -98,6 +98,7 @@ def _get_output_spec(model: LudwigModel) -> List[Dict[str, Any]]:
 
 @DeveloperAPI
 def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwig_model"):
+    print(f"\n[ALEX_TEST] [WOUTPUT] CARTON_PATH:\n{carton_path} ; TYPE: {str(type(carton_path))}")
     try:
         import cartonml as carton
     except ImportError:
@@ -108,86 +109,88 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
     print(f"\n[ALEX_TEST] [WOUTPUT] MODEL_TORCH_SCRIPT:\n{model_ts} ; TYPE: {str(type(model_ts))}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
+        print(f"\n[ALEX_TEST] [WOUTPUT] TMPDIR:\n{tmpdir} ; TYPE: {str(type(tmpdir))}")
         # Save the model to a temp dir
         input_model_path: str = os.path.join(tmpdir, "model.pt")
         torch.jit.save(model_ts, input_model_path)
+        print(f"\n[ALEX_TEST] [WOUTPUT] INPUT_MODEL_PATH:\n{input_model_path} ; TYPE: {str(type(input_model_path))}")
 
         # carton.pack is an async function so we run it and wait until it's complete
         # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
         # in another function
         # TODO: <Alex>ALEX</Alex>
-        # async def pack():
-        #     return await carton.pack(
-        #         input_model_path,
-        #         runner_name="torchscript",
-        #         # Any 2.x.x version is okay
-        #         # TODO: improve this
-        #         required_framework_version="=2",
-        #         model_name=carton_model_name,
-        #         inputs=_get_input_spec(model),
-        #         outputs=_get_output_spec(model),
-        #     )
+        async def pack():
+            return await carton.pack(
+                path=input_model_path,
+                runner_name="torchscript",
+                # Any 2.x.x version is okay
+                # TODO: improve this
+                required_framework_version=">=2",
+                model_name=carton_model_name,
+                inputs=_get_input_spec(model),
+                outputs=_get_output_spec(model),
+            )
 
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
-        async def packster() -> str:
-            time.sleep(1)
-            # TODO: <Alex>ALEX</Alex>
-            # try:
-            #     a: str = await carton.pack(
-            #         input_model_path,
-            #         runner_name="torchscript",
-            #         # Any 2.x.x version is okay
-            #         # TODO: improve this
-            #         required_framework_version="=2",
-            #         model_name=carton_model_name,
-            #         inputs=_get_input_spec(model),
-            #         outputs=_get_output_spec(model),
-            #     )
-            #     time.sleep(1)
-            #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-            #     time.sleep(1)
-            #     return a
-            # except Exception as ie:
-            #     exception_message: str = "A Packster-Inside Exception occurred.\n"
-            #     exception_traceback: str = traceback.format_exc()
-            #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-            #     sys.stderr.write(exception_message)
-            #     sys.stderr.flush()
-            #     raise ValueError(exception_message) from ie
-            # TODO: <Alex>ALEX</Alex>
-            # TODO: <Alex>ALEX</Alex>
-            max_tries: int = 5
-            idx: int
-            for idx in range(max_tries):
-                print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
-                time.sleep(1)
-                try:
-                    a: str = await carton.pack(
-                        input_model_path,
-                        runner_name="torchscript",
-                        # Any 2.x.x version is okay
-                        # TODO: improve this
-                        required_framework_version="=2",
-                        model_name=carton_model_name,
-                        inputs=_get_input_spec(model),
-                        outputs=_get_output_spec(model),
-                    )
-                    time.sleep(1)
-                    print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-                    time.sleep(1)
-                    return a
-                except Exception as ie:
-                    exception_message: str = "A Packster-Inside Exception occurred.\n"
-                    exception_traceback: str = traceback.format_exc()
-                    exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-                    sys.stderr.write(exception_message)
-                    sys.stderr.flush()
-                    # raise ValueError(exception_message) from ie
-                if idx >= max_tries - 1:
-                    raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
-                time.sleep(1)
-            # TODO: <Alex>ALEX</Alex>
+        # async def packster() -> str:
+        #     time.sleep(1)
+        #     # TODO: <Alex>ALEX</Alex>
+        #     # try:
+        #     #     a: str = await carton.pack(
+        #     #         input_model_path,
+        #     #         runner_name="torchscript",
+        #     #         # Any 2.x.x version is okay
+        #     #         # TODO: improve this
+        #     #         required_framework_version="=2",
+        #     #         model_name=carton_model_name,
+        #     #         inputs=_get_input_spec(model),
+        #     #         outputs=_get_output_spec(model),
+        #     #     )
+        #     #     time.sleep(1)
+        #     #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+        #     #     time.sleep(1)
+        #     #     return a
+        #     # except Exception as ie:
+        #     #     exception_message: str = "A Packster-Inside Exception occurred.\n"
+        #     #     exception_traceback: str = traceback.format_exc()
+        #     #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+        #     #     sys.stderr.write(exception_message)
+        #     #     sys.stderr.flush()
+        #     #     raise ValueError(exception_message) from ie
+        #     # TODO: <Alex>ALEX</Alex>
+        #     # TODO: <Alex>ALEX</Alex>
+        #     max_tries: int = 5
+        #     idx: int
+        #     for idx in range(max_tries):
+        #         print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
+        #         time.sleep(1)
+        #         try:
+        #             a: str = await carton.pack(
+        #                 input_model_path,
+        #                 runner_name="torchscript",
+        #                 # Any 2.x.x version is okay
+        #                 # TODO: improve this
+        #                 required_framework_version="=2",
+        #                 model_name=carton_model_name,
+        #                 inputs=_get_input_spec(model),
+        #                 outputs=_get_output_spec(model),
+        #             )
+        #             time.sleep(1)
+        #             print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+        #             time.sleep(1)
+        #             return a
+        #         except Exception as ie:
+        #             exception_message: str = "A Packster-Inside Exception occurred.\n"
+        #             exception_traceback: str = traceback.format_exc()
+        #             exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+        #             sys.stderr.write(exception_message)
+        #             sys.stderr.flush()
+        #             # raise ValueError(exception_message) from ie
+        #         if idx >= max_tries - 1:
+        #             raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
+        #         time.sleep(1)
+        # TODO: <Alex>ALEX</Alex>
 
         # TODO: <Alex>ALEX</Alex>
 
@@ -201,13 +204,14 @@ async def packster() -> str:
         import sys
         import traceback
 
+        tmp_out_path: str = None
         try:
             # TODO: <Alex>ALEX</Alex>
-            # tmp_out_path = loop.run_until_complete(pack())
+            tmp_out_path = loop.run_until_complete(pack())
             # TODO: <Alex>ALEX</Alex>
             # TODO: <Alex>ALEX</Alex>
-            time.sleep(1)
-            tmp_out_path: str = loop.run_until_complete(packster())
+            # time.sleep(1)
+            # tmp_out_path: str = loop.run_until_complete(packster())
             # TODO: <Alex>ALEX</Alex>
         except Exception as e:
             exception_message: str = "A general Exception occurred.\n"
@@ -219,5 +223,5 @@ async def packster() -> str:
         # TODO: <Alex>ALEX</Alex>
 
         # Move it to the output path
-        time.sleep(1)
+        # time.sleep(1)
         shutil.move(tmp_out_path, carton_path)

From 958abde9c18ecd17695f5aa5aaead202a2382292 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sun, 10 Mar 2024 08:50:12 -0700
Subject: [PATCH 23/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index cb9daeb1185..fefd9f00c90 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -125,7 +125,7 @@ async def pack():
                 runner_name="torchscript",
                 # Any 2.x.x version is okay
                 # TODO: improve this
-                required_framework_version=">=2",
+                required_framework_version="=2.0",
                 model_name=carton_model_name,
                 inputs=_get_input_spec(model),
                 outputs=_get_output_spec(model),

From f0c286804bbbd76f8d1c1490d842ea955ce015a3 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sun, 10 Mar 2024 09:04:50 -0700
Subject: [PATCH 24/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 3397cb29dd0..f543293ad5b 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -70,7 +70,7 @@ jobs:
           cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
           cat requirements_distributed.txt | sed '/^ray[\[]/d'
           pip install torch==2.0.0 torchtext torchvision torchaudio
-          pip install ray==2.3.0
+          pip install ray==2.3.1
           pip install '.[test]'
           pip list
         shell: bash

From ffbc78c3eea87f46cb2e7dd610e89d4595f0c7ee Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sun, 10 Mar 2024 22:37:08 -0700
Subject: [PATCH 25/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 141 ++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 68 deletions(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index fefd9f00c90..2daebb55a67 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -119,77 +119,82 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
         # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
         # in another function
         # TODO: <Alex>ALEX</Alex>
-        async def pack():
-            return await carton.pack(
-                path=input_model_path,
-                runner_name="torchscript",
-                # Any 2.x.x version is okay
-                # TODO: improve this
-                required_framework_version="=2.0",
-                model_name=carton_model_name,
-                inputs=_get_input_spec(model),
-                outputs=_get_output_spec(model),
-            )
+        # async def pack():
+        #     return await carton.pack(
+        #         path=input_model_path,
+        #         runner_name="torchscript",
+        #         # Any 2.x.x version is okay
+        #         # TODO: improve this
+        #         required_framework_version="=2.0",
+        #         model_name=carton_model_name,
+        #         inputs=_get_input_spec(model),
+        #         outputs=_get_output_spec(model),
+        #     )
 
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
-        # async def packster() -> str:
-        #     time.sleep(1)
-        #     # TODO: <Alex>ALEX</Alex>
-        #     # try:
-        #     #     a: str = await carton.pack(
-        #     #         input_model_path,
-        #     #         runner_name="torchscript",
-        #     #         # Any 2.x.x version is okay
-        #     #         # TODO: improve this
-        #     #         required_framework_version="=2",
-        #     #         model_name=carton_model_name,
-        #     #         inputs=_get_input_spec(model),
-        #     #         outputs=_get_output_spec(model),
-        #     #     )
-        #     #     time.sleep(1)
-        #     #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-        #     #     time.sleep(1)
-        #     #     return a
-        #     # except Exception as ie:
-        #     #     exception_message: str = "A Packster-Inside Exception occurred.\n"
-        #     #     exception_traceback: str = traceback.format_exc()
-        #     #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-        #     #     sys.stderr.write(exception_message)
-        #     #     sys.stderr.flush()
-        #     #     raise ValueError(exception_message) from ie
-        #     # TODO: <Alex>ALEX</Alex>
-        #     # TODO: <Alex>ALEX</Alex>
-        #     max_tries: int = 5
-        #     idx: int
-        #     for idx in range(max_tries):
-        #         print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
-        #         time.sleep(1)
-        #         try:
-        #             a: str = await carton.pack(
-        #                 input_model_path,
-        #                 runner_name="torchscript",
-        #                 # Any 2.x.x version is okay
-        #                 # TODO: improve this
-        #                 required_framework_version="=2",
-        #                 model_name=carton_model_name,
-        #                 inputs=_get_input_spec(model),
-        #                 outputs=_get_output_spec(model),
-        #             )
-        #             time.sleep(1)
-        #             print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-        #             time.sleep(1)
-        #             return a
-        #         except Exception as ie:
-        #             exception_message: str = "A Packster-Inside Exception occurred.\n"
-        #             exception_traceback: str = traceback.format_exc()
-        #             exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-        #             sys.stderr.write(exception_message)
-        #             sys.stderr.flush()
-        #             # raise ValueError(exception_message) from ie
-        #         if idx >= max_tries - 1:
-        #             raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
-        #         time.sleep(1)
+        async def packster() -> str:
+            # time.sleep(1)
+            # TODO: <Alex>ALEX</Alex>
+            # try:
+            #     a: str = await carton.pack(
+            #         input_model_path,
+            #         runner_name="torchscript",
+            #         # Any 2.x.x version is okay
+            #         # TODO: improve this
+            #         required_framework_version="=2",
+            #         model_name=carton_model_name,
+            #         inputs=_get_input_spec(model),
+            #         outputs=_get_output_spec(model),
+            #     )
+            #     time.sleep(1)
+            #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+            #     time.sleep(1)
+            #     return a
+            # except Exception as ie:
+            #     exception_message: str = "A Packster-Inside Exception occurred.\n"
+            #     exception_traceback: str = traceback.format_exc()
+            #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+            #     sys.stderr.write(exception_message)
+            #     sys.stderr.flush()
+            #     raise ValueError(exception_message) from ie
+            # TODO: <Alex>ALEX</Alex>
+            # TODO: <Alex>ALEX</Alex>
+            # max_tries: int = 5
+            max_tries: int = 1
+            idx: int
+            em: str = ""
+            for idx in range(max_tries):
+                print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
+                time.sleep(1)
+                try:
+                    a: str = await carton.pack(
+                        input_model_path,
+                        runner_name="torchscript",
+                        # Any 2.x.x version is okay
+                        # TODO: improve this
+                        required_framework_version="=2",
+                        model_name=carton_model_name,
+                        inputs=_get_input_spec(model),
+                        outputs=_get_output_spec(model),
+                    )
+                    # time.sleep(1)
+                    print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
+                    # time.sleep(1)
+                    return a
+                except Exception as ie:
+                    exception_message: str = "A Packster-Inside Exception occurred.\n"
+                    exception_traceback: str = traceback.format_exc()
+                    exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
+                    sys.stderr.write(exception_message)
+                    sys.stderr.flush()
+                    em = exception_message
+                    # raise ValueError(exception_message) from ie
+                if idx >= max_tries - 1:
+                    # raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
+                    raise ValueError(em)
+                # time.sleep(1)
+
         # TODO: <Alex>ALEX</Alex>
 
         # TODO: <Alex>ALEX</Alex>

From aa6f879181f3ab88d66266ebf54c823cc1f842cf Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Sun, 10 Mar 2024 22:38:30 -0700
Subject: [PATCH 26/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 2daebb55a67..21471af6ca1 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -133,7 +133,7 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
 
         # TODO: <Alex>ALEX</Alex>
         # TODO: <Alex>ALEX</Alex>
-        async def packster() -> str:
+        async def pack() -> str:
             # time.sleep(1)
             # TODO: <Alex>ALEX</Alex>
             # try:

From 57bd646822512981365a7b2323a4abc2020e2421 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:29:05 -0700
Subject: [PATCH 27/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 5 +++++
 ludwig/utils/carton_utils.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f543293ad5b..5be57f91db4 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -60,6 +60,11 @@ jobs:
         run: |
           brew install libuv
 
+      - name: Debug out of space
+        run: |
+          du -h -d 1 ~
+          df -h
+
       - name: Install dependencies
         run: |
           python --version
diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 21471af6ca1..ed5e787bfca 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -169,7 +169,7 @@ async def pack() -> str:
                 time.sleep(1)
                 try:
                     a: str = await carton.pack(
-                        input_model_path,
+                        path=input_model_path,
                         runner_name="torchscript",
                         # Any 2.x.x version is okay
                         # TODO: improve this

From 71ad3f9ad91698df1c40b0ecf57ff91c9db91b14 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:47:40 -0700
Subject: [PATCH 28/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 5be57f91db4..f93d1f20aa2 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -60,11 +60,6 @@ jobs:
         run: |
           brew install libuv
 
-      - name: Debug out of space
-        run: |
-          du -h -d 1 ~
-          df -h
-
       - name: Install dependencies
         run: |
           python --version
@@ -80,6 +75,11 @@ jobs:
           pip list
         shell: bash
 
+      - name: Debug out of space
+        run: |
+          du -h -d 1 ~
+          df -h
+
       - name: Integration Tests
         run: |
           RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests

From 23cacf69f3c443cd8f0c1b9b50d04b4a3fa73ef5 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:57:26 -0700
Subject: [PATCH 29/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index ed5e787bfca..3f16f8feea4 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -192,7 +192,7 @@ async def pack() -> str:
                     # raise ValueError(exception_message) from ie
                 if idx >= max_tries - 1:
                     # raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
-                    raise ValueError(em)
+                    raise ValueError(em) from ie
                 # time.sleep(1)
 
         # TODO: <Alex>ALEX</Alex>

From fea546fe10b6fe1375cf9928497e7bf7a8d2e3f1 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:27:45 -0700
Subject: [PATCH 30/36] Troubleshooting test failures.

---
 ludwig/utils/carton_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 3f16f8feea4..3586b0bd5a0 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -164,6 +164,7 @@ async def pack() -> str:
             max_tries: int = 1
             idx: int
             em: str = ""
+            error: Exception | None = None
             for idx in range(max_tries):
                 print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
                 time.sleep(1)
@@ -189,10 +190,11 @@ async def pack() -> str:
                     sys.stderr.write(exception_message)
                     sys.stderr.flush()
                     em = exception_message
+                    error = ie
                     # raise ValueError(exception_message) from ie
                 if idx >= max_tries - 1:
                     # raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
-                    raise ValueError(em) from ie
+                    raise ValueError(em) from error
                 # time.sleep(1)
 
         # TODO: <Alex>ALEX</Alex>

From a04219c4b87a4f258361e615f5e9c0944554a281 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:40:33 -0700
Subject: [PATCH 31/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f93d1f20aa2..4001de00cc5 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -75,15 +75,22 @@ jobs:
           pip list
         shell: bash
 
-      - name: Debug out of space
+      - name: Debug out of space -- A
         run: |
           du -h -d 1 ~
           df -h
+          du -s /tmp
 
       - name: Integration Tests
         run: |
           RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
 
+      - name: Debug out of space -- B
+        run: |
+          du -h -d 1 ~
+          df -h
+          du -s /tmp
+
   event_file:
     name: "Event File"
     runs-on: ubuntu-latest

From 1c6553217e480e142d17df0a87948b7ee36d3708 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Tue, 12 Mar 2024 00:14:32 -0700
Subject: [PATCH 32/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 4001de00cc5..f222d8098eb 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -79,7 +79,17 @@ jobs:
         run: |
           du -h -d 1 ~
           df -h
-          du -s /tmp
+          # du -s /tmp
+
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          swap-storage: true
 
       - name: Integration Tests
         run: |
@@ -89,7 +99,7 @@ jobs:
         run: |
           du -h -d 1 ~
           df -h
-          du -s /tmp
+          # du -s /tmp
 
   event_file:
     name: "Event File"

From 265a9a06eb276d62dc4c2b7287cc462ed360be94 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Tue, 12 Mar 2024 00:30:05 -0700
Subject: [PATCH 33/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f222d8098eb..d83af12cf26 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -75,11 +75,11 @@ jobs:
           pip list
         shell: bash
 
-      - name: Debug out of space -- A
-        run: |
-          du -h -d 1 ~
-          df -h
-          # du -s /tmp
+      # - name: Debug out of space -- A
+      #   run: |
+      #     du -h -d 1 ~
+      #     df -h
+      #     # du -s /tmp
 
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@main
@@ -89,17 +89,22 @@ jobs:
           dotnet: true
           haskell: true
           large-packages: false
+          docker-images: true
           swap-storage: true
 
-      - name: Integration Tests
+      - name: Clean out /tmp directory
         run: |
-          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
+          rm -rf /tmp/*
 
-      - name: Debug out of space -- B
+      - name:  #Integration Tests
         run: |
-          du -h -d 1 ~
-          df -h
-          # du -s /tmp
+          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
+
+      # - name: Debug out of space -- B
+      #   run: |
+      #     du -h -d 1 ~
+      #     df -h
+      #     # du -s /tmp
 
   event_file:
     name: "Event File"

From c9e9d3292829a7ccd1149d602159419f6b0f9a80 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Tue, 12 Mar 2024 00:39:04 -0700
Subject: [PATCH 34/36] Troubleshooting test failures.

---
 .github/workflows/pytest.yml              |  2 +-
 tests/integration_tests/test_f_control.py | 60 ++++++++++++-----------
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index d83af12cf26..491ce2a9fd1 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -94,7 +94,7 @@ jobs:
 
       - name: Clean out /tmp directory
         run: |
-          rm -rf /tmp/*
+          sudo rm -rf /tmp/*
 
       - name:  #Integration Tests
         run: |
diff --git a/tests/integration_tests/test_f_control.py b/tests/integration_tests/test_f_control.py
index 8724732fc8f..d8cf5e2deb4 100644
--- a/tests/integration_tests/test_f_control.py
+++ b/tests/integration_tests/test_f_control.py
@@ -180,33 +180,35 @@ async def infer(inputs):
 
 
 # TODO: <Alex>ALEX</Alex>
-# # TODO: <Alex>ALEX</Alex>
-# @pytest.mark.integration_tests_x
-# # TODO: <Alex>ALEX</Alex>
-# @pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
-# def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
-#     input_features = [
-#         image_feature(
-#             os.path.join(tmpdir, "generated_output"),
-#             preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
-#             encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
-#         )
-#     ]
-#     output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
-
-#     data_csv = generate_data(
-#         input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
-#     )
-
-#     config = {
-#         INPUT_FEATURES: input_features,
-#         OUTPUT_FEATURES: output_features,
-#         TRAINER: {"train_steps": 1},
-#     }
-
-#     model = LudwigModel(config)
-
-#     # Failure happens post preprocessing but before training during the ECD model creation phase
-#     # so make sure the model can be created properly and training can proceed
-#     model.train(dataset=data_csv)
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.integration_tests_x
+# TODO: <Alex>ALEX</Alex>
+@pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
+def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
+    input_features = [
+        image_feature(
+            os.path.join(tmpdir, "generated_output"),
+            preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
+            encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
+        )
+    ]
+    output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
+
+    data_csv = generate_data(
+        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
+    )
+
+    config = {
+        INPUT_FEATURES: input_features,
+        OUTPUT_FEATURES: output_features,
+        TRAINER: {"train_steps": 1},
+    }
+
+    model = LudwigModel(config)
+
+    # Failure happens post preprocessing but before training during the ECD model creation phase
+    # so make sure the model can be created properly and training can proceed
+    model.train(dataset=data_csv)
+
+
 # TODO: <Alex>ALEX</Alex>

From d1f7592655ed82c635888059f7d4152e8f8b8101 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:38:06 -0700
Subject: [PATCH 35/36] Making carton utils more robust.

---
 .pre-commit-config.yaml      |  18 ++---
 ludwig/utils/carton_utils.py | 135 +++++++----------------------------
 pytest.ini                   |  14 +++-
 tests/conftest.py            |   3 -
 4 files changed, 47 insertions(+), 123 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 27fd6a51fde..1c6390db514 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,15 +38,15 @@ repos:
     hooks:
       - id: docformatter
         args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120]
-  #- repo: https://github.com/PyCQA/isort
-  #  rev: 5.12.0
-  #  hooks:
-  #    - id: isort
-  #      name: Format imports
-  #- repo: https://github.com/pycqa/flake8
-  #  rev: 6.0.0
-  #  hooks:
-  #    - id: flake8
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Format imports
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
   - repo: https://github.com/psf/black
     rev: 23.3.0
     hooks:
diff --git a/ludwig/utils/carton_utils.py b/ludwig/utils/carton_utils.py
index 3586b0bd5a0..d03d0a4cd92 100644
--- a/ludwig/utils/carton_utils.py
+++ b/ludwig/utils/carton_utils.py
@@ -3,7 +3,9 @@
 import logging
 import os
 import shutil
+import sys
 import tempfile
+import traceback
 from typing import Any, Dict, List
 
 import torch
@@ -98,7 +100,6 @@ def _get_output_spec(model: LudwigModel) -> List[Dict[str, Any]]:
 
 @DeveloperAPI
 def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwig_model"):
-    print(f"\n[ALEX_TEST] [WOUTPUT] CARTON_PATH:\n{carton_path} ; TYPE: {str(type(carton_path))}")
     try:
         import cartonml as carton
     except ImportError:
@@ -106,129 +107,43 @@ def export_carton(model: LudwigModel, carton_path: str, carton_model_name="ludwi
 
     # Generate a torchscript model
     model_ts = generate_carton_torchscript(model)
-    print(f"\n[ALEX_TEST] [WOUTPUT] MODEL_TORCH_SCRIPT:\n{model_ts} ; TYPE: {str(type(model_ts))}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
-        print(f"\n[ALEX_TEST] [WOUTPUT] TMPDIR:\n{tmpdir} ; TYPE: {str(type(tmpdir))}")
         # Save the model to a temp dir
         input_model_path: str = os.path.join(tmpdir, "model.pt")
         torch.jit.save(model_ts, input_model_path)
-        print(f"\n[ALEX_TEST] [WOUTPUT] INPUT_MODEL_PATH:\n{input_model_path} ; TYPE: {str(type(input_model_path))}")
 
         # carton.pack is an async function so we run it and wait until it's complete
         # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
         # in another function
-        # TODO: <Alex>ALEX</Alex>
-        # async def pack():
-        #     return await carton.pack(
-        #         path=input_model_path,
-        #         runner_name="torchscript",
-        #         # Any 2.x.x version is okay
-        #         # TODO: improve this
-        #         required_framework_version="=2.0",
-        #         model_name=carton_model_name,
-        #         inputs=_get_input_spec(model),
-        #         outputs=_get_output_spec(model),
-        #     )
-
-        # TODO: <Alex>ALEX</Alex>
-        # TODO: <Alex>ALEX</Alex>
         async def pack() -> str:
-            # time.sleep(1)
-            # TODO: <Alex>ALEX</Alex>
-            # try:
-            #     a: str = await carton.pack(
-            #         input_model_path,
-            #         runner_name="torchscript",
-            #         # Any 2.x.x version is okay
-            #         # TODO: improve this
-            #         required_framework_version="=2",
-            #         model_name=carton_model_name,
-            #         inputs=_get_input_spec(model),
-            #         outputs=_get_output_spec(model),
-            #     )
-            #     time.sleep(1)
-            #     print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-            #     time.sleep(1)
-            #     return a
-            # except Exception as ie:
-            #     exception_message: str = "A Packster-Inside Exception occurred.\n"
-            #     exception_traceback: str = traceback.format_exc()
-            #     exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-            #     sys.stderr.write(exception_message)
-            #     sys.stderr.flush()
-            #     raise ValueError(exception_message) from ie
-            # TODO: <Alex>ALEX</Alex>
-            # TODO: <Alex>ALEX</Alex>
-            # max_tries: int = 5
-            max_tries: int = 1
-            idx: int
-            em: str = ""
-            error: Exception | None = None
-            for idx in range(max_tries):
-                print(f"\n[ALEX_TEST] [WOUTPUT] TRYING_IDX:\n{idx} ; TYPE: {str(type(idx))}")
-                time.sleep(1)
-                try:
-                    a: str = await carton.pack(
-                        path=input_model_path,
-                        runner_name="torchscript",
-                        # Any 2.x.x version is okay
-                        # TODO: improve this
-                        required_framework_version="=2",
-                        model_name=carton_model_name,
-                        inputs=_get_input_spec(model),
-                        outputs=_get_output_spec(model),
-                    )
-                    # time.sleep(1)
-                    print(f"\n[ALEX_TEST] [WOUTPUT] WOUTPUT:\n{a} ; TYPE: {str(type(a))}")
-                    # time.sleep(1)
-                    return a
-                except Exception as ie:
-                    exception_message: str = "A Packster-Inside Exception occurred.\n"
-                    exception_traceback: str = traceback.format_exc()
-                    exception_message += f'{type(ie).__name__}: "{str(ie)}".  Traceback: "{exception_traceback}".'
-                    sys.stderr.write(exception_message)
-                    sys.stderr.flush()
-                    em = exception_message
-                    error = ie
-                    # raise ValueError(exception_message) from ie
-                if idx >= max_tries - 1:
-                    # raise ValueError("THINGS ENDED VERY BADLY!!!!!!!!!!!!!")
-                    raise ValueError(em) from error
-                # time.sleep(1)
-
-        # TODO: <Alex>ALEX</Alex>
-
-        # TODO: <Alex>ALEX</Alex>
-
-        # TODO: <Alex>ALEX</Alex>
-        loop = asyncio.get_event_loop()
-        print(f"\n[ALEX_TEST] [WOUTPUT] LOOP:\n{loop} ; TYPE: {str(type(loop))}")
-        # tmp_out_path = loop.run_until_complete(pack())
-        # TODO: <Alex>ALEX</Alex>
-        # TODO: <Alex>ALEX</Alex>
-        import time
-        import sys
-        import traceback
-
-        tmp_out_path: str = None
+            try:
+                return await carton.pack(
+                    path=input_model_path,
+                    runner_name="torchscript",
+                    # Any 2.x.x version is okay
+                    # TODO: improve this
+                    required_framework_version="=2",
+                    model_name=carton_model_name,
+                    inputs=_get_input_spec(model),
+                    outputs=_get_output_spec(model),
+                )
+            except Exception as e:
+                exception_message: str = 'An Exception inside "pack()" occurred.\n'
+                exception_traceback: str = traceback.format_exc()
+                exception_message += f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
+                sys.stderr.write(exception_message)
+                sys.stderr.flush()
+                raise ValueError(exception_message) from e  # Re-raise error for calling function to handle.
+
         try:
-            # TODO: <Alex>ALEX</Alex>
-            tmp_out_path = loop.run_until_complete(pack())
-            # TODO: <Alex>ALEX</Alex>
-            # TODO: <Alex>ALEX</Alex>
-            # time.sleep(1)
-            # tmp_out_path: str = loop.run_until_complete(packster())
-            # TODO: <Alex>ALEX</Alex>
+            tmp_out_path: str = asyncio.get_event_loop().run_until_complete(pack())
+            # Move it to the output path
+            shutil.move(tmp_out_path, carton_path)
         except Exception as e:
-            exception_message: str = "A general Exception occurred.\n"
+            exception_message: str = 'An Exception inside "export_carton()" occurred.\n'
             exception_traceback: str = traceback.format_exc()
             exception_message += f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
             sys.stderr.write(exception_message)
             sys.stderr.flush()
             raise SystemExit(exception_message) from e  # Make sure error is fatal.
-        # TODO: <Alex>ALEX</Alex>
-
-        # Move it to the output path
-        # time.sleep(1)
-        shutil.move(tmp_out_path, carton_path)
diff --git a/pytest.ini b/pytest.ini
index a82421e060c..539a53b1674 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,5 +1,17 @@
 [pytest]
 markers =
-    integration_tests_x: mark a test to be run as part of integration tests, group X.
+    benchmark: mark a test as a benchmarking test.
+    distributed: mark a test as a distributed test.
+    filesystem: mark a test as a filesystem test.
+    slow: mark test as slow.
+    combinatorial: mark a test as combinatorial.
+    horovod: mark a test as a Horovod test.
+    llm: mark a test as an LLM test.
+    integration_tests_a: mark a test to be run as part of integration tests, group A.
+    integration_tests_b: mark a test to be run as part of integration tests, group B.
+    integration_tests_c: mark a test to be run as part of integration tests, group C.
+    integration_tests_d: mark a test to be run as part of integration tests, group D.
+    integration_tests_e: mark a test to be run as part of integration tests, group E.
+    integration_tests_f: mark a test to be run as part of integration tests, group F.
 filterwarnings =
     ignore::DeprecationWarning
diff --git a/tests/conftest.py b/tests/conftest.py
index 3b2abe99622..9dae92e2e65 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -44,9 +44,6 @@
     "integration_tests_c",
     "integration_tests_d",
     "integration_tests_e",
-    # TODO: <Alex>ALEX</Alex>
-    "integration_tests_x",
-    # TODO: <Alex>ALEX</Alex>
 }
 
 

From 9e5b5cdd39d207588422e16acb23abb5207d3985 Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky <alexsherstinsky@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:45:53 -0700
Subject: [PATCH 36/36] Adding storage cleaning to GitHub Actions for
 integration tests.

---
 .github/workflows/pytest.yml              | 500 +++++++++++++++++++++-
 tests/integration_tests/test_f_control.py | 214 ---------
 2 files changed, 486 insertions(+), 228 deletions(-)
 delete mode 100644 tests/integration_tests/test_f_control.py

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 491ce2a9fd1..4f90d4b138c 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -16,6 +16,187 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  pytest:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.8", "3.9", "3.10"]
+        test-markers: ["not distributed", "distributed"]
+        include:
+          - python-version: "3.8"
+            pytorch-version: 2.0.0
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+          - python-version: "3.9"
+            pytorch-version: 2.1.1
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+          - python-version: "3.10"
+            # pytorch-version: nightly
+            pytorch-version: 2.2.1
+            torchscript-version: 1.10.2
+            ray-version: 2.3.1
+    env:
+      PYTORCH: ${{ matrix.pytorch-version }}
+      MARKERS: ${{ matrix.test-markers }}
+      NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
+      NEUROPOD_VERISON: "0.3.0-rc6"
+      TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
+      RAY_VERSION: ${{ matrix.ray-version }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
+      KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+      KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+      IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
+
+    name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
+    services:
+      minio:
+        image: fclairamb/minio-github-actions
+        env:
+          MINIO_ACCESS_KEY: minio
+          MINIO_SECRET_KEY: minio123
+        ports:
+          - 9000:9000
+
+    timeout-minutes: 150
+    steps:
+      - name: Setup ludwigai/ludwig-ray container for local testing with act.
+        if: ${{ env.ACT }}
+        run: |
+          curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
+          sudo apt-get install -y nodejs
+          sudo mkdir -p /opt/hostedtoolcache/
+          sudo chmod 777 -R /opt/hostedtoolcache/
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
+
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
+
+      - name: pip cache
+        if: ${{ !env.ACT }}
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
+
+      - name: Debug out of space
+        run: |
+          du -h -d 1 ~
+          df -h
+
+      - name: Install dependencies
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+          cmake --version
+
+          # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
+          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
+          cat requirements_distributed.txt | sed '/^ray[\[]/d'
+
+          if [ "$MARKERS" != "distributed" ]; then
+            # Skip distributed and hyperopt requirements to test optional imports
+            echo > requirements-temp && mv requirements-temp requirements_distributed.txt
+            echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
+
+            # Skip distributed tree requirement (lightgbm-ray)
+            cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
+          else
+            if [ "$RAY_VERSION" == "nightly" ]; then
+              # NOTE: hardcoded for python 3.10 on Linux
+              echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
+            else
+              echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
+            fi
+          fi
+
+          if [ "$PYTORCH" == "nightly" ]; then
+            extra_index_url=https://download.pytorch.org/whl/nightly/cpu
+            pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
+
+          else
+            extra_index_url=https://download.pytorch.org/whl/cpu
+            pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
+          fi
+
+          pip install '.[test]' --extra-index-url $extra_index_url
+          pip list
+
+          if [ "$PYTORCH" == "nightly" ]; then
+            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
+          else
+            python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
+          fi
+
+          if [ "$MARKERS" == "distributed" ]; then
+            python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
+          else
+            python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
+          fi
+        shell: bash
+
+      - name: Install Neuropod backend
+        run: |
+          sudo mkdir -p "$NEUROPOD_BASE_DIR"
+          curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
+        shell: bash
+
+      - name: Unit Tests
+        run: |
+          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
+
+      - name: Regression Tests
+        run: |
+          RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
+
+      # Skip Horovod and replace with DDP.
+      # https://github.com/ludwig-ai/ludwig/issues/3468
+      # - name: Install Horovod if necessary
+      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+      #   env:
+      #     HOROVOD_WITH_PYTORCH: 1
+      #     HOROVOD_WITHOUT_MPI: 1
+      #     HOROVOD_WITHOUT_TENSORFLOW: 1
+      #     HOROVOD_WITHOUT_MXNET: 1
+      #   run: |
+      #     pip install -r requirements_extra.txt
+      #     HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
+      #     if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
+      #       pip uninstall -y horovod
+      #       pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
+      #     fi
+      #     horovodrun --check-build
+      #   shell: bash
+
+      # Skip Horovod tests and replace with DDP.
+      # https://github.com/ludwig-ai/ludwig/issues/3468
+      # - name: Horovod Tests
+      #   if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
+      #   run: |
+      #     RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
+
+      - name: Upload Unit Test Results
+        if: ${{ always() && !env.ACT }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
+          path: pytest.xml
+
   integration-tests:
     name: ${{ matrix.test-markers }}
     runs-on: ubuntu-latest
@@ -23,7 +204,12 @@ jobs:
       fail-fast: false
       matrix:
         test-markers:
-          - "integration_tests_x"
+          - "integration_tests_a"
+          - "integration_tests_b"
+          - "integration_tests_c"
+          - "integration_tests_d"
+          - "integration_tests_e"
+          - "integration_tests_f"
 
     env:
       AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
@@ -70,17 +256,11 @@ jobs:
           cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
           cat requirements_distributed.txt | sed '/^ray[\[]/d'
           pip install torch==2.0.0 torchtext torchvision torchaudio
-          pip install ray==2.3.1
+          pip install ray==2.3.0
           pip install '.[test]'
           pip list
         shell: bash
 
-      # - name: Debug out of space -- A
-      #   run: |
-      #     du -h -d 1 ~
-      #     df -h
-      #     # du -s /tmp
-
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@main
         with:
@@ -96,15 +276,281 @@ jobs:
         run: |
           sudo rm -rf /tmp/*
 
-      - name:  #Integration Tests
+      - name: Integration Tests
         run: |
           RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
 
-      # - name: Debug out of space -- B
-      #   run: |
-      #     du -h -d 1 ~
-      #     df -h
-      #     # du -s /tmp
+  llm-tests:
+    name: LLM Tests
+    runs-on: ubuntu-latest
+
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
+
+      - name: Install dependencies
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+
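+          # Strip the torch-family entries from requirements.txt so the versions pinned below are installed instead (this job has no matrix). NOTE: the ray sed below only prints to stdout; it does not rewrite requirements_distributed.txt.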
+          cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
+          cat requirements_distributed.txt | sed '/^ray[\[]/d'
+          pip install torch==2.0.0 torchtext torchvision torchaudio
+          pip install ray==2.3.0
+          pip install '.[test]'
+          pip list
+        shell: bash
+
+      - name: LLM Tests
+        run: |
+          pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
+
+  combinatorial-tests:
+    name: Combinatorial Tests
+    runs-on: ubuntu-latest
+
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
+
+      - name: Install dependencies
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+          pip install '.[test]'
+          pip list
+        shell: bash
+
+      - name: Testing combinatorial config generation code
+        run: |
+          pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
+
+      - name: Combinatorial Tests
+        run: |
+          pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
+
+  test-minimal-install:
+    name: Test Minimal Install
+    runs-on: ubuntu-latest
+
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake libsndfile1
+
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
+
+      - name: Install dependencies
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+          pip install torch==2.0.0 torchtext
+          pip install ray==2.3.0
+          pip install '.'
+          pip list
+        shell: bash
+      - name: Check Install
+        run: |
+          ludwig check_install
+        shell: bash
+
+      - name: Test Getting Started
+        run: |
+          cd examples/getting_started && sh ./run.sh
+        shell: bash
+
+  # start-runner:
+  #   name: Start self-hosted EC2 runner
+  #   if: >
+  #     always() && needs.pytest.result != 'failure' && (
+  #     github.event_name == 'schedule' && github.repository == 'ludwig-ai/ludwig' ||
+  #     github.event_name == 'push' && github.repository == 'ludwig-ai/ludwig' ||
+  #     github.event_name == 'pull_request' && github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && !github.event.pull_request.head.repo.fork)
+  #   needs: pytest
+  #   runs-on: ubuntu-latest
+  #   outputs:
+  #     label: ${{ steps.start-ec2-runner.outputs.label }}
+  #     ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
+
+  #   steps:
+  #     - name: Configure AWS credentials
+  #       uses: aws-actions/configure-aws-credentials@v1
+  #       with:
+  #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  #         aws-region: ${{ secrets.AWS_REGION }}
+
+  #     - name: Start EC2 runner
+  #       id: start-ec2-runner
+  #       uses: machulav/ec2-github-runner@v2.3.2
+  #       with:
+  #         mode: start
+  #         github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+  #         ec2-image-id: ami-0759580dedc953d1f
+  #         ec2-instance-type: g4dn.xlarge
+  #         subnet-id: subnet-0983be43
+  #         security-group-id: sg-4cba0d08
+  #         aws-resource-tags: >
+  #           [
+  #             {"Key": "Name", "Value": "ludwig-github-${{ github.head_ref || github.sha }}"},
+  #             {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
+  #             {"Key": "GitHubHeadRef", "Value": "${{ github.head_ref }}"},
+  #             {"Key": "GitHubSHA", "Value": "${{ github.sha }}"}
+  #           ]
+
+  # pytest-gpu:
+  #   if: needs.start-runner.result != 'skipped'
+  #   needs: start-runner # required to start the main job when the runner is ready
+  #   runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       python-version: [3.7]
+  #       include:
+  #         - python-version: 3.7
+  #           pytorch-version: 1.10.0
+  #           torchscript-version: 1.10.2
+  #   env:
+  #     PYTORCH: ${{ matrix.pytorch-version }}
+  #     NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
+  #     NEUROPOD_VERISON: "0.3.0-rc6"
+  #     TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
+
+  #   name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, gpu
+
+  #   timeout-minutes: 70
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Set up Python ${{ matrix.python-version }}
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: ${{ matrix.python-version }}
+
+  #     - name: Setup Linux
+  #       if: runner.os == 'linux'
+  #       run: |
+  #         sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8
+  #         cmake --version
+
+  #     - name: Install CUDA drivers
+  #       run: |
+  #         wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+  #         sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+  #         wget https://developer.download.nvidia.com/compute/cuda/11.5.1/local_installers/cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
+  #         sudo dpkg -i cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
+  #         sudo apt-key add /var/cuda-repo-ubuntu2004-11-5-local/7fa2af80.pub
+  #         sudo apt-get update
+  #         sudo apt-get -y install cuda
+  #       shell: bash
+
+  #     - name: pip cache
+  #       uses: actions/cache@v2
+  #       with:
+  #         path: ~/.cache/pip
+  #         key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }}
+  #         restore-keys: |
+  #           ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-
+
+  #     - name: Install dependencies
+  #       env:
+  #         HOROVOD_WITH_PYTORCH: 1
+  #         HOROVOD_WITHOUT_MPI: 1
+  #         HOROVOD_WITHOUT_TENSORFLOW: 1
+  #         HOROVOD_WITHOUT_MXNET: 1
+  #       run: |
+  #         python --version
+  #         pip --version
+  #         python -m pip install -U pip
+  #         if [ $PYTORCH == "nightly" ]; then
+  #           cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
+  #           pip install --pre torch torchvision -f https://download.pytorch.org/whl/torch_stable.html
+  #         else
+  #           pip install torch==${PYTORCH}+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+  #         fi
+  #         # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
+  #         pip install dulwich==0.20.26 # workaround for `/usr/bin/ld: cannot find -lpython3.7m`
+  #         pip install '.[test]'
+  #         pip list
+  #       shell: bash
+
+  #     - name: Install Neuropod backend
+  #       run: |
+  #         sudo mkdir -p "$NEUROPOD_BASE_DIR"
+  #         curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
+  #       shell: bash
+
+  #     - name: Reinstall Horovod if necessary
+  #       env:
+  #         HOROVOD_WITH_PYTORCH: 1
+  #         HOROVOD_WITHOUT_MPI: 1
+  #         HOROVOD_WITHOUT_TENSORFLOW: 1
+  #         HOROVOD_WITHOUT_MXNET: 1
+  #       run: |
+  #         HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
+  #         if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
+  #           pip uninstall -y horovod
+  #           pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
+  #         fi
+  #         horovodrun --check-build
+  #       shell: bash
+
+  #     - name: Check CUDA is available
+  #       run: |
+  #         python -c "import torch; assert torch.cuda.is_available()"
+
+  #     - name: Tests
+  #       run: |
+  #         pytest -v --timeout 300 --durations 10 --junitxml pytest.xml tests
+
+  #     - name: Upload Unit Test Results
+  #       if: always()
+  #       uses: actions/upload-artifact@v2
+  #       with:
+  #         name: Unit Test Results (Python ${{ matrix.python-version }} gpu)
+  #         path: pytest.xml
 
   event_file:
     name: "Event File"
@@ -117,3 +563,29 @@ jobs:
         with:
           name: Event File
           path: ${{ github.event_path }}
+
+  # stop-runner:
+  #   name: Stop self-hosted EC2 runner
+
+  #   # required to stop the runner even if an error occurred in a previous job
+  #   if: always() && needs.start-runner.result != 'skipped'
+  #   needs:
+  #     - start-runner # required to get output from the start-runner job
+  #     - pytest-gpu # required to wait until the main job is done
+  #   runs-on: ubuntu-latest
+
+  #   steps:
+  #     - name: Configure AWS credentials
+  #       uses: aws-actions/configure-aws-credentials@v1
+  #       with:
+  #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  #         aws-region: ${{ secrets.AWS_REGION }}
+
+  #     - name: Stop EC2 runner
+  #       uses: machulav/ec2-github-runner@v2.3.1
+  #       with:
+  #         mode: stop
+  #         github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+  #         label: ${{ needs.start-runner.outputs.label }}
+  #         ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
diff --git a/tests/integration_tests/test_f_control.py b/tests/integration_tests/test_f_control.py
deleted file mode 100644
index d8cf5e2deb4..00000000000
--- a/tests/integration_tests/test_f_control.py
+++ /dev/null
@@ -1,214 +0,0 @@
-import asyncio
-import contextlib
-import copy
-import logging
-import os
-import platform
-import random
-import string
-from typing import List, Union
-from unittest import mock
-
-import numpy as np
-import pandas as pd
-import pytest
-import torch
-from PIL import Image
-from transformers import AutoTokenizer
-
-import ludwig
-from ludwig.api import LudwigModel
-from ludwig.backend import initialize_backend
-from ludwig.callbacks import Callback
-from ludwig.constants import (
-    BASE_MODEL,
-    BATCH_SIZE,
-    COLUMN,
-    DECODER,
-    EPOCHS,
-    FULL,
-    INPUT_FEATURES,
-    MODEL_ECD,
-    MODEL_LLM,
-    MODEL_TYPE,
-    NAME,
-    OUTPUT_FEATURES,
-    PREDICTIONS,
-    PREPROCESSING,
-    PROC_COLUMN,
-    PROMPT,
-    SPLIT,
-    TRAINER,
-    TYPE,
-)
-from ludwig.data.concatenate_datasets import concatenate_df
-from ludwig.data.preprocessing import handle_features_with_prompt_config, preprocess_for_prediction
-from ludwig.schema.llms.prompt import PromptConfig
-from ludwig.schema.model_types.base import ModelConfig
-from ludwig.utils.carton_utils import export_carton
-from tests.integration_tests.utils import (
-    assert_preprocessed_dataset_shape_and_dtype_for_feature,
-    audio_feature,
-    binary_feature,
-    category_feature,
-    generate_data,
-    generate_data_as_dataframe,
-    image_feature,
-    LocalTestBackend,
-    number_feature,
-    sequence_feature,
-    text_feature,
-)
-
-NUM_EXAMPLES = 20
-
-# TODO: <Alex>ALEX</Alex>
-# pytestmark = pytest.mark.integration_tests_x
-# TODO: <Alex>ALEX</Alex>
-
-
-# TODO: <Alex>ALEX</Alex>
-@pytest.mark.integration_tests_x
-# TODO: <Alex>ALEX</Alex>
-@pytest.mark.skipif(platform.system() == "Windows", reason="Carton is not supported on Windows")
-def test_carton_torchscript(csv_filename, tmpdir):
-    data_csv_path = os.path.join(tmpdir, csv_filename)
-
-    # Configure features to be tested:
-    bin_str_feature = binary_feature()
-    input_features = [
-        bin_str_feature,
-        # binary_feature(),
-        number_feature(),
-        category_feature(encoder={"vocab_size": 3}),
-        # TODO: future support
-        # sequence_feature(vocab_size=3),
-        # text_feature(vocab_size=3),
-        # vector_feature(),
-        # image_feature(image_dest_folder),
-        # audio_feature(audio_dest_folder),
-        # timeseries_feature(),
-        # date_feature(),
-        # h3_feature(),
-        # set_feature(vocab_size=3),
-        # bag_feature(vocab_size=3),
-    ]
-    output_features = [
-        bin_str_feature,
-        # binary_feature(),
-        number_feature(),
-        category_feature(decoder={"vocab_size": 3}, output_feature=True),
-        # TODO: future support
-        # sequence_feature(vocab_size=3),
-        # text_feature(vocab_size=3),
-        # set_feature(vocab_size=3),
-        # vector_feature()
-    ]
-    backend = LocalTestBackend()
-    config = {
-        "input_features": input_features,
-        "output_features": output_features,
-        TRAINER: {"epochs": 2, BATCH_SIZE: 128},
-    }
-
-    # Generate training data
-    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
-
-    # Convert bool values to strings, e.g., {'Yes', 'No'}
-    df = pd.read_csv(training_data_csv_path)
-    false_value, true_value = "No", "Yes"
-    df[bin_str_feature[NAME]] = df[bin_str_feature[NAME]].map(lambda x: true_value if x else false_value)
-    df.to_csv(training_data_csv_path)
-
-    # Train Ludwig (Pythonic) model:
-    ludwig_model = LudwigModel(config, backend=backend)
-    ludwig_model.train(
-        dataset=training_data_csv_path,
-        skip_save_training_description=True,
-        skip_save_training_statistics=True,
-        skip_save_model=True,
-        skip_save_progress=True,
-        skip_save_log=True,
-        skip_save_processed_input=True,
-    )
-
-    # Obtain predictions from Python model
-    preds_dict, _ = ludwig_model.predict(dataset=training_data_csv_path, return_type=dict)
-
-    # Create graph inference model (Torchscript) from trained Ludwig model.
-    carton_path = os.path.join(tmpdir, "carton")
-    export_carton(ludwig_model, carton_path)
-
-    import cartonml as carton
-
-    # Load the carton model
-    # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
-    # in another function
-    async def load():
-        return await carton.load(carton_path)
-
-    loop = asyncio.get_event_loop()
-    carton_model = loop.run_until_complete(load())
-
-    def to_input(s: pd.Series) -> Union[List[str], torch.Tensor]:
-        if s.dtype == "object":
-            return np.array(s.to_list())
-        return s.to_numpy().astype(np.float32)
-
-    df = pd.read_csv(training_data_csv_path)
-    inputs = {name: to_input(df[feature.column]) for name, feature in ludwig_model.model.input_features.items()}
-
-    # See https://pyo3.rs/v0.20.0/ecosystem/async-await#a-note-about-asynciorun for why we wrap it
-    # in another function
-    async def infer(inputs):
-        return await carton_model.infer(inputs)
-
-    outputs = loop.run_until_complete(infer(inputs))
-
-    # Compare results from Python trained model against Carton
-    assert len(preds_dict) == len(outputs)
-    for feature_name, feature_outputs_expected in preds_dict.items():
-        assert feature_name in outputs
-
-        output_values_expected = feature_outputs_expected[PREDICTIONS]
-        output_values = outputs[feature_name]
-        if output_values.dtype.type in {np.string_, np.str_}:
-            # Strings should match exactly
-            assert np.all(output_values == output_values_expected), f"feature: {feature_name}, output: predictions"
-        else:
-            assert np.allclose(output_values, output_values_expected), f"feature: {feature_name}, output: predictions"
-
-
-# TODO: <Alex>ALEX</Alex>
-# TODO: <Alex>ALEX</Alex>
-@pytest.mark.integration_tests_x
-# TODO: <Alex>ALEX</Alex>
-@pytest.mark.parametrize("use_pretrained", [False, True], ids=["false", "true"])
-def test_vit_encoder_different_dimension_image(tmpdir, csv_filename, use_pretrained: bool):
-    input_features = [
-        image_feature(
-            os.path.join(tmpdir, "generated_output"),
-            preprocessing={"in_memory": True, "height": 224, "width": 206, "num_channels": 3},
-            encoder={TYPE: "_vit_legacy", "use_pretrained": use_pretrained},
-        )
-    ]
-    output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
-
-    data_csv = generate_data(
-        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
-    )
-
-    config = {
-        INPUT_FEATURES: input_features,
-        OUTPUT_FEATURES: output_features,
-        TRAINER: {"train_steps": 1},
-    }
-
-    model = LudwigModel(config)
-
-    # Failure happens post preprocessing but before training during the ECD model creation phase
-    # so make sure the model can be created properly and training can proceed
-    model.train(dataset=data_csv)
-
-
-# TODO: <Alex>ALEX</Alex>