Skip to content

Fix initialized weights resetting in Fabric.setup() when using FSDP #8499

Fix initialized weights resetting in Fabric.setup() when using FSDP

Fix initialized weights resetting in Fabric.setup() when using FSDP #8499

Workflow file for this run

name: Test Fabric
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on:
push:
branches: [master, "release/*"]
pull_request:
branches: [master, "release/*"]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- ".actions/*"
- "requirements/ci.txt"
- "requirements/fabric/**"
- "src/lightning/fabric/**"
- "src/lightning_fabric/*"
- "tests/tests_fabric/**"
- "pyproject.toml" # includes pytest config
- ".github/workflows/ci-tests-fabric.yml"
- "!requirements/*/docs.txt"
- "!*.md"
- "!**/*.md"
schedule:
# At the end of every day
- cron: "0 0 * * *"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
defaults:
run:
shell: bash
jobs:
fabric-cpu:
runs-on: ${{ matrix.os }}
if: github.event.pull_request.draft == false
strategy:
fail-fast: false
matrix:
include:
- { os: "macOS-11", pkg-name: "lightning", python-version: "3.10", pytorch-version: "1.13" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "1.13" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "1.13" }
# only run PyTorch latest
- { os: "macOS-11", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.1" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.1" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.1" }
- { os: "macOS-11", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
- { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" }
# only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues
- { os: "macOS-12", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.0" }
- { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.0" }
- { os: "windows-2022", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.0" }
- { os: "macOS-12", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.1" }
- { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.1" }
- { os: "windows-2022", pkg-name: "fabric", python-version: "3.11", pytorch-version: "2.1" }
# "oldest" versions tests, only on minimum Python
- {
os: "macOS-11",
pkg-name: "lightning",
python-version: "3.8",
pytorch-version: "1.13",
requires: "oldest",
}
- {
os: "ubuntu-20.04",
pkg-name: "lightning",
python-version: "3.8",
pytorch-version: "1.13",
requires: "oldest",
}
- {
os: "windows-2022",
pkg-name: "lightning",
python-version: "3.8",
pytorch-version: "1.13",
requires: "oldest",
}
# "fabric" installs the standalone package
- { os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.13" }
- { os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.13" }
- { os: "windows-2022", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.13" }
timeout-minutes: 25 # because of building grpcio on Mac
env:
PACKAGE_NAME: ${{ matrix.pkg-name }}
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
PYPI_CACHE_DIR: "_pip-wheels"
TORCH_URL_STABLE: "https://download.pytorch.org/whl/cpu/torch_stable.html"
TORCH_URL_TEST: "https://download.pytorch.org/whl/test/cpu/torch_test.html"
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: basic setup
run: pip install -q -r .actions/requirements.txt
- name: Set min. dependencies
if: ${{ matrix.requires == 'oldest' }}
run: python .actions/assistant.py replace_oldest_ver
- name: Adjust PyTorch versions in requirements files
if: ${{ matrix.requires != 'oldest' }}
run: |
pip install -q -r requirements/ci.txt
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
for fpath in `ls requirements/**/*.txt`; do \
python ./adjust-torch-versions.py $fpath ${{ matrix.pytorch-version }}; \
done
- name: pip wheels cache
uses: actions/cache/restore@v4
with:
path: ${{ env.PYPI_CACHE_DIR }}
key: pypi_wheels
- run: |
mkdir -p $PYPI_CACHE_DIR
ls -lh $PYPI_CACHE_DIR
- name: Env. variables
run: |
# Switch PyTorch URL
python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.pytorch-version }}' == '2.2' else '${{env.TORCH_URL_STABLE}}'))" >> $GITHUB_ENV
# Switch coverage scope
python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'lightning_fabric'))" >> $GITHUB_ENV
# if you install mono-package set dependency only for this subpackage
python -c "print('EXTRA_PREFIX=' + str('' if '${{matrix.pkg-name}}' != 'lightning' else 'fabric-'))" >> $GITHUB_ENV
- name: Install package & dependencies
timeout-minutes: 20
run: |
pip install -e ".[${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" -U --prefer-binary \
--find-links="${TORCH_URL}" --find-links="${PYPI_CACHE_DIR}"
pip list
- name: Dump handy wheels
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
continue-on-error: true
uses: ./.github/actions/pip-wheels
with:
wheel-dir: ${{ env.PYPI_CACHE_DIR }}
torch-url: ${{ env.TORCH_URL }}
cache-key: "pypi_wheels"
- name: Adjust tests
if: ${{ matrix.pkg-name != 'lightning' }}
run: |
python .actions/assistant.py copy_replace_imports --source_dir="./tests" \
--source_import="lightning.fabric" --target_import="lightning_fabric"
- name: Testing Warnings
working-directory: tests/tests_fabric
# needs to run outside `pytest`
run: python utilities/test_warnings.py
- name: Testing Fabric
working-directory: tests/tests_fabric
# NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
run: |
echo $GITHUB_RUN_ID
python -m coverage run --source ${{ env.COVERAGE_SCOPE }} \
-m pytest -v --timeout=30 --durations=50 --random-order-seed=$GITHUB_RUN_ID
- name: Statistics
if: success()
working-directory: tests/tests_fabric
run: |
coverage report
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
# see: https://github.com/actions/toolkit/issues/399
continue-on-error: true
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: tests/tests_fabric/coverage.xml
flags: ${{ env.COVERAGE_SCOPE }},cpu,pytest,python${{ matrix.python-version }}
name: CPU-coverage
fail_ci_if_error: false