diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 00000000..c4aad597
Binary files /dev/null and b/.DS_Store differ
diff --git a/.github/.DS_Store b/.github/.DS_Store
new file mode 100644
index 00000000..1f2c43e0
Binary files /dev/null and b/.github/.DS_Store differ
diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml
new file mode 100644
index 00000000..3f713320
--- /dev/null
+++ b/.github/workflows/doc_build.yml
@@ -0,0 +1,52 @@
+name: Docs
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          architecture: x64
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install Dependencies
+        run: |
+          set -eux
+
+          pip install -r docs/requirements.txt
+      - name: Build Sphinx Docs
+        working-directory: docs
+        run: |
+          set -eux
+
+          make html
+      - name: Upload static files as artifact
+        id: deployment
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs/build/html/
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    if: ${{ github.ref == 'refs/heads/main' }}
+    permissions:
+      pages: write # to deploy to Pages
+      id-token: write # to verify the deployment originates from an appropriate source
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/docs/.DS_Store b/docs/.DS_Store
new file mode 100644
index 00000000..5008ddfc
Binary files /dev/null and b/docs/.DS_Store differ
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000..f68f6093
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,29 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Generate example documentation from Python files
+generate-examples:
+	@echo "Generating example documentation..."
+	@cd "$(SOURCEDIR)" && python GenerateExamples.py
+
+# Override html target to run generate-examples first
+html: generate-examples
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile generate-examples html
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 00000000..747ffb7b
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..a2477095
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,13 @@
+sphinx==7.2.6
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
+sphinxcontrib.katex==0.9.10
+#breathe==4.34.0 # only if generating C++ docs
+exhale==0.2.3 # only if generating C++ docs
+docutils>=0.18.1,<0.21
+sphinx-design==0.6.1
+sphinxcontrib-mermaid==1.0.0
+myst-parser #==0.18.1 # if you want to contribute in Markdown
+sphinx-gallery==0.14.0 # only if hosting interactive tutorials
+sphinx-sitemap==2.7.1
+sphinxext-opengraph
+nbsphinx
\ No newline at end of file
diff --git a/docs/source/.DS_Store b/docs/source/.DS_Store
new file mode 100644
index 00000000..5008ddfc
Binary files /dev/null and b/docs/source/.DS_Store differ
diff --git a/docs/source/GenerateExamples.py b/docs/source/GenerateExamples.py
new file mode 100644
index 00000000..0655d335
--- /dev/null
+++ b/docs/source/GenerateExamples.py
@@ -0,0 +1,98 @@
+import os
+
+# Configuration
+EXAMPLES_DIR = "../../examples"  # Path to examples directory
+RST_DIR = "./examples"  # Where to output RST files (relative to Sphinx source dir)
+
+
+def find_python_files(directory):
+    """Find all Python files in the directory and its subdirectories, excluding __init__.py"""
+    python_files = []
+
+    # Walk through the directory tree
+    for root, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith(".py") and file != "__init__.py":
+                # Get the full path
+                full_path = os.path.join(root, file)
+                # Get the path relative to the examples directory
+                rel_path = os.path.relpath(full_path, directory)
+                python_files.append(rel_path)
+
+    return python_files
+
+
+def generate_rst_files():
+    """Generate RST files for all Python examples"""
+    # Create the output directory if it doesn't exist
+    os.makedirs(RST_DIR, exist_ok=True)
+
+    # Find all Python files
+    example_files = find_python_files(EXAMPLES_DIR)
+
+    # Generate RST files for each Python file
+    for rel_path in example_files:
+        # Create subdirectories in the RST directory if needed
+        rel_dir = os.path.dirname(rel_path)
+        if rel_dir:
+            os.makedirs(os.path.join(RST_DIR, rel_dir), exist_ok=True)
+
+        # Get the base name without extension
+        base = os.path.splitext(os.path.basename(rel_path))[0]
+
+        # Capitalize and replace underscores with spaces for nicer titles
+        title = base.replace("_", " ").title()
+
+        # Create the RST file path
+        if rel_dir:
+            rst_rel_path = os.path.join(rel_dir, f"{base}.rst")
+        else:
+            rst_rel_path = f"{base}.rst"
+
+        rst_path = os.path.join(RST_DIR, rst_rel_path)
+
+        # Write the RST file
+        with open(rst_path, "w") as f:
+            f.write(
+                f"""{title}
+{'=' * len(title)}
+
+.. literalinclude:: {os.path.join('..', EXAMPLES_DIR, rel_path)}
+   :language: python
+   :linenos:
+"""
+            )
+
+        print(f"Generated RST file for {rel_path}")
+
+    # Generate a Python examples section in the examples.rst file
+    examples_rst_path = "./examples.rst"
+    with open(examples_rst_path, "r") as f:
+        content = f.read()
+
+    # Check if the Python Examples section already exists
+    if "Python Examples" not in content:
+        # Add the Python Examples section
+        python_examples_section = """
+Python Examples
+---------------
+
+These Python scripts demonstrate how to use Monarch's APIs directly in Python code:
+
+"""
+        # Add a list of Python examples
+        for rel_path in example_files:
+            base = os.path.splitext(os.path.basename(rel_path))[0]
+            title = base.replace("_", " ").title()
+            python_examples_section += f"- **{title}**: :doc:`examples/{base}`\n"
+
+        # Append the section to the examples.rst file
+        with open(examples_rst_path, "a") as f:
+            f.write(python_examples_section)
+
+        print("Added Python Examples section to examples.rst")
+
+
+if __name__ == "__main__":
+    generate_rst_files()
+    print("RST generation complete!")
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 00000000..bc0365e9
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,117 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "Monarch"
+copyright = "2025"
+author = ""
+release = ""
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "sphinx_design",
+    "sphinx_sitemap",
+    "sphinxcontrib.mermaid",
+    "pytorch_sphinx_theme2",
+    "sphinxext.opengraph",
+    "myst_parser",
+    "nbsphinx",
+    #'myst_nb',
+]
+
+templates_path = ["_templates"]
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+import os
+import sys
+
+# Add the repository root to the path so Sphinx can find the notebook files
+sys.path.insert(0, os.path.abspath("."))
+sys.path.insert(0, os.path.abspath("../.."))
+import pytorch_sphinx_theme2
+
+html_theme = "pytorch_sphinx_theme2"
+html_theme_path = [pytorch_sphinx_theme2.get_html_theme_path()]
+
+ogp_site_url = "http://pytorch.org/monarch"
+ogp_image = "https://pytorch.org/assets/images/social-share.jpg"
+
+html_theme_options = {
+    "navigation_with_keys": False,
+    "analytics_id": "GTM-T8XT4PS",
+    "logo": {
+        "text": "",
+    },
+    "icon_links": [
+        {
+            "name": "X",
+            "url": "https://x.com/PyTorch",
+            "icon": "fa-brands fa-x-twitter",
+        },
+        {
+            "name": "GitHub",
+            "url": "https://github.com/pytorch-labs/monarch",
+            "icon": "fa-brands fa-github",
+        },
+        {
+            "name": "Discourse",
+            "url": "https://dev-discuss.pytorch.org/",
+            "icon": "fa-brands fa-discourse",
+        },
+        {
+            "name": "PyPi",
+            "url": "https://pypi.org/project/monarch/",
+            "icon": "fa-brands fa-python",
+        },
+    ],
+    "use_edit_page_button": True,
+    "navbar_center": "navbar-nav",
+}
+
+theme_variables = pytorch_sphinx_theme2.get_theme_variables()
+templates_path = [
+    "_templates",
+    os.path.join(os.path.dirname(pytorch_sphinx_theme2.__file__), "templates"),
+]
+
+html_context = {
+    "theme_variables": theme_variables,
+    "display_github": True,
+    "github_url": "https://github.com",
+    "github_user": "pytorch-labs",
+    "github_repo": "monarch",
+    "feedback_url": "https://github.com/pytorch-labs/monarch",
+    "github_version": "main",
+    "doc_path": "docs/source",
+    "library_links": theme_variables.get("library_links", []),
+    "community_links": theme_variables.get("community_links", []),
+    "language_bindings_links": html_theme_options.get("language_bindings_links", []),
+}
+
+# not sure if this is needed
+myst_enable_extensions = [
+    "colon_fence",
+    "deflist",
+    "html_image",
+]
+
+
+# The suffix(es) of source filenames.
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+# Allow errors in notebook execution
+nbsphinx_allow_errors = True
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
new file mode 100644
index 00000000..6e290267
--- /dev/null
+++ b/docs/source/examples.rst
@@ -0,0 +1,29 @@
+Monarch Examples
+================
+
+Welcome to Monarch's examples! This section contains various examples demonstrating how to use Monarch for distributed execution in PyTorch.
+
+Jupyter Notebooks
+-----------------
+
+These interactive tutorials demonstrate key features and use cases of Monarch, helping you understand how to leverage distributed execution for your PyTorch workloads.
+
+- **Ping Pong**: A simple demonstration of basic communication between processes in a distributed setting. This example shows how to send and receive tensors between different ranks, illustrating the fundamental building blocks of distributed computing.
+
+- **SPMD DDP**: An implementation of Single Program Multiple Data (SPMD) and Distributed Data Parallel (DDP) training with Monarch. This notebook shows how to scale your PyTorch models across multiple GPUs and nodes for faster training.
+
+Each notebook contains detailed explanations, code snippets, and comments to guide you through the implementation.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Examples Notebooks
+
+   examples/notebooks/ping_pong
+   examples/notebooks/spmd_ddp
+
+Python Examples
+---------------
+
+These Python scripts demonstrate how to use Monarch's APIs directly in Python code:
+
+- **Grpo Actor**: :doc:`examples/grpo_actor`
diff --git a/docs/source/examples/grpo_actor.rst b/docs/source/examples/grpo_actor.rst
new file mode 100644
index 00000000..5b914b23
--- /dev/null
+++ b/docs/source/examples/grpo_actor.rst
@@ -0,0 +1,6 @@
+Grpo Actor
+==========
+
+.. literalinclude:: ../../../examples/grpo_actor.py
+   :language: python
+   :linenos:
diff --git a/docs/source/examples/notebooks/ping_pong.ipynb b/docs/source/examples/notebooks/ping_pong.ipynb
new file mode 120000
index 00000000..c926c815
--- /dev/null
+++ b/docs/source/examples/notebooks/ping_pong.ipynb
@@ -0,0 +1 @@
+../../../../examples/notebooks/ping_pong.ipynb
\ No newline at end of file
diff --git a/docs/source/examples/notebooks/spmd_ddp.ipynb b/docs/source/examples/notebooks/spmd_ddp.ipynb
new file mode 120000
index 00000000..b7131bf3
--- /dev/null
+++ b/docs/source/examples/notebooks/spmd_ddp.ipynb
@@ -0,0 +1 @@
+../../../../examples/notebooks/spmd_ddp.ipynb
\ No newline at end of file
diff --git a/docs/source/examples/python_examples.rst b/docs/source/examples/python_examples.rst
new file mode 100644
index 00000000..06d99b96
--- /dev/null
+++ b/docs/source/examples/python_examples.rst
@@ -0,0 +1,6 @@
+Python Examples
+===============
+
+This section contains Python examples demonstrating how to use Monarch.
+
+.. include:: ./grpo_actor.rst
diff --git a/docs/source/get_started.md b/docs/source/get_started.md
new file mode 100644
index 00000000..168e7c1b
--- /dev/null
+++ b/docs/source/get_started.md
@@ -0,0 +1,192 @@
+# Getting Started with Monarch
+
+Welcome to Monarch! This guide will help you get up and running with Monarch, a distributed execution engine for PyTorch that delivers a high-quality user experience at cluster scale.
+
+## What is Monarch?
+
+Monarch is designed to extend PyTorch's capabilities to efficiently run on distributed systems. It maintains the familiar PyTorch API while handling the complexities of distributed execution, making it easier to scale your deep learning workloads across multiple GPUs and nodes.
+
+## Prerequisites
+
+Before installing Monarch, ensure you have:
+
+- A Linux system (Monarch is currently only supported on Linux)
+- Python 3.10 or later
+- CUDA-compatible GPU(s)
+- Basic familiarity with PyTorch
+
+## Installation
+
+### Quick Installation
+
+The simplest way to install Monarch is via pip:
+
+```bash
+pip install torchmonarch-nightly
+```
+
+### Manual Installation
+
+For more control or development purposes, you can install Monarch manually:
+
+```bash
+# Create and activate the conda environment
+conda create -n monarchenv python=3.10 -y
+conda activate monarchenv
+
+# Install nightly rust toolchain
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+rustup toolchain install nightly
+rustup default nightly
+
+# Install non-python dependencies
+conda install libunwind -y
+
+# Install the correct cuda and cuda-toolkit versions for your machine
+sudo dnf install cuda-toolkit-12-0 cuda-12-0
+
+# Install clang-dev and nccl-dev
+sudo dnf install clang-devel libnccl-devel
+# Or, in some environments, the following may be necessary instead
+conda install -c conda-forge clangdev nccl
+conda update -n monarchenv --all -c conda-forge -y
+
+# Install build dependencies
+pip install -r build-requirements.txt
+# Install test dependencies
+pip install -r python/tests/requirements.txt
+
+# Build and install Monarch
+pip install --no-build-isolation .
+# or setup for development
+pip install --no-build-isolation -e .
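+
+# Optional sanity check: confirm the freshly built package imports from this
+# environment (assumes the `monarch` module name used in the examples below).
+python -c "import monarch; print(monarch.__file__)"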
+```
+
+## Verifying Your Installation
+
+After installation, you can verify that Monarch is working correctly by running the unit tests:
+
+```bash
+pytest python/tests/ -v -m "not oss_skip"
+```
+
+## Basic Usage
+
+Here's a simple example to get you started with Monarch:
+
+```python
+import torch
+import monarch as mon
+
+# Initialize Monarch
+mon.init()
+
+# Create a simple model
+model = torch.nn.Linear(10, 5)
+
+# Distribute the model using Monarch
+distributed_model = mon.distribute(model)
+
+# Create some input data
+input_data = torch.randn(8, 10)
+
+# Run a forward pass
+output = distributed_model(input_data)
+
+# Clean up
+mon.shutdown()
+```
+
+## Example: Ping Pong
+
+One of the simplest examples of using Monarch is the "ping pong" example, which demonstrates basic communication between processes:
+
+```python
+import monarch as mon
+import torch
+
+# Initialize Monarch
+mon.init()
+
+# Get the current process rank and world size
+rank = mon.get_rank()
+world_size = mon.get_world_size()
+
+# Create a tensor to send
+send_tensor = torch.tensor([rank], dtype=torch.float32)
+
+# Determine the destination rank
+dst_rank = (rank + 1) % world_size
+
+# Send the tensor to the destination rank
+mon.send(send_tensor, dst_rank)
+
+# Receive a tensor from the source rank
+src_rank = (rank - 1) % world_size
+recv_tensor = torch.zeros(1, dtype=torch.float32)
+mon.recv(recv_tensor, src_rank)
+
+print(f"Rank {rank} received {recv_tensor.item()} from rank {src_rank}")
+
+# Clean up
+mon.shutdown()
+```
+
+## Distributed Data Parallel Training
+
+Monarch makes it easy to implement distributed data parallel training:
+
+```python
+import monarch as mon
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+# Initialize Monarch
+mon.init()
+
+# Create a simple model
+model = nn.Linear(10, 5)
+model = mon.distribute(model)
+
+# Create optimizer
+optimizer = optim.SGD(model.parameters(), lr=0.01)
+
+# Create loss function
+criterion = nn.MSELoss()
+
+# Training loop
+for epoch in range(10):
+    # Assume data_loader is your distributed data loader
+    for data, target in data_loader:
+        # Forward pass
+        output = model(data)
+        loss = criterion(output, target)
+
+        # Backward pass and optimize
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+# Clean up
+mon.shutdown()
+```
+
+## Next Steps
+
+Now that you've got the basics, you can:
+
+1. Check out the [examples](examples) directory for more detailed demonstrations
+2. Explore the [API documentation](api) for a complete reference
+3. Learn about advanced features in the [How-to guides](howtos)
+
+## Troubleshooting
+
+If you encounter issues:
+
+- Make sure your CUDA environment is properly set up
+- Check that you're using a compatible version of PyTorch
+- Verify that all dependencies are installed correctly
+- Consult the [GitHub repository](https://github.com/pytorch-labs/monarch) for known issues
+
+Remember that Monarch is currently in an experimental stage, so you may encounter bugs or incomplete features. Contributions and bug reports are welcome!
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 00000000..096e506c
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,59 @@
+# Monarch 🦋
+
+**Monarch** is a distributed execution engine for PyTorch. Our overall goal is
+to deliver the high-quality user experience that people get from single-GPU
+PyTorch, but at cluster scale.
+
+> ⚠️ **Early Development Warning** Monarch is currently in an experimental
+> stage. You should expect bugs, incomplete features, and APIs that may change
+> in future versions. The project welcomes bugfixes, but to make sure things are
+> well coordinated you should discuss any significant change before starting the
+> work. It's recommended that you signal your intention to contribute in the
+> issue tracker, either by filing a new issue or by claiming an existing one.
+
+## What is Monarch?
+
+Monarch extends PyTorch's capabilities to efficiently run on distributed systems. It maintains the familiar PyTorch API while handling the complexities of distributed execution, making it easier to scale your deep learning workloads across multiple GPUs and nodes.
+
+Key features:
+- **Familiar PyTorch API** - Use the same PyTorch code you're already familiar with
+- **Efficient Distribution** - Scale your models across multiple GPUs and nodes
+- **Simplified Communication** - Built-in primitives for distributed communication
+- **Performance Optimized** - Designed for high performance at scale
+
+**Note:** Monarch is currently only supported on Linux systems.
+
+## Getting Started
+
+Here are some suggested steps to get started with Monarch:
+
+1. **Installation**: Install Monarch using pip: `pip install torchmonarch-nightly`
+2. **Learn the Basics**: Check out the [Getting Started](get_started) guide to learn the basics of Monarch
+3. **Explore Examples**: Review the [Examples](examples) to see Monarch in action
+4. **Dive Deeper**: Explore the [API Documentation](api) for more detailed information
+
+## Documentation Contents
+
+```{toctree}
+:maxdepth: 2
+:caption: Contents
+
+get_started
+examples
+api
+howtos
+glossary
+```
+
+## License
+
+Monarch is BSD-3 licensed, as found in the [LICENSE](https://github.com/pytorch-labs/monarch/blob/main/LICENSE) file.
+
+## Community
+
+We welcome contributions from the community! If you're interested in contributing, please:
+
+1. Check the [GitHub repository](https://github.com/pytorch-labs/monarch)
+2. Review existing issues or create a new one
+3. Discuss your proposed changes before starting work
+4. Submit a pull request with your changes