Skip to content

Commit ff2cf71

Browse files
committed
Merge branch 'main' of github.com:tile-ai/tilelang into gemm_sp_v2
2 parents 34d1af6 + 6bae64f commit ff2cf71

File tree

229 files changed

+7805
-2766
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

229 files changed

+7805
-2766
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
timeout-minutes: 30
4141
steps:
4242
- name: Checkout repository
43-
uses: actions/checkout@v5
43+
uses: actions/checkout@v6
4444
with:
4545
fetch-depth: 0
4646
submodules: recursive
@@ -104,7 +104,7 @@ jobs:
104104

105105
steps:
106106
- name: Checkout repository
107-
uses: actions/checkout@v5
107+
uses: actions/checkout@v6
108108
with:
109109
fetch-depth: 0
110110
submodules: recursive

.github/workflows/dist.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
name: Dist
22
on:
3+
workflow_dispatch:
34
schedule:
45
# 22:00 UTC, i.e. 06:00 China Standard Time (UTC+8)
56
- cron: "0 22 * * *"
@@ -52,7 +53,7 @@ jobs:
5253

5354
steps:
5455
- name: Checkout repository
55-
uses: actions/checkout@v5
56+
uses: actions/checkout@v6
5657
with:
5758
fetch-depth: 1
5859
submodules: recursive
@@ -122,7 +123,7 @@ jobs:
122123

123124
steps:
124125
- name: Checkout repository
125-
uses: actions/checkout@v5
126+
uses: actions/checkout@v6
126127
with:
127128
fetch-depth: 1
128129
submodules: recursive
@@ -160,7 +161,7 @@ jobs:
160161
fi
161162
162163
- name: Build wheels
163-
uses: pypa/cibuildwheel@v3.2
164+
uses: pypa/cibuildwheel@v3.3
164165
with:
165166
package-dir: .
166167
output-dir: wheelhouse

.github/workflows/pr-perfbench-bot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
runs-on: [self-hosted, nvidia]
3434
steps:
3535
- name: Checkout repository
36-
uses: actions/checkout@v5
36+
uses: actions/checkout@v6
3737
with:
3838
ref: refs/pull/${{ github.event.issue.number }}/merge
3939
fetch-depth: 0

.github/workflows/publish-docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
runs-on: [self-hosted, nvidia]
2626
steps:
2727
- name: Checkout repository
28-
uses: actions/checkout@v5
28+
uses: actions/checkout@v6
2929
with:
3030
fetch-depth: 0
3131
submodules: recursive

3rdparty/tvm

Submodule tvm updated from 093b2cd to e3af400

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ file(GLOB TILE_LANG_SRCS
138138
src/transform/*.cc
139139
src/op/*.cc
140140
src/target/utils.cc
141+
src/target/codegen_c_host.cc
141142
src/target/codegen_cpp.cc
142143
src/target/rt_mod_cpp.cc
143144
# intrin_rule doesn't have system dependency

benchmark/blocksparse_attention/benchmark_tilelang_block_sparse_fmha.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def Softmax(
9595
T.copy(scores_max, scores_max_prev)
9696
T.fill(scores_max, -T.infinity(accum_dtype))
9797
T.reduce_max(acc_s, scores_max, dim=1, clear=False)
98+
for i in T.Parallel(block_M):
99+
scores_max[i] = T.max(scores_max[i], scores_max_prev[i])
98100
# To do causal softmax, we need to set the scores_max to 0 if it is -inf
99101
# This process is called Check_inf in FlashAttention3 code, and it only need to be done
100102
# in the first ceil_div(kBlockM, kBlockN) steps.

benchmark/matmul_fp8/benchmark_matmul.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import itertools
3+
import torch
34
import logging
45
import tilelang
56
import tilelang.language as T
@@ -99,6 +100,7 @@ def get_configs(args, kwargs):
99100
block_K=[64, 128],
100101
num_stages=[0, 1, 2, 3],
101102
thread_num=[128, 256],
103+
k_pack=[1, 2],
102104
policy=[T.GemmWarpPolicy.Square],
103105
enable_rasteration=[True, False],
104106
)
@@ -125,6 +127,7 @@ def matmul(
125127
block_K=None,
126128
num_stages=None,
127129
thread_num=None,
130+
k_pack=None,
128131
policy=None,
129132
enable_rasteration=None,
130133
):
@@ -156,7 +159,7 @@ def matmul(
156159

157160
# Use FP8 for input data to reduce memory bandwidth,
158161
# accumulate in float for better numerical accuracy
159-
dtype = "float8_e4m3"
162+
dtype = "float8_e4m3fnuz" if torch.version.hip is not None else "float8_e4m3"
160163
accum_dtype = "float"
161164

162165
@T.prim_func
@@ -210,6 +213,7 @@ def main(
210213
C_local,
211214
transpose_B=True,
212215
policy=policy,
216+
k_pack=k_pack,
213217
)
214218
# Write back the results from C_local to the global memory C
215219
T.copy(C_local, C_shared)

cmake/load_tvm.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
set(TVM_BUILD_FROM_SOURCE TRUE)
44
set(TVM_SOURCE ${CMAKE_SOURCE_DIR}/3rdparty/tvm)
55

6-
if(DEFINED $ENV{TVM_ROOT})
6+
if(DEFINED ENV{TVM_ROOT})
77
if(EXISTS $ENV{TVM_ROOT}/cmake/config.cmake)
88
set(TVM_SOURCE $ENV{TVM_ROOT})
9+
message(STATUS "Using TVM_ROOT from environment variable: ${TVM_SOURCE}")
910
endif()
1011
endif()
1112

13+
message(STATUS "Using TVM source: ${TVM_SOURCE}")
14+
1215
set(TVM_INCLUDES
1316
${TVM_SOURCE}/include
1417
${TVM_SOURCE}/src

docs/get_started/Installation.md

Lines changed: 44 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,25 @@
88
- **Python Version**: >= 3.8
99
- **CUDA Version**: 12.0 <= CUDA < 13
1010

11-
The easiest way to install **tile-lang** is directly from PyPI using pip. To install the latest version, run the following command in your terminal:
11+
The easiest way to install tilelang is directly from PyPI using pip. To install the latest version, run the following command in your terminal:
1212

1313
```bash
1414
pip install tilelang
1515
```
1616

17-
Alternatively, you may choose to install **tile-lang** using prebuilt packages available on the Release Page:
17+
Alternatively, you may choose to install tilelang using prebuilt packages available on the Release Page:
1818

1919
```bash
2020
pip install tilelang-0.0.0.dev0+ubuntu.20.4.cu120-py3-none-any.whl
2121
```
2222

23-
To install the latest version of **tile-lang** from the GitHub repository, you can run the following command:
23+
To install the latest version of tilelang from the GitHub repository, you can run the following command:
2424

2525
```bash
2626
pip install git+https://github.com/tile-ai/tilelang.git
2727
```
2828

29-
After installing **tile-lang**, you can verify the installation by running:
29+
After installing tilelang, you can verify the installation by running:
3030

3131
```bash
3232
python -c "import tilelang; print(tilelang.__version__)"
@@ -40,31 +40,37 @@ python -c "import tilelang; print(tilelang.__version__)"
4040
- **Python Version**: >= 3.8
4141
- **CUDA Version**: >= 10.0
4242

43-
```bash
44-
docker run -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.01-py3
45-
```
43+
If you prefer Docker, please skip to the [Install Using Docker](#install-using-docker) section. This section focuses on building from source on a native Linux environment.
4644

47-
To build and install **tile-lang** directly from source, follow these steps. This process requires certain pre-requisites from Apache TVM, which can be installed on Ubuntu/Debian-based systems using the following commands:
45+
First, install the OS-level prerequisites on Ubuntu/Debian-based systems using the following commands:
4846

4947
```bash
5048
apt-get update
5149
apt-get install -y python3 python3-dev python3-setuptools gcc zlib1g-dev build-essential cmake libedit-dev
5250
```
5351

54-
After installing the prerequisites, you can clone the **tile-lang** repository and install it using pip:
52+
Then, clone the tilelang repository and install it using pip. The `-v` flag enables verbose output during the build process.
53+
54+
> **Note**: Use the `--recursive` flag to include necessary submodules. Tilelang currently depends on a customized version of TVM, which is included as a submodule. If you prefer to build against an existing TVM checkout (see [Building with Customized TVM Path](#using-existing-tvm)), you can skip cloning the TVM submodule, but you still need the other dependencies.
5555
5656
```bash
5757
git clone --recursive https://github.com/tile-ai/tilelang.git
5858
cd tilelang
5959
pip install . -v
6060
```
6161

62-
If you want to install **tile-lang** in development mode, you can run the following command:
62+
If you want to install tilelang in development mode, you can use the `-e` flag so that any changes to the Python files will be reflected immediately without reinstallation.
6363

6464
```bash
6565
pip install -e . -v
6666
```
6767

68+
> **Note**: Changes to C++ files require rebuilding the tilelang C++ library. See [Faster Rebuild for Developers](#faster-rebuild-for-developers) below. `pip install` creates a default `build` directory, so you can also run `make` directly in the `build` directory to rebuild, as described in [Working from Source via PYTHONPATH](#working-from-source-via-pythonpath) below.
69+
70+
(working-from-source-via-pythonpath)=
71+
72+
### Working from Source via `PYTHONPATH`
73+
6874
If you prefer to work directly from the source tree via `PYTHONPATH`, make sure the native extension is built first:
6975

7076
```bash
@@ -85,17 +91,23 @@ Some useful CMake options you can toggle while configuring:
8591
- `-DUSE_ROCM=ON` selects ROCm support when building on AMD GPUs.
8692
- `-DNO_VERSION_LABEL=ON` disables the backend/git suffix in `tilelang.__version__`.
8793

88-
We currently provide four methods to install **tile-lang**:
94+
(using-existing-tvm)=
8995

90-
1. [Install Using Docker](#install-method-1) (Recommended)
91-
2. [Install from Source (using the bundled TVM submodule)](#install-method-2)
92-
3. [Install from Source (using your own TVM installation)](#install-method-3)
96+
### Building with Customized TVM Path
9397

94-
(install-method-1)=
98+
If you already have a TVM codebase, use the `TVM_ROOT` environment variable to specify the location of your existing TVM repository when building tilelang:
9599

96-
### Method 1: Install Using Docker (Recommended)
100+
```bash
101+
TVM_ROOT=<your-tvm-repo> pip install . -v
102+
```
103+
104+
> **Note**: This will still rebuild the TVM-related libraries (stored in `TL_LIBS`). This method often leads to path issues; check `env.py` for environment variables that may not be set properly.
105+
106+
(install-using-docker)=
107+
108+
## Install Using Docker
97109

98-
For users who prefer a containerized environment with all dependencies pre-configured, **tile-lang** provides Docker images for different CUDA versions. This method is particularly useful for ensuring consistent environments across different systems and is the **recommended approach** for most users.
110+
For users who prefer a containerized environment with all dependencies pre-configured, tilelang provides Docker images for different CUDA versions. This method is particularly useful for ensuring consistent environments across different systems.
99111

100112
**Prerequisites:**
101113
- Docker installed on your system
@@ -142,82 +154,17 @@ docker run -itd \
142154
- `--name tilelang_b200`: Assigns a name to the container for easy management
143155
- `/bin/zsh`: Uses zsh as the default shell
144156

145-
4. **Access the Container**:
157+
4. **Access the Container and Verify Installation**:
146158

147159
```bash
148160
docker exec -it tilelang_b200 /bin/zsh
149-
```
150-
151-
5. **Verify Installation**:
152-
153-
Once inside the container, verify that **tile-lang** is working correctly:
154-
155-
```bash
161+
# Inside the container:
156162
python -c "import tilelang; print(tilelang.__version__)"
157163
```
158164

159-
You can now run TileLang examples and develop your applications within the containerized environment. The Docker image comes with all necessary dependencies pre-installed, including CUDA toolkit, TVM, and TileLang itself.
160-
161-
**Example Usage:**
162-
163-
After accessing the container, you can run TileLang examples:
164-
165-
```bash
166-
cd /home/tilelang/examples
167-
python elementwise/test_example_elementwise.py
168-
```
169-
170-
This Docker-based installation method provides a complete, isolated environment that works seamlessly on systems with compatible NVIDIA GPUs like the B200, ensuring optimal performance for TileLang applications.
171-
172-
(install-method-2)=
173-
174-
### Method 2: Install from Source (Using the Bundled TVM Submodule)
175-
176-
If you already have a compatible TVM installation, follow these steps:
177-
178-
1. **Clone the Repository**:
179-
180-
```bash
181-
git clone --recursive https://github.com/tile-ai/tilelang
182-
cd tilelang
183-
```
184-
185-
**Note**: Use the `--recursive` flag to include necessary submodules.
186-
187-
2. **Configure Build Options**:
188-
189-
Create a build directory and specify your existing TVM path:
190-
191-
```bash
192-
pip install . -v
193-
```
194-
195-
(install-method-3)=
196-
197-
### Method 3: Install from Source (Using Your Own TVM Installation)
198-
199-
If you prefer to use the built-in TVM version, follow these instructions:
200-
201-
1. **Clone the Repository**:
202-
203-
```bash
204-
git clone --recursive https://github.com/tile-ai/tilelang
205-
cd tilelang
206-
```
207-
208-
**Note**: Ensure the `--recursive` flag is included to fetch submodules.
209-
210-
2. **Configure Build Options**:
211-
212-
Copy the configuration file and enable the desired backends (e.g., LLVM and CUDA):
213-
214-
```bash
215-
TVM_ROOT=<your-tvm-repo> pip install . -v
216-
```
217-
218165
## Install with Nightly Version
219166

220-
For users who want access to the latest features and improvements before official releases, we provide nightly builds of **tile-lang**.
167+
For users who want access to the latest features and improvements before official releases, we provide nightly builds of tilelang.
221168

222169
```bash
223170
pip install tilelang -f https://tile-ai.github.io/whl/nightly/cu121/
@@ -252,24 +199,28 @@ Set `NO_TOOLCHAIN_VERSION=ON` to disable this.
252199

253200
### Run-time environment variables
254201

255-
<!-- TODO: tvm -->
202+
Please refer to the `env.py` file for a full list of supported run-time environment variables.
203+
204+
## Other Tips
256205

257-
## IDE Configs
206+
### IDE Configs
258207

259-
Building tilelang locally will automatically `compile_commands.json` file in `build` dir.
208+
Building tilelang locally will automatically generate a `compile_commands.json` file in the `build` directory.
260209
VSCode with clangd and [clangd extension](https://marketplace.visualstudio.com/items?itemName=llvm-vs-code-extensions.vscode-clangd) should be able to index that without extra configuration.
261210

262-
## Compile cache
211+
### Compile Cache
263212

264-
`ccache` will be automatically used if found.
213+
The default path of the compile cache is `~/.tilelang/cache`. `ccache` will be automatically used if found.
265214

266-
## Repairing wheels
215+
### Repairing Wheels
267216

268217
If you plan to use your wheel in another environment,
269-
it's recommend to use auditwheel (on Linux) or delocate (on Darwin)
218+
it's recommended to use auditwheel (on Linux) or delocate (on Darwin)
270219
to repair them.
271220

272-
## Faster rebuild for developers
221+
(faster-rebuild-for-developers)=
222+
223+
### Faster Rebuild for Developers
273224

274225
`pip install` introduces extra [un]packaging and takes ~30 sec to complete,
275226
even if no source change.

0 commit comments

Comments
 (0)