Skip to content

Commit f887fb4

Browse files
committed
Merge branch 'main' into frontend-v2
2 parents 9d6659c + 5475f8e commit f887fb4

File tree

14 files changed

+948
-42
lines changed

14 files changed

+948
-42
lines changed

.clang-tidy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ExtraArgs: ['-v']
44
FormatStyle: file
55
UseColor: true
66
WarningsAsErrors: '*'
7-
ExcludeHeaderFilterRegex: '^(3rdparty|tvm)/.*$'
7+
HeaderFilterRegex: '^(?!.*(?:/|^)(3rdparty|tvm)/).*'
88

99
# NOTE: there must be no spaces before the '-', so put the comma last.
1010
Checks: >-

.github/workflows/dist.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ jobs:
9494
- name: Upload wheels
9595
# Skipped on pull requests to save artifact storage, as wheels are only needed for releases.
9696
if: github.event_name != 'pull_request'
97-
uses: actions/upload-artifact@v4
97+
uses: actions/upload-artifact@v5
9898
with:
9999
name: wheels-${{ matrix.python-version }}-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
100100
path: wheelhouse/*.whl
@@ -109,7 +109,7 @@ jobs:
109109
timeout-minutes: 15
110110
steps:
111111
- name: Download built wheels
112-
uses: actions/download-artifact@v5
112+
uses: actions/download-artifact@v6
113113
with:
114114
pattern: wheels-*
115115
path: dist
@@ -119,7 +119,7 @@ jobs:
119119
run: ls -lh dist/*
120120

121121
- name: Upload artifacts
122-
uses: actions/upload-artifact@v4
122+
uses: actions/upload-artifact@v5
123123
with:
124124
name: artifacts
125125
path: dist/*

src/op/builtin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ TVM_DLL const Op &initialize_descriptor();
503503
* This op is used to represent a descriptor start address setting operation in
504504
* tilelang.
505505
*/
506+
506507
TVM_DLL const Op &increase_descriptor_offset();
507508
/*!
508509
* \brief tilelang intrinsic for element-wise atomic addition.

src/tl_templates/cuda/atomic.h

Lines changed: 293 additions & 26 deletions
Large diffs are not rendered by default.

src/tl_templates/cuda/barrier.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ TL_DEVICE void fence_proxy_async() {
133133
asm volatile("fence.proxy.async.shared::cta;" : :);
134134
}
135135

136+
// Emits the PTX instruction `fence.mbarrier_init.release.cluster` through a
// volatile inline-asm statement that has no input or output operands.
// NOTE(review): presumably intended to be called after mbarrier objects are
// initialized so the initialization is ordered before later cluster-scope
// use; the required architecture (likely sm_90+) is not visible here — confirm
// against the PTX ISA and the callers.
TL_DEVICE void fence_barrier_init() {
137+
asm volatile("fence.mbarrier_init.release.cluster;" : :);
138+
}
139+
136140
// Indicate arrival of warp issuing TMA_STORE
137141
TL_DEVICE void tma_store_arrive() {
138142
asm volatile("cp.async.bulk.commit_group;");

src/tl_templates/cuda/debug.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,12 @@ __device__ void debug_print_buffer_value<int16_t>(const char *msg,
257257
msg, blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, threadIdx.y,
258258
threadIdx.z, buf_name, index, (int32_t)var);
259259
}
260+
261+
// Device-side assertion helper: forwards `cond` unchanged to the standard
// `assert` macro, so whether a false condition has any effect depends on how
// `assert` is configured for the build.
TL_DEVICE void device_assert(bool cond) {
  assert(cond);
}
262+
263+
// Assertion helper that reports a caller-supplied message before failing.
// When `cond` is false it prints "Device assert failed: <msg>" with printf and
// then invokes assert(0); when `cond` is true it does nothing.
//   cond - condition expected to hold.
//   msg  - C string passed to printf's %s; it is only read on failure.
// NOTE(review): if `assert` is compiled out (e.g. NDEBUG), a failure would
// only print the message and execution would continue — confirm the build
// flags used for generated kernels.
TL_DEVICE void device_assert_with_msg(bool cond, const char *msg) {
264+
if (!cond) {
265+
printf("Device assert failed: %s\n", msg);
266+
assert(0);
267+
}
268+
}

0 commit comments

Comments
 (0)