10
10
#endif
11
11
12
12
// CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
13
+ // CHECK: @Kernel2() #[[ATTR1:[0-9]+]]
14
+ // CHECK: @{{.*}}Kernel3{{.*}}() #[[ATTR1]]
13
15
// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
14
- // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
15
- // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
16
- // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
17
-
18
- // CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
19
- // CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
20
- // CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
21
-
22
- // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
23
- // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
24
- // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
25
- // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
26
-
27
- // CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
28
- // CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
29
- // CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
16
+ // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR2:[0-9]+]]
17
+ // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR3:[0-9]+]]
18
+ // CHECK: @{{.*}}Kernel7{{.*}}() #[[ATTR1]]
19
+ // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR4:[0-9]+]]
20
+
21
+ // CHECK-DAG: attributes #[[ATTR0]] = {{{.*}} "nvvm.maxntid"="256" "nvvm.minctasm"="2" {{.*}}}
22
+ // CHECK-DAG: attributes #[[ATTR1]] = {{{.*}} "nvvm.maxntid"="256" {{.*}}}
23
+ // CHECK-DAG: attributes #[[ATTR2]] = {{{.*}} "nvvm.maxntid"="356" "nvvm.minctasm"="258" {{.*}}}
24
+ // CHECK-DAG: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
25
+ // CHECK-DAG: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxntid"="100" "nvvm.minctasm"="12" {{.*}}}
26
+
27
+ // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR0:[0-9]+]]
28
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR0]]
29
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR1:[0-9]+]]
30
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel7_sm_90{{.*}} #[[ATTR2:[0-9]+]]
31
+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR3:[0-9]+]]
32
+
33
+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR0]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.maxntid"="256" "nvvm.minctasm"="2" {{.*}}}
34
+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR1]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.maxntid"="356" "nvvm.minctasm"="258" {{.*}}}
35
+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR2]] = {{{.*}} "nvvm.maxntid"="256" {{.*}}}
36
+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR3]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.maxntid"="100" "nvvm.minctasm"="12" {{.*}}}
30
37
31
38
// Test both max threads per block and Min cta per sm.
32
39
extern "C" {
@@ -37,8 +44,6 @@ Kernel1()
37
44
}
38
45
}
39
46
40
- // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
41
-
42
47
#ifdef USE_MAX_BLOCKS
43
48
// Test max threads per block and min/max cta per sm.
44
49
extern "C" {
@@ -48,8 +53,6 @@ Kernel1_sm_90()
48
53
{
49
54
}
50
55
}
51
-
52
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
53
56
#endif // USE_MAX_BLOCKS
54
57
55
58
// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -62,8 +65,6 @@ Kernel2()
62
65
}
63
66
}
64
67
65
- // CHECK: !{{[0-9]+}} = !{ptr @Kernel2, !"maxntidx", i32 256}
66
-
67
68
template <int max_threads_per_block>
68
69
__global__ void
69
70
__launch_bounds__ (max_threads_per_block)
@@ -72,7 +73,6 @@ Kernel3()
72
73
}
73
74
74
75
template __global__ void Kernel3<MAX_THREADS_PER_BLOCK>();
75
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
76
76
77
77
template <int max_threads_per_block, int min_blocks_per_mp>
78
78
__global__ void
@@ -82,7 +82,6 @@ Kernel4()
82
82
}
83
83
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
84
84
85
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
86
85
87
86
#ifdef USE_MAX_BLOCKS
88
87
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -93,7 +92,6 @@ Kernel4_sm_90()
93
92
}
94
93
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
95
94
96
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
97
95
#endif // USE_MAX_BLOCKS
98
96
99
97
const int constint = 100 ;
@@ -106,8 +104,6 @@ Kernel5()
106
104
}
107
105
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
108
106
109
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
110
-
111
107
#ifdef USE_MAX_BLOCKS
112
108
113
109
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -120,7 +116,6 @@ Kernel5_sm_90()
120
116
}
121
117
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
122
118
123
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
124
119
#endif // USE_MAX_BLOCKS
125
120
126
121
// Make sure we don't emit negative launch bounds values.
@@ -129,33 +124,25 @@ __launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
129
124
Kernel6()
130
125
{
131
126
}
132
- // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
133
127
134
128
__global__ void
135
129
__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
136
130
Kernel7()
137
131
{
138
132
}
139
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
140
- // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",
141
133
142
134
#ifdef USE_MAX_BLOCKS
143
135
__global__ void
144
136
__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
145
137
Kernel7_sm_90()
146
138
{
147
139
}
148
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
149
- // CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
150
- // CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
151
140
#endif // USE_MAX_BLOCKS
152
141
153
142
const char constchar = 12 ;
154
143
__global__ void __launch_bounds__ (constint, constchar) Kernel8() {}
155
- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
156
144
157
145
#ifdef USE_MAX_BLOCKS
158
146
const char constchar_2 = 14 ;
159
147
__global__ void __launch_bounds__ (constint, constchar, constchar_2) Kernel8_sm_90() {}
160
- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
161
148
#endif // USE_MAX_BLOCKS
0 commit comments