1
1
2
2
# FPGA lsu
3
3
4
- The Intel FPGA ` lsu ` class is implemented in ` CL/sycl/intel /fpga_lsu.hpp ` which
5
- is included in ` CL/sycl/intel /fpga_extensions.hpp ` .
4
+ The Intel FPGA ` lsu ` class is implemented in ` CL/sycl/INTEL /fpga_lsu.hpp ` which
5
+ is included in ` CL/sycl/INTEL /fpga_extensions.hpp ` .
6
6
7
- The class ` cl::sycl::intel ::lsu ` allows users to explicitly request that the
7
+ The class ` cl::sycl::INTEL ::lsu ` allows users to explicitly request that the
8
8
implementation of a global memory access is configured in a certain way. The
9
9
class has two member functions, ` load() ` and ` store() ` which allow loading from
10
10
and storing to a ` global_ptr ` , respectively, and is templated on the following
11
11
4 optional paremeters:
12
12
13
- 1 . ** ` cl::sycl::intel ::burst_coalesce<B> ` , where ` B ` is a boolean** : request,
13
+ 1 . ** ` cl::sycl::INTEL ::burst_coalesce<B> ` , where ` B ` is a boolean** : request,
14
14
to the extent possible, that a dynamic burst coalescer be implemented when
15
15
` load ` or ` store ` are called. The default value of this parameter is ` false ` .
16
- 2 . ** ` cl::sycl::intel ::cache<N> ` , where ` N ` is an integer greater or equal to
16
+ 2 . ** ` cl::sycl::INTEL ::cache<N> ` , where ` N ` is an integer greater or equal to
17
17
0** : request, to the extent possible, that a read-only cache of the specified
18
18
size in bytes be implemented when when ` load ` is called. It is not allowed to
19
19
use that parameter for ` store ` . The default value of this parameter is ` 0 ` .
20
- 3 . ** ` cl::sycl::intel ::statically_coalesce<N > ` , where ` B ` is a boolean** :
20
+ 3 . ** ` cl::sycl::INTEL ::statically_coalesce<B > ` , where ` B ` is a boolean** :
21
21
request, to the extent possible, that ` load ` or ` store ` accesses, is allowed to
22
22
be statically coalesced with other memory accesses at compile time. The default
23
23
value of this parameter is ` true ` .
24
- 4 . ** ` cl::sycl::intel ::prefetch<B> ` , where ` N ` is a boolean** : request, to the
24
+ 4 . ** ` cl::sycl::INTEL ::prefetch<B> ` , where ` B ` is a boolean** : request, to the
25
25
extent possible, that a prefetcher be implemented when ` load ` is called. It is
26
26
not allowed to use that parameter for ` store ` . The default value of this
27
27
parameter is ` false ` .
28
28
29
29
Currently, not every combination of parameters is allowed due to limitations in
30
30
the backend. The following rules apply:
31
- 1 . For ` store ` , ` cl::sycl::intel ::cache ` must be ` 0 ` and
32
- ` cl::sycl::intel ::prefetch ` must be ` false ` .
33
- 2 . For ` load ` , if ` cl::sycl::intel ::cache ` is set to a value greater than ` 0 ` ,
34
- then ` cl::sycl::intel ::burst_coalesce ` must be set to ` true ` .
35
- 3 . For ` load ` , exactly one of ` cl::sycl::intel ::prefetch ` and
36
- ` cl::sycl::intel ::burst_coalesce ` is allowed to be ` true ` .
37
- 4 . For ` load ` , exactly one of ` cl::sycl::intel ::prefetch ` and
38
- ` cl::sycl::intel ::cache ` is allowed to be ` true ` .
31
+ 1 . For ` store ` , ` cl::sycl::INTEL ::cache ` must be ` 0 ` and
32
+ ` cl::sycl::INTEL ::prefetch ` must be ` false ` .
33
+ 2 . For ` load ` , if ` cl::sycl::INTEL ::cache ` is set to a value greater than ` 0 ` ,
34
+ then ` cl::sycl::INTEL ::burst_coalesce ` must be set to ` true ` .
35
+ 3 . For ` load ` , exactly one of ` cl::sycl::INTEL ::prefetch ` and
36
+ ` cl::sycl::INTEL ::burst_coalesce ` is allowed to be ` true ` .
37
+ 4 . For ` load ` , exactly one of ` cl::sycl::INTEL ::prefetch ` and
38
+ ` cl::sycl::INTEL ::cache ` is allowed to be ` true ` .
39
39
40
40
## Implementation
41
41
@@ -47,7 +47,7 @@ template <class... mem_access_params> class lsu final {
47
47
public:
48
48
lsu() = delete;
49
49
50
- template <typename T > static T & load(sycl::global_ptr<T > Ptr) {
50
+ template <typename T > static T load(sycl::global_ptr<T > Ptr) {
51
51
check_load();
52
52
#if defined(__ SYCL_DEVICE_ONLY__ ) && __ has_builtin(__ builtin_intel_fpga_mem)
53
53
return * __ builtin_intel_fpga_mem((T * )Ptr,
@@ -77,7 +77,7 @@ public:
77
77
## Usage
78
78
79
79
```c++
80
- #include <CL/sycl/intel /fpga_extensions.hpp>
80
+ #include <CL/sycl/INTEL /fpga_extensions.hpp>
81
81
...
82
82
cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
83
83
cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
@@ -91,19 +91,19 @@ Queue.submit([&](cl::sycl::handler &cgh) {
91
91
auto output_ptr = output_accessor.get_pointer();
92
92
93
93
using PrefetchingLSU =
94
- cl::sycl::intel ::lsu<cl::sycl::intel ::prefetch<true>,
95
- cl::sycl::intel ::statically_coalesce<false>>;
94
+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::prefetch<true>,
95
+ cl::sycl::INTEL ::statically_coalesce<false>>;
96
96
97
97
using BurstCoalescedLSU =
98
- cl::sycl::intel ::lsu<cl::sycl::intel ::burst_coalesce<false>,
99
- cl::sycl::intel ::statically_coalesce<false>>;
98
+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::burst_coalesce<false>,
99
+ cl::sycl::INTEL ::statically_coalesce<false>>;
100
100
101
101
using CachingLSU =
102
- cl::sycl::intel ::lsu<cl::sycl::intel ::burst_coalesce<true>,
103
- cl::sycl::intel ::cache<1024>,
104
- cl::sycl::intel ::statically_coalesce<true>>;
102
+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::burst_coalesce<true>,
103
+ cl::sycl::INTEL ::cache<1024>,
104
+ cl::sycl::INTEL ::statically_coalesce<true>>;
105
105
106
- using PipelinedLSU = cl::sycl::intel ::lsu<>;
106
+ using PipelinedLSU = cl::sycl::INTEL ::lsu<>;
107
107
108
108
int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
109
109
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
0 commit comments