// shim.h -- forked from aws/aws-fpga
/**
* Copyright (C) 2017-2018 Xilinx, Inc
* Author: Sonal Santan
* AWS HAL Driver layered on top of kernel drivers
*
* Code copied from SDAccel XDMA based HAL driver
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may
* not use this file except in compliance with the License. A copy of the
* License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
#ifndef _XDMA_SHIM_H_
#define _XDMA_SHIM_H_
#include "xclhal.h"
#include "xclperf.h"
#include "drm.h"
#include <fstream>
#include <list>
#include <map>
#include <vector>
#include <string>
#include <mutex>
#include <cassert>
#ifndef INTERNAL_TESTING
#include "fpga_pci.h"
#include "fpga_mgmt.h"
#endif
// Work around GCC 4.8 + XDMA BAR implementation bugs:
// with -O3, PCIe BAR reads/writes are not reliable, so force -O2 as the maximum
// optimization level for pcieBarRead() and pcieBarWrite().
#if defined(__GNUC__) && defined(NDEBUG)
#define SHIM_O2 __attribute__ ((optimize("-O2")))
#else
#define SHIM_O2
#endif
namespace awsbwhal {
// Forward declaration so that the stream-insertion operator below can be
// declared ahead of the full AddresRange definition.
struct AddresRange;
// Stream-insertion operator for AddresRange. Only the declaration lives in
// this header; the definition is not visible here.
std::ostream& operator<< (std::ostream &strm, const AddresRange &rng);
/**
 * Address range [first, first + second) used as the key of an ordered map of
 * non-overlapping address ranges.
 *
 *   first  : start address of the range
 *   second : size of the range; zero marks a "probe" key that carries only an
 *            address and is used to look up the range containing that address
 *
 * The type is move-only: copy construction and copy assignment are deleted.
 */
struct AddresRange : public std::pair<uint64_t, size_t> {
    // size will be zero when we are looking up an address that was passed by the user
    AddresRange(uint64_t addr, size_t size = 0) : std::pair<uint64_t, size_t>(addr, size) {
    }
    AddresRange(AddresRange && rhs) noexcept : std::pair<uint64_t, size_t>(rhs.first, rhs.second) {
    }
    AddresRange(const AddresRange &rhs) = delete;
    AddresRange& operator=(const AddresRange &rhs) = delete;

    /**
     * Ordering used when AddresRange is a key in std::map.
     *
     * A probe key (size zero) compares equivalent to any range that contains
     * its address, which is what makes map::find(AddresRange(addr)) locate the
     * enclosing range.
     *
     * @param other  right-hand operand
     * @return true when *this is ordered strictly before other
     */
    bool operator < (const AddresRange& other) const {
        const bool thisIsProbe = (this->second == 0);
        const bool otherIsProbe = (other.second == 0);
        if (thisIsProbe == otherIsProbe)
            // Two regular ranges, or two bare addresses: order by start address.
            // Handling the probe/probe case here keeps the relation irreflexive,
            // as required for a std::map comparator.
            return (this->first < other.first);
        if (otherIsProbe)
            // Range vs. bare address: the range is "less" only when the address
            // lies at or beyond the end of the range.
            // (1000, 100) < (1200, 0)
            // (1000, 100) < (1100, 0) first range ends at 1099
            // Written as a subtraction so that a range ending at the very top
            // of the address space cannot wrap around, as (first + second) would.
            return (other.first >= this->first) &&
                   ((other.first - this->first) >= this->second);
        // Bare address vs. range
        // (1100, 0) < (1200, 100)
        return (this->first < other.first);
    }
};
/**
* Simple map of address range to its bo handle and mapped virtual address
*/
static const std::pair<unsigned, char *> mNullValue = std::make_pair(0xffffffff, nullptr);
class RangeTable {
std::map<AddresRange, std::pair<unsigned, char *>> mTable;
mutable std::mutex mMutex;
public:
void insert(uint64_t addr, size_t size, std::pair<unsigned, char *> bo) {
// assert(find(addr) == 0xffffffff);
std::lock_guard<std::mutex> lock(mMutex);
mTable[AddresRange(addr, size)] = bo;
}
std::pair<unsigned, char *> erase(uint64_t addr) {
std::lock_guard<std::mutex> lock(mMutex);
std::map<AddresRange, std::pair<unsigned, char *>>::const_iterator i = mTable.find(AddresRange(addr));
if (i == mTable.end())
return mNullValue;
std::pair<unsigned, char *> result = i->second;
mTable.erase(i);
return result;
}
std::pair<unsigned, char *> find(uint64_t addr) const {
std::lock_guard<std::mutex> lock(mMutex);
std::map<AddresRange, std::pair<unsigned, char *>>::const_iterator i = mTable.find(AddresRange(addr));
if (i == mTable.end())
return mNullValue;
return i->second;
}
};
// Memory alignment for DDR and AXI-MM trace access
/**
 * Owns a single buffer of `count` elements of T obtained from posix_memalign()
 * with the requested alignment. The buffer is released with free() when the
 * object is destroyed.
 *
 * If the allocation fails, getBuffer() returns a null pointer while size()
 * still reports the requested byte count, so callers must check getBuffer()
 * before using the buffer.
 *
 * Copying is disabled: two objects holding the same pointer would both call
 * free() on it in their destructors.
 */
template <typename T> class AlignedAllocator {
    void *mBuffer;
    size_t mCount;
public:
    /** @return the buffer viewed as T*, or nullptr if the allocation failed. */
    T *getBuffer() {
        return static_cast<T *>(mBuffer);
    }
    /** @return the requested buffer size in bytes (count * sizeof(T)). */
    size_t size() const {
        return mCount * sizeof(T);
    }
    /**
     * @param alignment  alignment in bytes, passed straight to posix_memalign()
     * @param count      number of T elements to allocate
     */
    AlignedAllocator(size_t alignment, size_t count) : mBuffer(nullptr), mCount(count) {
        if (posix_memalign(&mBuffer, alignment, count * sizeof(T))) {
            // Do not rely on what posix_memalign left in the pointer on failure.
            mBuffer = nullptr;
        }
    }
    ~AlignedAllocator() {
        if (mBuffer)
            free(mBuffer);
    }
    AlignedAllocator(const AlignedAllocator &) = delete;
    AlignedAllocator& operator=(const AlignedAllocator &) = delete;
};
// 64-bit value with every bit set.
// NOTE(review): presumably the "no valid device address" marker -- confirm against callers.
const uint64_t mNullAddr = 0xffffffffffffffffull;
// 0xffffffff, the same number stored in mNullValue.first.
// NOTE(review): declared as uint64_t although buffer-object handles in this header are
// unsigned int -- confirm the wider type is intended.
const uint64_t mNullBO = 0xffffffff;
// XDMA Shim
class AwsXcl{
struct ELARecord {
unsigned mStartAddress;
unsigned mEndAddress;
unsigned mDataCount;
std::streampos mDataPos;
ELARecord() : mStartAddress(0), mEndAddress(0),
mDataCount(0), mDataPos(0) {}
};
typedef std::list<ELARecord> ELARecordList;
typedef std::list<std::pair<uint64_t, uint64_t> > PairList;
public:
//Sarab: Added for HAL2 XOCL Driver support
//int xclGetErrorStatus(xclErrorStatus *info); Not supported for AWS
bool xclUnlockDevice();
unsigned int xclAllocBO(size_t size, xclBOKind domain, unsigned flags);
unsigned int xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags);
void xclFreeBO(unsigned int boHandle);
int xclWriteBO(unsigned int boHandle,
const void *src, size_t size, size_t seek);
int xclReadBO(unsigned int boHandle,
void *dst, size_t size, size_t skip);
void *xclMapBO(unsigned int boHandle, bool write);
int xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir,
size_t size, size_t offset);
int xclExportBO(unsigned int boHandle);
unsigned int xclImportBO(int fd, unsigned flags);
int xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties);
ssize_t xclUnmgdPread(unsigned flags, void *buf,
size_t count, uint64_t offset);
ssize_t xclUnmgdPwrite(unsigned flags, const void *buf,
size_t count, uint64_t offset);
// Bitstreams
int xclGetXclBinIdFromSysfs(uint64_t &xclbinid);
int xclLoadXclBin(const xclBin *buffer);
int xclLoadAxlf(const axlf *buffer);
int xclUpgradeFirmware(const char *fileName);
int xclUpgradeFirmware2(const char *file1, const char* file2);
//int xclUpgradeFirmwareXSpi(const char *fileName, int device_index=0); Not supported by AWS
int xclTestXSpi(int device_index);
int xclBootFPGA();
int xclRemoveAndScanFPGA();
int resetDevice(xclResetKind kind);
int xclReClock2(unsigned short region, const unsigned short *targetFreqMHz);
// Raw read/write
size_t xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size);
size_t xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size);
// Buffer management
uint64_t xclAllocDeviceBuffer(size_t size);
uint64_t xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags);
void xclFreeDeviceBuffer(uint64_t buf);
size_t xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek);
size_t xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip);
// Performance monitoring
// Control
double xclGetDeviceClockFreqMHz();
double xclGetReadMaxBandwidthMBps();
double xclGetWriteMaxBandwidthMBps();
//void xclSetOclRegionProfilingNumberSlots(uint32_t numSlots);
void xclSetProfilingNumberSlots(xclPerfMonType type, uint32_t numSlots);
size_t xclPerfMonClockTraining(xclPerfMonType type);
// Counters
size_t xclPerfMonStartCounters(xclPerfMonType type);
size_t xclPerfMonStopCounters(xclPerfMonType type);
size_t xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults);
//debug related
uint32_t getCheckerNumberSlots(int type);
uint32_t getIPCountAddrNames(int type, uint64_t *baseAddress, std::string * portNames);
size_t xclDebugReadCounters(xclDebugCountersResults* debugResult);
size_t xclDebugReadCheckers(xclDebugCheckersResults* checkerResult);
void readDebugIpLayout();
// Trace
size_t xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger);
size_t xclPerfMonStopTrace(xclPerfMonType type);
uint32_t xclPerfMonGetTraceCount(xclPerfMonType type);
size_t xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector);
// Sanity checks
int xclGetDeviceInfo2(xclDeviceInfo2 *info);
static AwsXcl *handleCheck(void *handle);
static unsigned xclProbe();
bool xclLockDevice();
unsigned getTAG() const {
return mTag;
}
bool isGood() const;
~AwsXcl();
AwsXcl(unsigned index, const char *logfileName, xclVerbosityLevel verbosity);
private:
size_t xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size);
size_t xclReadSkipCopy(uint64_t offset, void *hostBuf, size_t size);
bool zeroOutDDR();
bool isXPR() const {
return ((mDeviceInfo.mSubsystemId >> 12) == 4);
}
bool isMultipleOCLClockSupported() {
unsigned dsaNum = ((mDeviceInfo.mDeviceId << 16) | mDeviceInfo.mSubsystemId);
// 0x82384431 : TUL KU115 4ddr 3.1 DSA
return ((dsaNum == 0x82384431) || (dsaNum == 0x82384432))? true : false;
}
bool isUltraScale() const {
return (mDeviceInfo.mDeviceId & 0x8000);
}
// Core DMA code
SHIM_O2 int pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length);
SHIM_O2 int pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length);
int freezeAXIGate();
int freeAXIGate();
// PROM flashing
int prepare(unsigned startAddress, unsigned endAddress);
int program(std::ifstream& mcsStream, const ELARecord& record);
int program(std::ifstream& mcsStream);
int waitForReady(unsigned code, bool verbose = true);
int waitAndFinish(unsigned code, unsigned data, bool verbose = true);
//XSpi flashing.
bool prepareXSpi();
int programXSpi(std::ifstream& mcsStream, const ELARecord& record);
int programXSpi(std::ifstream& mcsStream);
bool waitTxEmpty();
bool isFlashReady();
//bool windDownWrites();
bool bulkErase();
bool sectorErase(unsigned Addr);
bool writeEnable();
#if 0
bool dataTransfer(bool read);
#endif
bool readPage(unsigned addr, uint8_t readCmd = 0xff);
bool writePage(unsigned addr, uint8_t writeCmd = 0xff);
unsigned readReg(unsigned offset);
int writeReg(unsigned regOffset, unsigned value);
bool finalTransfer(uint8_t *sendBufPtr, uint8_t *recvBufPtr, int byteCount);
bool getFlashId();
//All remaining read /write register commands can be issued through this function.
bool readRegister(unsigned commandCode, unsigned bytes);
bool writeRegister(unsigned commandCode, unsigned value, unsigned bytes);
bool select4ByteAddressMode();
bool deSelect4ByteAddressMode();
// Performance monitoring helper functions
bool isDSAVersion(unsigned majorVersion, unsigned minorVersion, bool onlyThisVersion);
unsigned getBankCount();
uint64_t getHostTraceTimeNsec();
uint64_t getPerfMonBaseAddress(xclPerfMonType type, uint32_t slotNum);
uint64_t getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum);
uint64_t getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum);
uint32_t getPerfMonNumberSlots(xclPerfMonType type);
uint32_t getPerfMonNumberSamples(xclPerfMonType type);
uint32_t getPerfMonNumberFifos(xclPerfMonType type);
uint32_t getPerfMonByteScaleFactor(xclPerfMonType type);
uint8_t getPerfMonShowIDS(xclPerfMonType type);
uint8_t getPerfMonShowLEN(xclPerfMonType type);
uint32_t getPerfMonSlotStartBit(xclPerfMonType type, uint32_t slotnum);
uint32_t getPerfMonSlotDataWidth(xclPerfMonType type, uint32_t slotnum);
size_t resetFifos(xclPerfMonType type);
uint32_t bin2dec(std::string str, int start, int number);
uint32_t bin2dec(const char * str, int start, int number);
std::string dec2bin(uint32_t n);
std::string dec2bin(uint32_t n, unsigned bits);
static std::string getDSAName(unsigned short deviceId, unsigned short subsystemId);
private:
// This is a hidden signature of this class and helps in preventing
// user errors when incorrect pointers are passed in as handles.
const unsigned mTag;
const int mBoardNumber;
const size_t maxDMASize;
bool mLocked;
const uint64_t mOffsets[XCL_ADDR_SPACE_MAX];
int mUserHandle;
#ifdef INTERNAL_TESTING
int mMgtHandle;
#else
pci_bar_handle_t ocl_kernel_bar; // AppPF BAR0 for OpenCL kernels
pci_bar_handle_t sda_mgmt_bar; // MgmtPF BAR4, for SDAccel Perf mon etc
pci_bar_handle_t ocl_global_mem_bar; // AppPF BAR4
#endif
uint32_t mMemoryProfilingNumberSlots;
uint32_t mOclRegionProfilingNumberSlots;
std::string mDevUserName;
// Information extracted from platform linker
bool mIsDebugIpLayoutRead = false;
bool mIsDeviceProfiling = false;
uint64_t mPerfMonFifoCtrlBaseAddress;
uint64_t mPerfMonFifoReadBaseAddress;
uint64_t mPerfMonBaseAddress[XSPM_MAX_NUMBER_SLOTS];
std::string mPerfMonSlotName[XSPM_MAX_NUMBER_SLOTS];
char *mUserMap;
std::ofstream mLogStream;
xclVerbosityLevel mVerbosity;
std::string mBinfile;
ELARecordList mRecordList;
xclDeviceInfo2 mDeviceInfo;
RangeTable mLegacyAddressTable;
#ifndef INTERNAL_TESTING
int sleepUntilLoaded( std::string afi );
int checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *info );
int loadDefaultAfiIfCleared( void );
#endif
public:
static const unsigned TAG;
};
}
#endif