Skip to content

Commit

Permalink
[DRIVER][RUNTIME] Make runtime fully device agnostic (apache#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen authored and sergei-mironov committed Aug 8, 2018
1 parent bf0b70a commit 88d743c
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 150 deletions.
87 changes: 47 additions & 40 deletions vta/include/vta/driver.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
/*!
* Copyright (c) 2018 by Contributors
* \file vta_driver.h
* \brief General driver interface.
* \brief Driver interface that is used by runtime.
*
* Driver's implementation is device specific.
*/

#ifndef VTA_DRIVER_H_
Expand All @@ -11,16 +13,50 @@
extern "C" {
#endif

#include <stdlib.h>
#include <stdint.h>
#include <stdlib.h>

/*! \brief Memory management constants */
/*! \brief Memory management constants for cached memory */
#define VTA_CACHED 1
/*! \brief Memory management constants */
/*! \brief Memory management constants for non-cached memory */
#define VTA_NOT_CACHED 0

/*! \brief VTA command handle */
typedef void * VTAHandle;
/*! \brief Physically contiguous buffer size limit */
#ifndef VTA_MAX_XFER
#define VTA_MAX_XFER (1<<22)
#endif

/*! \brief Device resource context */
typedef void * VTADeviceHandle;

/*! \brief physical address */
typedef uint32_t vta_phy_addr_t;

/*!
* \brief Allocate a device resource handle
* \return The device handle.
*/
VTADeviceHandle VTADeviceAlloc();

/*!
* \brief Free a device handle
* \param handle The device handle to be freed.
*/
void VTADeviceFree(VTADeviceHandle handle);

/*!
* \brief Launch the instruction block and wait until it is done.
* \param device The device handle.
* \param insn_phy_addr The physical address of instruction stream.
* \param insn_count Instruction count.
* \param wait_cycles The maximum number of cycles to wait.
*
* \return 0 if running is successful, 1 if timeout.
*/
int VTADeviceRun(VTADeviceHandle device,
vta_phy_addr_t insn_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles);

/*!
* \brief Allocates a physically contiguous region in memory (limited by VTA_MAX_XFER).
Expand All @@ -41,52 +77,23 @@ void VTAMemFree(void* buf);
* \param buf Pointer to memory region allocated with VTAMemAlloc.
* \return The physical address of the memory region.
*/
uint32_t VTAGetMemPhysAddr(void* buf);
vta_phy_addr_t VTAGetMemPhysAddr(void* buf);

/*!
* \brief Flushes the region of memory out of the CPU cache to DRAM.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed.
* This needs to be the physical address.
* \param size Size of the region to flush in Bytes.
*/
void VTAFlushCache(void* buf, int size);
void VTAFlushCache(vta_phy_addr_t buf, int size);

/*!
* \brief Invalidates the region of memory that is cached.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be invalidated.
* This needs to be the physical address.
* \param size Size of the region to invalidate in Bytes.
*/
void VTAInvalidateCache(void* buf, int size);

/*!
* \brief Returns a memory map to FPGA configuration registers.
* \param addr The base physical address of the configuration registers.
* \param length The size of the memory mapped region in bytes.
* \return A pointer to the memory mapped region.
*/
void *VTAMapRegister(unsigned addr, size_t length);

/*!
* \brief Deletes the configuration register memory map.
* \param vta The memory mapped region.
* \param length The size of the memory mapped region in bytes.
*/
void VTAUnmapRegister(void *vta, size_t length);

/*!
* \brief Writes to a memory mapped configuration register.
* \param vta_base The handle to the memory mapped configuration registers.
* \param offset The offset of the register to write to.
* \param val The value to be written to the memory mapped register.
*/
void VTAWriteMappedReg(VTAHandle vta_base, unsigned offset, unsigned val);

/*!
* \brief Reads from the memory mapped configuration register.
* \param vta_base The handle to the memory mapped configuration registers.
* \param offset The offset of the register to read from.
* \return The value read from the memory mapped register.
*/
unsigned VTAReadMappedReg(VTAHandle vta_base, unsigned offset);
void VTAInvalidateCache(vta_phy_addr_t buf, int size);

/*!
* \brief Programming the bit stream on the FPGA.
Expand Down
4 changes: 2 additions & 2 deletions vta/src/data_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct DataBuffer {
*/
void InvalidateCache(size_t offset, size_t size) {
// Cache maintenance is only issued when the buffer is not coherent.
if (!kBufferCoherent) {
// NOTE(review): this view shows both sides of the change; the first call
// casts the address to void*, the second passes phy_addr_ + offset as is.
VTAInvalidateCache(reinterpret_cast<void*>(phy_addr_ + offset), size);
VTAInvalidateCache(phy_addr_ + offset, size);
}
}
/*!
Expand All @@ -45,7 +45,7 @@ struct DataBuffer {
*/
void FlushCache(size_t offset, size_t size) {
// Cache maintenance is only issued when the buffer is not coherent.
if (!kBufferCoherent) {
// NOTE(review): this view shows both sides of the change; the first call
// casts the address to void*, the second passes phy_addr_ + offset as is.
VTAFlushCache(reinterpret_cast<void*>(phy_addr_ + offset), size);
VTAFlushCache(phy_addr_ + offset, size);
}
}
/*!
Expand Down
140 changes: 110 additions & 30 deletions vta/src/pynq/pynq_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

#include <vta/driver.h>
#include <thread>
#include "./pynq_driver.h"


Expand All @@ -16,16 +17,16 @@ void VTAMemFree(void* buf) {
cma_free(buf);
}

// Returns whatever cma_get_phy_addr reports for buf.
// NOTE(review): both the uint32_t and the vta_phy_addr_t signature lines
// are present in this view of the change.
uint32_t VTAGetMemPhysAddr(void* buf) {
vta_phy_addr_t VTAGetMemPhysAddr(void* buf) {
return cma_get_phy_addr(buf);
}

// Forwards the flush request to xlnkFlushCache.
// NOTE(review): both the void* and the vta_phy_addr_t variants are present
// in this view; the latter casts the address to void* before forwarding.
void VTAFlushCache(void* buf, int size) {
xlnkFlushCache(buf, size);
void VTAFlushCache(vta_phy_addr_t buf, int size) {
xlnkFlushCache(reinterpret_cast<void*>(buf), size);
}

// Forwards the invalidate request to xlnkInvalidateCache.
// NOTE(review): both the void* and the vta_phy_addr_t variants are present
// in this view; the latter casts the address to void* before forwarding.
void VTAInvalidateCache(void* buf, int size) {
xlnkInvalidateCache(buf, size);
void VTAInvalidateCache(vta_phy_addr_t buf, int size) {
xlnkInvalidateCache(reinterpret_cast<void*>(buf), size);
}

void *VTAMapRegister(uint32_t addr, size_t length) {
Expand Down Expand Up @@ -57,33 +58,112 @@ uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
return *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset));
}

/*!
 * \brief Device context that holds the memory-mapped register windows of the
 *  four VTA stages (fetch, load, compute, store).
 *
 * The windows are mapped in the constructor and unmapped in the destructor.
 * Copying is disabled: a copy would hold the same four mappings and unmap
 * them a second time when destroyed.
 */
class VTADevice {
 public:
  /*! \brief Map the register window of every VTA stage. */
  VTADevice() {
    // VTA stage handles
    vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR, VTA_RANGE);
    vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR, VTA_RANGE);
    vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR, VTA_RANGE);
    vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR, VTA_RANGE);
  }

  /*! \brief Unmap the register window of every VTA stage. */
  ~VTADevice() {
    // Close VTA stage handle
    VTAUnmapRegister(vta_fetch_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_load_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_compute_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_store_handle_, VTA_RANGE);
  }

  // The mappings are released exactly once by the destructor, so the object
  // must not be copied.
  VTADevice(const VTADevice&) = delete;
  VTADevice& operator=(const VTADevice&) = delete;

  /*!
   * \brief Start the VTA on an instruction stream and poll until it is done.
   * \param insn_phy_addr The physical address of the instruction stream.
   * \param insn_count Instruction count.
   * \param wait_cycles The maximum number of polls of the done flag.
   * \return 0 if VTA_DONE was read within wait_cycles polls, 1 otherwise.
   */
  int Run(vta_phy_addr_t insn_phy_addr,
          uint32_t insn_count,
          uint32_t wait_cycles) {
    // NOTE: Register address map is derived from the auto-generated
    // driver files available under hardware/build/vivado/<design>/export/driver
    // FETCH @ 0x10 : Data signal of insn_count_V
    VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count);
    // FETCH @ 0x18 : Data signal of insns_V
    VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr);
    // LOAD @ 0x10 : Data signal of inputs_V
    VTAWriteMappedReg(vta_load_handle_, 0x10, 0);
    // LOAD @ 0x18 : Data signal of weight_V
    VTAWriteMappedReg(vta_load_handle_, 0x18, 0);
    // COMPUTE @ 0x20 : Data signal of uops_V
    VTAWriteMappedReg(vta_compute_handle_, 0x20, 0);
    // COMPUTE @ 0x28 : Data signal of biases_V
    VTAWriteMappedReg(vta_compute_handle_, 0x28, 0);
    // STORE @ 0x10 : Data signal of outputs_V
    VTAWriteMappedReg(vta_store_handle_, 0x10, 0);

    // VTA start
    VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
    VTAWriteMappedReg(vta_load_handle_, 0x0, VTA_AUTORESTART);
    VTAWriteMappedReg(vta_compute_handle_, 0x0, VTA_AUTORESTART);
    VTAWriteMappedReg(vta_store_handle_, 0x0, VTA_AUTORESTART);

    // Loop until the VTA is done: poll the compute stage register at 0x18
    // and yield the CPU between polls.
    unsigned t, flag = 0;
    for (t = 0; t < wait_cycles; ++t) {
      flag = VTAReadMappedReg(vta_compute_handle_, 0x18);
      if (flag == VTA_DONE) break;
      std::this_thread::yield();
    }
    // Report error if timeout (the loop ran out without seeing VTA_DONE)
    return t < wait_cycles ? 0 : 1;
  }

 private:
  // VTA handles (register maps)
  void* vta_fetch_handle_{nullptr};
  void* vta_load_handle_{nullptr};
  void* vta_compute_handle_{nullptr};
  void* vta_store_handle_{nullptr};
};

/*!
 * \brief Create a VTADevice on the heap and return it as an opaque handle.
 * \return The device handle; VTADeviceFree deletes it.
 */
VTADeviceHandle VTADeviceAlloc() {
  VTADevice* device = new VTADevice();
  return device;
}

/*!
 * \brief Destroy the VTADevice behind an opaque handle.
 * \param handle The device handle to be freed.
 */
void VTADeviceFree(VTADeviceHandle handle) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  delete device;
}

/*!
 * \brief Run an instruction stream on the VTADevice behind an opaque handle.
 * \param handle The device handle.
 * \param insn_phy_addr The physical address of the instruction stream.
 * \param insn_count Instruction count.
 * \param wait_cycles The maximum number of cycles to wait.
 * \return The result of VTADevice::Run (0 on success, 1 on timeout).
 */
int VTADeviceRun(VTADeviceHandle handle,
                 vta_phy_addr_t insn_phy_addr,
                 uint32_t insn_count,
                 uint32_t wait_cycles) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  const int status = device->Run(insn_phy_addr, insn_count, wait_cycles);
  return status;
}

/*!
 * \brief Program the FPGA by copying a bitstream file into the device
 *  configuration file.
 *
 * Writes '0' to VTA_PYNQ_BS_IS_PARTIAL first (presumably marking the
 * bitstream as non-partial -- confirm against the devcfg driver), then
 * copies the bitstream byte by byte into VTA_PYNQ_BS_XDEVCFG.
 * Prints a message and exits the process with status 1 if any of the
 * three files cannot be opened.
 *
 * \param bitstream Path of the bitstream file to program.
 */
void VTAProgram(const char* bitstream) {
  FILE* partial = fopen(VTA_PYNQ_BS_IS_PARTIAL, "w");
  if (partial == nullptr) {
    // Nothing was opened, so there is nothing to close: fclose on a null
    // stream is undefined behaviour.
    printf("Cannot open partial config file %s\n", VTA_PYNQ_BS_IS_PARTIAL);
    exit(1);
  }
  fputc('0', partial);
  fclose(partial);

  FILE* src = fopen(bitstream, "rb");
  if (src == nullptr) {
    printf("Cannot open bitstream %s\n", bitstream);
    exit(1);
  }

  FILE* dst = fopen(VTA_PYNQ_BS_XDEVCFG, "wb");
  if (dst == nullptr) {
    printf("Cannot open device file %s\n", VTA_PYNQ_BS_XDEVCFG);
    // Only the source stream is open at this point; release it, not dst.
    fclose(src);
    exit(1);
  }

  // Stream the bitstream into the device file one byte at a time.
  for (int elem = fgetc(src); elem != EOF; elem = fgetc(src)) {
    fputc(elem, dst);
  }
  fclose(src);
  fclose(dst);
}
8 changes: 5 additions & 3 deletions vta/src/pynq/pynq_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ void xlnkFlushCache(void* buf, int size);
void xlnkInvalidateCache(void* buf, int size);
#endif

void *VTAMapRegister(uint32_t addr, size_t length);
void VTAUnmapRegister(void *vta, size_t length);
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);

/*! \brief (Pynq only) Partial bitstream status file path */
#define VTA_PYNQ_BS_IS_PARTIAL "/sys/devices/soc0/amba/f8007000.devcfg/is_partial_bitstream"
/*! \brief (Pynq only) Bitstream destination file path */
Expand All @@ -44,9 +49,6 @@ void xlnkInvalidateCache(void* buf, int size);
/*! \brief (Pynq only) MMIO driver constant */
#define VTA_PYNQ_MMIO_WORD_MASK (~(MMIO_WORD_LENGTH - 1))

/*! \brief Physically contiguous buffer size limit */
#define VTA_MAX_XFER (1<<22)

/*! \brief VTA configuration register address range */
#define VTA_RANGE 0x100
/*! \brief VTA configuration register start value */
Expand Down
Loading

0 comments on commit 88d743c

Please sign in to comment.