Skip to content

Commit

Permalink
wait for command completion through drm_timeline_syncobj (#293)
Browse files Browse the repository at this point in the history
Signed-off-by: Max Zhen <max.zhen@amd.com>
  • Loading branch information
maxzhen authored Oct 28, 2024
1 parent 59f1d62 commit f60608b
Show file tree
Hide file tree
Showing 13 changed files with 166 additions and 53 deletions.
118 changes: 83 additions & 35 deletions src/driver/amdxdna/aie2_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

#include <linux/timekeeping.h>
#include <drm/drm_syncobj.h>

#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
Expand Down Expand Up @@ -583,6 +584,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}

static void aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
struct drm_file *filp = hwctx->client->filp;
struct drm_syncobj *syncobj;
u32 hdl;
int ret;

hwctx->priv->syncobj = NULL;
hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

ret = drm_syncobj_create(&syncobj, 0, NULL);
if (ret) {
XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
return;
}
ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
drm_syncobj_put(syncobj);
if (ret) {
XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
return;
}
hwctx->priv->syncobj = syncobj;
hwctx->syncobj_hdl = hdl;
}

static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
struct drm_file *filp = hwctx->client->filp;
u32 hdl = hwctx->syncobj_hdl;
struct drm_syncobj *syncobj;

if (hdl == AMDXDNA_INVALID_FENCE_HANDLE)
return;

hwctx->priv->syncobj = NULL;
hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

spin_lock(&filp->syncobj_table_lock);
syncobj = idr_remove(&filp->syncobj_idr, hdl);
spin_unlock(&filp->syncobj_table_lock);
drm_syncobj_put(syncobj);
}

/*
 * Publish a command's output fence on the context's timeline syncobj.
 *
 * @hwctx:  hardware context whose syncobj receives the point
 * @ofence: output fence of the submitted command
 * @seq:    command sequence number, used as the timeline point value
 *
 * Silently does nothing when the context has no syncobj. If the fence chain
 * node cannot be allocated the point is not added; this is logged because a
 * waiter on the syncobj will then not find a point for @seq. The function
 * cannot report the failure to its caller (it returns void).
 *
 * NOTE(review): the visible caller invokes this after releasing io_lock, so
 * concurrent submitters may add points out of sequence order -- confirm
 * whether that can happen and whether it matters for the timeline.
 */
static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx,
				       struct dma_fence *ofence, u64 seq)
{
	struct drm_syncobj *syncobj = hwctx->priv->syncobj;
	struct dma_fence_chain *chain;

	if (!syncobj)
		return;

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(hwctx->client->xdna,
			 "Alloc fence chain failed, cmd %llu not added to ctx syncobj",
			 seq);
		return;
	}

	/* The chain node is handed over to the syncobj together with the fence. */
	drm_syncobj_add_point(syncobj, chain, ofence, seq);
}

int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
struct amdxdna_client *client = hwctx->client;
Expand Down Expand Up @@ -691,6 +752,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
goto release_resource;
}

aie2_ctx_syncobj_create(hwctx);
hwctx->status = HWCTX_STATE_INIT;

XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
Expand Down Expand Up @@ -727,6 +790,8 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
struct amdxdna_dev *xdna;
int idx;

aie2_ctx_syncobj_destroy(hwctx);

xdna = hwctx->client->xdna;
drm_sched_wqueue_stop(&hwctx->priv->sched);

Expand Down Expand Up @@ -1002,6 +1067,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
struct ww_acquire_ctx acquire_ctx;
struct amdxdna_gem_obj *abo;
unsigned long timeout = 0;
struct dma_fence *ofence;
int ret, i;

ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
Expand Down Expand Up @@ -1064,20 +1130,22 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
ret = aie2_hwctx_add_job(hwctx, job);
if (ret) {
mutex_unlock(&hwctx->priv->io_lock);

if (ret == -EAGAIN) {
// Waiting for the first pending cmd to complete before trying again.
int res = aie2_cmd_wait(hwctx, hwctx->submitted - HWCTX_MAX_CMDS, 0);
if (!res)
goto again;
aie2_cmd_wait(hwctx, hwctx->submitted - HWCTX_MAX_CMDS, 0);
goto again;
}
goto signal_fence;
}

*seq = job->seq;
ofence = dma_fence_get(job->out_fence);

drm_sched_entity_push_job(&job->base);
mutex_unlock(&hwctx->priv->io_lock);

aie2_ctx_syncobj_add_fence(hwctx, ofence, *seq);
dma_fence_put(ofence);
return 0;

signal_fence:
Expand All @@ -1088,60 +1156,40 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
return ret;
}

int aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
signed long remaining = MAX_SCHEDULE_TIMEOUT;
struct amdxdna_sched_job *job;
struct dma_fence *out_fence;
long ret;

mutex_lock(&hwctx->priv->io_lock);
job = aie2_hwctx_get_job(hwctx, seq);
if (IS_ERR(job)) {
if (IS_ERR_OR_NULL(job)) {
mutex_unlock(&hwctx->priv->io_lock);
ret = PTR_ERR(job);
goto out;
return ERR_CAST(job);
}

if (unlikely(!job)) {
mutex_unlock(&hwctx->priv->io_lock);
ret = 0;
goto out;
}
out_fence = dma_fence_get(job->out_fence);
mutex_unlock(&hwctx->priv->io_lock);
return out_fence;
}

/*
 * Wait for the command with sequence number @seq to complete.
 *
 * @hwctx:   hardware context the command was submitted to
 * @seq:     sequence number of the command
 * @timeout: timeout in milliseconds; 0 means wait without a time limit
 *
 * The wait is interruptible.
 *
 * Return: 0 when the command completed or when no job is found for @seq
 * (aie2_cmd_get_out_fence() returned NULL), -ETIME when the timeout expired,
 * or a negative error code from the fence lookup or from the wait itself.
 */
int aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
{
	signed long remaining = MAX_SCHEDULE_TIMEOUT;
	struct dma_fence *out_fence;
	long ret;

	out_fence = aie2_cmd_get_out_fence(hwctx, seq);
	/* The lookup yields NULL or an ERR_PTR when there is nothing to wait on. */
	if (!out_fence)
		return 0;
	if (IS_ERR(out_fence))
		return PTR_ERR(out_fence);

	if (timeout)
		remaining = msecs_to_jiffies(timeout);

	ret = dma_fence_wait_timeout(out_fence, true, remaining);
	if (!ret)
		ret = -ETIME;	/* timed out */
	else if (ret > 0)
		ret = 0;	/* signaled; remaining jiffies are not reported */

	dma_fence_put(out_fence);
	return ret;
}

/*
 * Look up the output fence of the command with sequence number @seq.
 *
 * The job lookup and the fence reference grab are both done under
 * hwctx->priv->io_lock.
 *
 * Return: a new reference to the job's output fence (release it with
 * dma_fence_put()), NULL when the job lookup returned NULL, or the lookup's
 * error re-encoded as an ERR_PTR.
 */
struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence;
	struct amdxdna_sched_job *job;

	mutex_lock(&hwctx->priv->io_lock);

	job = aie2_hwctx_get_job(hwctx, seq);
	if (IS_ERR_OR_NULL(job))
		fence = ERR_CAST(job);	/* NULL stays NULL, errors stay errors */
	else
		fence = dma_fence_get(job->out_fence);

	mutex_unlock(&hwctx->priv->io_lock);

	return fence;
}

void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
unsigned long cur_seq)
{
Expand Down
1 change: 0 additions & 1 deletion src/driver/amdxdna/aie2_pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,5 @@ const struct amdxdna_dev_ops aie2_ops = {
.cmd_wait = aie2_cmd_wait,
.hmm_invalidate = aie2_hmm_invalidate,
.debugfs = aie2_debugfs_init,
// TODO: cmd_wait can be removed when all caller move to cmd_get_out_fence
.cmd_get_out_fence = aie2_cmd_get_out_fence,
};
1 change: 1 addition & 0 deletions src/driver/amdxdna/aie2_pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ struct amdxdna_hwctx_priv {

struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
struct workqueue_struct *submit_wq;
struct drm_syncobj *syncobj;
};

struct async_events;
Expand Down
1 change: 1 addition & 0 deletions src/driver/amdxdna/amdxdna_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
goto free_name;
}
args->handle = hwctx->id;
args->syncobj_handle = hwctx->syncobj_hdl;
args->umq_doorbell = hwctx->doorbell_offset;
mutex_unlock(&xdna->dev_lock);

Expand Down
2 changes: 2 additions & 0 deletions src/driver/amdxdna/amdxdna_ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ struct amdxdna_hwctx {
u64 completed ____cacheline_aligned_in_smp;
/* For TDR worker to keep last completed. low frequency update */
u64 tdr_last_completed;
/* For command completion notification. */
u32 syncobj_hdl;
};

#define drm_job_to_xdna_job(j) \
Expand Down
2 changes: 1 addition & 1 deletion src/driver/amdxdna/amdxdna_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
mem.notifier);
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);

XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
XDNA_DBG(xdna, "Invalidating range 0x%llx, 0x%lx, type %d",
abo->mem.userptr, abo->mem.size, abo->type);

if (!mmu_notifier_range_blockable(range))
Expand Down
4 changes: 2 additions & 2 deletions src/include/uapi/drm_local/amdxdna_accel.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ struct amdxdna_qos_info {
* @mem_size: Size of AIE tile memory.
* @umq_doorbell: Returned offset of doorbell associated with UMQ.
* @handle: Returned hardware context handle.
* @pad: Structure padding.
* @syncobj_handle: The drm timeline syncobj handle for command completion notification.
*/
struct amdxdna_drm_create_hwctx {
__u64 ext;
Expand All @@ -102,7 +102,7 @@ struct amdxdna_drm_create_hwctx {
__u32 mem_size;
__u32 umq_doorbell;
__u32 handle;
__u32 pad;
__u32 syncobj_handle;
};

/**
Expand Down
15 changes: 15 additions & 0 deletions src/shim/hwctx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ create_ctx_on_device()

set_slotidx(arg.handle);
set_doorbell(arg.umq_doorbell);
set_syncobj(arg.syncobj_handle);

m_q->bind_hwctx(this);
}
Expand Down Expand Up @@ -254,4 +255,18 @@ get_doorbell() const
return m_doorbell;
}

void
hw_ctx::
set_syncobj(uint32_t syncobj)
{
m_syncobj = syncobj;
}

uint32_t
hw_ctx::
get_syncobj() const
{
return m_syncobj;
}

} // shim_xdna
7 changes: 7 additions & 0 deletions src/shim/hwctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class hw_ctx : public xrt_core::hwctx_handle
uint32_t
get_doorbell() const;

uint32_t
get_syncobj() const;

protected:
const device&
get_device();
Expand All @@ -80,6 +83,9 @@ class hw_ctx : public xrt_core::hwctx_handle
void
set_doorbell(uint32_t db);

void
set_syncobj(uint32_t syncobj);

void
create_ctx_on_device();

Expand All @@ -98,6 +104,7 @@ class hw_ctx : public xrt_core::hwctx_handle
uint32_t m_ops_per_cycle;
uint32_t m_num_cols;
uint32_t m_doorbell;
uint32_t m_syncobj;
std::unique_ptr<xrt_core::buffer_handle> m_log_bo;
void *m_log_buf;

Expand Down
54 changes: 46 additions & 8 deletions src/shim/hwq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,69 @@

namespace {

// Current time in nanoseconds on the monotonic clock.
//
// The value is used to build absolute deadlines for the DRM syncobj wait
// ioctl, which the kernel compares against its monotonic clock. Use
// steady_clock explicitly: high_resolution_clock may be an alias of the
// wall-clock system_clock, which yields deadlines far in the future so that
// the wait never times out.
uint64_t abs_now_ns()
{
  using namespace std::chrono;

  const auto since_clock_epoch = steady_clock::now().time_since_epoch();
  return static_cast<uint64_t>(duration_cast<nanoseconds>(since_clock_epoch).count());
}

// Map the command buffer for writing and return its packet if it is a
// command chain (opcode ERT_CMD_CHAIN); return nullptr for any other opcode.
ert_packet *
get_chained_command_pkt(xrt_core::buffer_handle *boh)
{
  auto mapped = boh->map(xrt_core::buffer_handle::map_type::write);
  auto pkt = reinterpret_cast<ert_packet *>(mapped);

  if (pkt->opcode != ERT_CMD_CHAIN)
    return nullptr;
  return pkt;
}

// Wait for timeline point `seq` on the given syncobj handle through
// DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT.
//
// timeout_ms == 0 waits with the largest representable deadline; otherwise
// the relative millisecond timeout is converted to an absolute nanosecond
// deadline based on abs_now_ns(). Errors are whatever pdev.ioctl() reports.
void
wait_cmd_syncobj(const shim_xdna::pdev& pdev, uint32_t syncobj, uint64_t seq, uint32_t timeout_ms)
{
  int64_t deadline_ns = std::numeric_limits<int64_t>::max();
  if (timeout_ms != 0) {
    const int64_t relative_ns = static_cast<int64_t>(timeout_ms) * 1000000;
    deadline_ns = static_cast<int64_t>(relative_ns + abs_now_ns());
  }

  // Single handle / single point; every other field stays zero.
  drm_syncobj_timeline_wait wait_args = {};
  wait_args.handles = reinterpret_cast<uintptr_t>(&syncobj);
  wait_args.points = reinterpret_cast<uintptr_t>(&seq);
  wait_args.timeout_nsec = deadline_ns;
  wait_args.count_handles = 1;
  wait_args.flags = 0;

  pdev.ioctl(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait_args);
}

// Wait for command `seq` of hardware context `ctx_id` through the driver's
// DRM_IOCTL_AMDXDNA_WAIT_CMD ioctl, passing `timeout_ms` through unchanged.
// Errors are whatever pdev.ioctl() reports.
void
wait_cmd_ioctl(const shim_xdna::pdev& pdev, uint32_t ctx_id, uint64_t seq, uint32_t timeout_ms)
{
  amdxdna_drm_wait_cmd wait_args = {};
  wait_args.hwctx = ctx_id;
  wait_args.timeout = timeout_ms;
  wait_args.seq = seq;

  pdev.ioctl(DRM_IOCTL_AMDXDNA_WAIT_CMD, &wait_args);
}

int
wait_cmd(const shim_xdna::pdev& pdev, const shim_xdna::hw_ctx *ctx,
xrt_core::buffer_handle *cmd, uint32_t timeout_ms)
{
int ret = 1;
auto boh = static_cast<shim_xdna::bo*>(cmd);
auto id = boh->get_cmd_id();
auto syncobj = ctx->get_syncobj();
auto ctx_id = ctx->get_slotidx();
auto seq = boh->get_cmd_id();

shim_debug("Waiting for cmd (%ld)...", id);

amdxdna_drm_wait_cmd wcmd = {
.hwctx = ctx->get_slotidx(),
.timeout = timeout_ms,
.seq = boh->get_cmd_id(),
};


try {
pdev.ioctl(DRM_IOCTL_AMDXDNA_WAIT_CMD, &wcmd);
if (syncobj != AMDXDNA_INVALID_FENCE_HANDLE)
wait_cmd_syncobj(pdev, syncobj, seq, timeout_ms);
else
wait_cmd_ioctl(pdev, ctx_id, seq, timeout_ms);
}
catch (const xrt_core::system_error& ex) {
if (ex.get_code() != ETIME)
Expand Down
Loading

0 comments on commit f60608b

Please sign in to comment.