Skip to content

ve2: add support for create/destroy hw context #560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/driver/amdxdna/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ amdxdna-$(OFT_CONFIG_AMDXDNA_OF) += \
amdxdna_gem_of.o \
ve2_of.o \
ve2_regs.o \
amdxdna_of_drv.o
amdxdna_of_drv.o \
ve2_solver.o \
ve2_mgmt.o \
ve2_hwctx.o

# Helper functions for amdxdna development, but not for upstreaming
amdxdna-y += amdxdna_devel.o
Expand Down
3 changes: 3 additions & 0 deletions src/driver/amdxdna/amdxdna_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ int amdxdna_drm_create_ctx_ioctl(struct drm_device *dev, void *data, struct drm_
if (!drm_dev_enter(dev, &idx))
return -ENODEV;

if (!xdna->dev_info->ops->ctx_init)
return -EOPNOTSUPP;

ret = amdxdna_pm_resume_get(dev->dev);
if (ret)
goto exit;
Expand Down
2 changes: 1 addition & 1 deletion src/driver/amdxdna/amdxdna_gem_of.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_fil
abo = to_xdna_obj(gobj);

/* For now we only support CMA memory*/
bo_phyaddr = (uint64_t)abo->base.dma_addr;
bo_phyaddr = (u64)abo->base.dma_addr;
bo_phyaddr += args->offset;

if (args->direction == SYNC_DIRECT_TO_DEVICE) {
Expand Down
24 changes: 21 additions & 3 deletions src/driver/amdxdna/amdxdna_of_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <drm/drm_managed.h>

#include "amdxdna_of_drv.h"
#include "ve2_res_solver.h"

static const struct of_device_id amdxdna_of_table[] = {
{ .compatible = "amdxdna,ve2", .data = &dev_ve2_info },
Expand All @@ -19,6 +20,7 @@ MODULE_DEVICE_TABLE(of, amdxdna_of_table);

static int amdxdna_of_probe(struct platform_device *pdev)
{
struct init_config xrs_cfg = { 0 };
struct device *dev = &pdev->dev;
const struct of_device_id *id;
struct amdxdna_dev *xdna;
Expand Down Expand Up @@ -59,19 +61,35 @@ static int amdxdna_of_probe(struct platform_device *pdev)
return ret;
}

if (!xdna->dev_handle) {
XDNA_ERR(xdna, "amdxdna device handle is null");
ret = -EINVAL;
goto out;
}

ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
XDNA_ERR(xdna, "DMA configuration failed: 0x%x\n", ret);
drm_dev_put(&xdna->ddev);
return ret;
goto out;
}

XDNA_WARN(xdna, "DMA configuration downgraded to 32bit Mask\n");
}

xrs_cfg.ddev = &xdna->ddev;
xrs_cfg.total_col = XRS_MAX_COL;
xdna->dev_handle->xrs_hdl = xrsm_init(&xrs_cfg);
if (!xdna->dev_handle->xrs_hdl) {
XDNA_ERR(xdna, "Initialization of Resource resolver failed");
ret = -EINVAL;
goto out;
}

return 0;
goto out:
drm_dev_put(&xdna->ddev);
return ret;
}

static void amdxdna_of_remove(struct platform_device *pdev)
Expand Down
241 changes: 241 additions & 0 deletions src/driver/amdxdna/ve2_host_queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
/* SPDX-License-Identifier: GPL-2.0 */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The handshake region has been updated in CERT. Could you please check and update this header accordingly?

/*
* Copyright (C) 2025, Advanced Micro Devices, Inc.
*/

/* Number of elements in the hq_entry[] packet arrays declared below. */
#define HOST_QUEUE_ENTRY		32
/* Number of indirect packets reserved per host queue entry. */
#define HOST_INDIRECT_PKT_NUM		36

/*
 * struct exec_buf - payload carried by an indirect host queue packet
 * (see the "payload" member of struct host_queue_indirect_pkt).
 *
 * Two 64-bit host addresses are stored as low/high 32-bit halves.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the firmware interface. Field order and widths define the
 * layout and must not be changed.
 */
struct exec_buf {
	u16	cu_index;
	u16	reserved0;
	u32	dpu_control_code_host_addr_low;
	u32	dpu_control_code_host_addr_high;
	u16	args_len;
	u16	reserved1;
	u32	args_host_addr_low;
	u32	args_host_addr_high;
};

/*
 * struct host_queue_header - bookkeeping header placed in front of the
 * host queue packet array (see struct host_queue_entry / struct hsa_queue).
 *
 * NOTE(review): which side advances read_index vs. write_index is not
 * visible in this header; confirm against the queue producer/consumer code.
 */
struct host_queue_header {
	u64	read_index;
	struct {
		u16	major;
		u16	minor;
	} version;
	/* Queue capacity, must be a power of two. */
	u32	capacity;
	u64	write_index;
	u64	data_address;
};

/*
 * struct host_indirect_packet_entry - one indirect packet reference.
 *
 * The second 32-bit word is split into a 25-bit high address part and a
 * 7-bit uc_index. Bit placement within the word follows the compiler's
 * bitfield ordering.
 */
struct host_indirect_packet_entry {
	u32	host_addr_low;
	u32	host_addr_high:25;
	u32	uc_index:7;
};

/*
 * Packet type values. The numeric values are spelled out explicitly
 * because they are stored in packet headers.
 */
enum host_queue_packet_type {
	HOST_QUEUE_PACKET_TYPE_VENDOR_SPECIFIC	= 0,
	HOST_QUEUE_PACKET_TYPE_INVALID		= 1,
};

/*
 * Packet opcode values. Numbering starts at 1; no enumerator is defined
 * for 0.
 */
enum host_queue_packet_opcode {
	HOST_QUEUE_PACKET_EXEC_BUF	= 1,
	HOST_QUEUE_PACKET_TEST		= 2,
	HOST_QUEUE_PACKET_EXIT		= 3,
};

/*
 * struct common_header - header shared by direct and indirect packets.
 *
 * The first 16 bits can be accessed either field by field through the
 * anonymous bitfield struct (13 of the 16 bits are assigned) or as one
 * word through "header".
 * NOTE(review): "type" and "opcode" presumably hold
 * enum host_queue_packet_type / enum host_queue_packet_opcode values;
 * confirm against the code that fills in packets.
 */
struct common_header {
	union {
		struct {
			u16	type: 8;
			u16	barrier: 1;
			u16	acquire_fence_scope: 2;
			u16	release_fence_scope: 2;
		};
		u16	header;
	};
	u16	opcode;
	u16	count;
	u8	distribute;
	u8	indirect;
};

/*
 * struct xrt_packet_header - common packet header followed by a 64-bit
 * completion signal value.
 */
struct xrt_packet_header {
	struct common_header	common_header;
	u64			completion_signal;
};

/*
 * struct host_queue_packet - one host queue slot: the XRT packet header
 * followed by twelve 32-bit payload words.
 */
struct host_queue_packet {
	struct xrt_packet_header	xrt_header;
	u32				data[12];
};

/* Short aliases for the host queue structures ... */
typedef struct host_queue_header host_queue_header_t;
typedef struct host_queue_packet host_queue_packet_t;
typedef struct host_indirect_packet_entry host_indirect_packet_entry_t;
typedef struct xrt_packet_header xrt_packet_header_t;
/* ... and for the packet type/opcode enumerations. */
typedef enum host_queue_packet_type host_queue_packet_type_t;
typedef enum host_queue_packet_opcode host_queue_packet_opcode_t;

/*
 * struct host_queue_entry - queue header immediately followed by
 * HOST_QUEUE_ENTRY packet slots.
 */
struct host_queue_entry {
	struct host_queue_header	hq_header;
	struct host_queue_packet	hq_entry[HOST_QUEUE_ENTRY];
};

/*
 * host_queue_indirect_pkt_t - an indirect packet: the common header
 * followed by an exec_buf payload.
 */
typedef struct host_queue_indirect_pkt {
	struct common_header	header;
	struct exec_buf		payload;
} host_queue_indirect_pkt_t;

/*
 * host_queue_indirect_hdr_t - common header followed by storage for the
 * indirect packet entries.
 *
 * NOTE(review): data[] is an array of u32 whose element count is
 * HOST_INDIRECT_PKT_NUM * sizeof(host_indirect_packet_entry_t), i.e. a
 * byte count is used as a word count. The array therefore occupies
 * sizeof(u32) times the space needed for HOST_INDIRECT_PKT_NUM entries.
 * Confirm whether the firmware expects this size before changing it; the
 * declaration is kept as-is to preserve the existing layout.
 */
typedef struct host_queue_indirect_hdr {
	struct common_header	header;
	u32			data[HOST_INDIRECT_PKT_NUM * sizeof(host_indirect_packet_entry_t)];
} host_queue_indirect_hdr_t;

/*
 * struct hsa_queue - complete queue image: the queue header, the packet
 * slots, one indirect header per slot, and HOST_INDIRECT_PKT_NUM indirect
 * packets per slot. Member order defines the memory layout.
 */
struct hsa_queue {
	host_queue_header_t		hq_header;
	host_queue_packet_t		hq_entry[HOST_QUEUE_ENTRY];
	host_queue_indirect_hdr_t	hq_indirect_hdr[HOST_QUEUE_ENTRY];
	host_queue_indirect_pkt_t	hq_indirect_pkt[HOST_QUEUE_ENTRY][HOST_INDIRECT_PKT_NUM];
};

/*
 * struct ve2_hq_complete - completion memory bookkeeping.
 *
 * NOTE(review): presumably hqc_mem is the CPU pointer and hqc_dma_addr the
 * device-visible address of the same buffer; confirm at the allocation
 * site.
 */
struct ve2_hq_complete {
	u64	*hqc_mem;
	u64	hqc_dma_addr;
};

/* struct ve2_mem - the two addresses under which one buffer is reachable. */
struct ve2_mem {
	/* Address mapped for the user to access the memory. */
	u64	user_addr;
	/* Address used by hardware; can be phy_t or dma_t. */
	u64	dma_addr;
};

/*
 * struct ve2_hsa_queue - driver-side handle for one HSA queue: pointer to
 * the queue image, its user/DMA addresses, the completion memory and a
 * lock.
 */
struct ve2_hsa_queue {
	struct hsa_queue	*hsa_queue_p;
	struct ve2_mem		hsa_queue_mem;
	struct ve2_hq_complete	hq_complete;
	/*
	 * The original note reads "protect hwctx idr".
	 * NOTE(review): no idr is part of this structure; confirm what
	 * hq_lock actually serializes.
	 */
	struct mutex		hq_lock;
};

// Handshake packet structure format
//
// The trailing "//<hex>" notes on the fields up to dbg_buf.size give the
// byte offset of each field from the start of the structure. Every member
// up to that point is 32 bits wide (pdi_page is two u16), so the offsets
// follow directly from the declaration order.
// NOTE(review): this layout has to match the firmware's definition of the
// handshake region; confirm it against the current firmware before relying
// on any offset. Do not reorder or resize members.
//
// NOTE(review): ALIVE_MAGIC is presumably the value expected in
// mpaie_alive; confirm against the code that reads/writes that field.
#define ALIVE_MAGIC 0x404C5645
typedef struct {
u32 mpaie_alive; //0
u32 partition_base_address; //4
// One 32-bit word at offset 8 split into bitfields (7 + 23 + 1 + 1 bits).
struct {
u32 partition_size:7; //8
u32 reserved:23; //8
u32 mode:1; //8
u32 uc_b:1; //8
}
aie_info;
u32 hsa_addr_high; //c
u32 hsa_addr_low; //10
u32 ctx_switch_req; //14
u32 hsa_location; //18
u32 cert_idle_status; //1c
u32 misc_status; //20
u32 log_addr_high; //24
u32 log_addr_low; //28
u32 log_buf_size; //2c
u32 host_time_high; //30
u32 host_time_low; //34
struct {
u32 dtrace_addr_high; //38
u32 dtrace_addr_low; //3c
}
trace;
struct {
u32 restore_page; //40
u32 pdi_id; //44
// Two 16-bit halves sharing the 32-bit word at offset 0x48.
struct {
u16 page_index;
u16 page_len;
}
pdi_page; //48
}
ctx_save;
struct {
u32 hsa_addr_high; //4c
u32 hsa_addr_low; //50
}
dbg;
struct {
u32 dbg_buf_addr_high; //54
u32 dbg_buf_addr_low; //58
u32 size; //5c
}
dbg_buf;
// Statistics counters: ten 32-bit counters followed by twelve 16-bit
// counters. Declared volatile, so every access is performed as written.
volatile struct {
// number of checks whether there are jobs ready
u32 c_job_readiness_checked;
// number of opcode run
u32 c_opcode;
u32 c_job_launched;
u32 c_job_finished;
// number of hsa pkt handled
u32 c_hsa_pkt;
// number of pages loaded
u32 c_page;
// number of hsa doorbell ring
u32 c_doorbell;
// number of uc memory(PM) scrub
u32 c_uc_scrub;
// number of tct requested
u32 c_tct_requested;
// number of tct received
u32 c_tct_received;
// run out of wait handle UC_DMA_WRITE_DES opcode
u16 c_preemption_ucdma;
// run out of wait handle UC_DMA_WRITE_DES_SYNC opcode
u16 c_preemption_ucdma_sync;
// POLL_32 opcode retry times
u16 c_preemption_poll;
// MASK_POLL_32 opcode retry times
u16 c_preemption_mask_poll;
// run out of physical barrier REMOTE_BARRIER opcode
u16 c_preemption_remote_barrier;
// actor entry overflow or run out of wait handle WAIT_TCTS opcode
u16 c_preemption_wait_tct;
// block UC_DMA_WRITE_DES opcode
u16 c_block_ucdma;
// block UC_DMA_WRITE_DES_SYNC opcode
u16 c_block_ucdma_sync;
// block local_barrier opcode
u16 c_block_local_barrier;
// block REMOTE_BARRIER opcode
u16 c_block_remote_barrier;
// block WAIT_TCTS opcode
u16 c_block_wait_tct;
// number of slow actor entry lookup
u16 c_actor_hash_conflict;
}
counter;
// Firmware VM state snapshot (volatile, see the note on "counter").
volatile struct {
u32 fw_state;
//absolute index of page where current control code are in
u32 abs_page_index;
// previous pc (relative addr to current page) that drives current_job_ctxt to NULL
u32 ppc;
}
vm;
// Exception registers (volatile, see the note on "counter").
volatile struct
{
//exception address
u32 ear;
//exception status
u32 esr;
//exception pc
u32 pc;
}
exception;
// Only present when PDI_LOAD_TEST is defined; defining it grows the
// structure by two 32-bit words at the end.
#ifdef PDI_LOAD_TEST
u32 test_pdi_addr_high;
u32 test_pdi_addr_low;
#endif
} handshake_t;
Loading