[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251104185340.897560-1-lizhi.hou@amd.com>
Date: Tue, 4 Nov 2025 10:53:39 -0800
From: Lizhi Hou <lizhi.hou@....com>
To: <ogabbay@...nel.org>, <quic_jhugo@...cinc.com>,
<maciej.falkowski@...ux.intel.com>, <dri-devel@...ts.freedesktop.org>
CC: Lizhi Hou <lizhi.hou@....com>, <linux-kernel@...r.kernel.org>,
<max.zhen@....com>, <sonal.santan@....com>, <mario.limonciello@....com>
Subject: [PATCH V1] accel/amdxdna: Support preemption requests
The driver checks the firmware version during initialization.If preemption
is supported, the driver configures preemption accordingly and handles
userspace preemption requests. Otherwise, the driver returns an error for
userspace preemption requests.
Signed-off-by: Lizhi Hou <lizhi.hou@....com>
---
drivers/accel/amdxdna/aie2_message.c | 95 +++++++++++++++++++++++++
drivers/accel/amdxdna/aie2_msg_priv.h | 3 +
drivers/accel/amdxdna/aie2_pci.c | 63 ++++++++++++++++
drivers/accel/amdxdna/aie2_pci.h | 8 +++
drivers/accel/amdxdna/amdxdna_ctx.h | 17 +++++
drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +-
drivers/accel/amdxdna/npu4_regs.c | 4 ++
include/uapi/drm/amdxdna_accel.h | 16 ++++-
8 files changed, 207 insertions(+), 2 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 69cdce9ff208..d493bb1c3360 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -210,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
hwctx->fw_ctx_id = resp.context_id;
WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+ if (ndev->force_preempt_enabled) {
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
+ return ret;
+ }
+ }
+
cq_pair = &resp.cq_pair[0];
x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
@@ -601,6 +609,11 @@ aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
return 0;
}
+static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ return -EOPNOTSUPP;
+}
+
static u32 aie2_get_chain_msg_op(u32 cmd_op)
{
switch (cmd_op) {
@@ -621,6 +634,8 @@ static struct aie2_exec_msg_ops legacy_exec_message_ops = {
.init_chain_req = aie2_init_exec_chain_req,
.fill_cf_slot = aie2_cmdlist_fill_cf,
.fill_dpu_slot = aie2_cmdlist_fill_dpu,
+ .fill_preempt_slot = aie2_cmdlist_unsupp,
+ .fill_elf_slot = aie2_cmdlist_unsupp,
.get_chain_msg_op = aie2_get_chain_msg_op,
};
@@ -680,6 +695,74 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
return 0;
}
+static int
+aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, pd->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_preempt_data *pd;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*pd);
+ if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->type = EXEC_NPU_TYPE_ELF;
+ npu_slot->inst_buf_addr = pd->inst_buf;
+ npu_slot->save_buf_addr = pd->save_buf;
+ npu_slot->restore_buf_addr = pd->restore_buf;
+ npu_slot->inst_size = pd->inst_size;
+ npu_slot->save_size = pd->save_size;
+ npu_slot->restore_size = pd->restore_size;
+ npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+ npu_slot->arg_cnt = 1;
+ npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
+
+ *size = struct_size(npu_slot, args, npu_slot->arg_cnt);
+ return 0;
+}
+
static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
{
return MSG_OP_CHAIN_EXEC_NPU;
@@ -691,6 +774,8 @@ static struct aie2_exec_msg_ops npu_exec_message_ops = {
.init_chain_req = aie2_init_npu_chain_req,
.fill_cf_slot = aie2_cmdlist_fill_npu_cf,
.fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+ .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
+ .fill_elf_slot = aie2_cmdlist_fill_npu_elf,
.get_chain_msg_op = aie2_get_npu_chain_msg_op,
};
@@ -749,6 +834,16 @@ aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
case ERT_START_NPU:
ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
break;
+ case ERT_START_NPU_PREEMPT:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU_PREEMPT_ELF:
+ if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+ return -EOPNOTSUPP;
+ ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
+ break;
default:
XDNA_INFO(xdna, "Unsupported op %d", op);
ret = -EOPNOTSUPP;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 947daa63f064..1c957a6298d3 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -176,6 +176,8 @@ struct exec_dpu_req {
enum exec_npu_type {
EXEC_NPU_TYPE_NON_ELF = 0x1,
EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
+ EXEC_NPU_TYPE_PREEMPT = 0x3,
+ EXEC_NPU_TYPE_ELF = 0x4,
};
union exec_req {
@@ -372,6 +374,7 @@ struct cmd_chain_slot_dpu {
};
#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3
struct cmd_chain_slot_npu {
enum exec_npu_type type;
u64 inst_buf_addr;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index d7ccbdaf47f5..ceef1c502e9e 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -183,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
if (cfg->category != category)
continue;
+ if (cfg->feature_mask &&
+ bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX))
+ continue;
+
value = val ? *val : cfg->value;
ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
if (ret) {
@@ -932,6 +936,25 @@ static int aie2_get_telemetry(struct amdxdna_client *client,
return 0;
}
+static int aie2_get_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_attribute_state state = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE)
+ state.state = ndev->force_preempt_enabled;
+ else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE)
+ state.state = ndev->frame_boundary_preempt;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
struct amdxdna_dev *xdna = client->xdna;
@@ -972,6 +995,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
ret = aie2_query_resource_info(client, args);
break;
+ case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE:
+ case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE:
+ ret = aie2_get_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
@@ -1078,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client,
return aie2_pm_set_mode(xdna->dev_handle, power_mode);
}
+static int aie2_set_preempt_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+ struct amdxdna_drm_attribute_state state;
+ u32 val;
+ int ret;
+
+ if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state)))
+ return -EFAULT;
+
+ if (state.state > 1)
+ return -EINVAL;
+
+ if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad)))
+ return -EINVAL;
+
+ if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) {
+ ndev->force_preempt_enabled = state.state;
+ } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) {
+ val = state.state;
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+ &val);
+ if (ret)
+ return ret;
+
+ ndev->frame_boundary_preempt = state.state;
+ }
+
+ return 0;
+}
+
static int aie2_set_state(struct amdxdna_client *client,
struct amdxdna_drm_set_state *args)
{
@@ -1095,6 +1154,10 @@ static int aie2_set_state(struct amdxdna_client *client,
case DRM_AMDXDNA_SET_POWER_MODE:
ret = aie2_set_power_mode(client, args);
break;
+ case DRM_AMDXDNA_SET_FORCE_PREEMPT:
+ case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT:
+ ret = aie2_set_preempt_state(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 9793cd1e0c55..a5f9c42155d1 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -110,12 +110,15 @@ struct aie_metadata {
enum rt_config_category {
AIE2_RT_CFG_INIT,
AIE2_RT_CFG_CLK_GATING,
+ AIE2_RT_CFG_FORCE_PREEMPT,
+ AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};
struct rt_config {
u32 type;
u32 value;
u32 category;
+ unsigned long feature_mask;
};
struct dpm_clk_freq {
@@ -164,6 +167,8 @@ struct aie2_exec_msg_ops {
void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
u32 (*get_chain_msg_op)(u32 cmd_op);
};
@@ -197,6 +202,8 @@ struct amdxdna_dev_hdl {
u32 hclk_freq;
u32 max_tops;
u32 curr_tops;
+ u32 force_preempt_enabled;
+ u32 frame_boundary_preempt;
/* Mailbox and the management channel */
struct mailbox *mbox;
@@ -223,6 +230,7 @@ struct aie2_hw_ops {
enum aie2_fw_feature {
AIE2_NPU_COMMAND,
+ AIE2_PREEMPT,
AIE2_FEATURE_MAX
};
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index d02fb32499fa..b6151244d64f 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -16,6 +16,8 @@ enum ert_cmd_opcode {
ERT_START_CU = 0,
ERT_CMD_CHAIN = 19,
ERT_START_NPU = 20,
+ ERT_START_NPU_PREEMPT = 21,
+ ERT_START_NPU_PREEMPT_ELF = 22,
ERT_INVALID_CMD = ~0U,
};
@@ -55,6 +57,21 @@ struct amdxdna_cmd_chain {
u64 data[] __counted_by(command_count);
};
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_preempt_data {
+ u64 inst_buf; /* instruction buffer address */
+ u64 save_buf; /* save buffer address */
+ u64 restore_buf; /* restore buffer address */
+ u32 inst_size; /* size of instruction buffer in bytes */
+ u32 save_size; /* size of save buffer in bytes */
+ u32 restore_size; /* size of restore buffer in bytes */
+ u32 inst_prop_cnt; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
/* Exec buffer command header format */
#define AMDXDNA_CMD_STATE GENMASK(3, 0)
#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 7590265d4485..1973ab67721b 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -31,9 +31,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
* 0.3: Support firmware debug buffer
* 0.4: Support getting resource information
* 0.5: Support getting telemetry data
+ * 0.6: Support preemption
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 5
+#define AMDXDNA_DRIVER_MINOR 6
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index d90777275a9f..986a5f28ba24 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -64,10 +64,13 @@
const struct rt_config npu4_default_rt_cfg[] = {
{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
{ 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
+ { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
+ { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
{ 0 },
};
@@ -85,6 +88,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
{ .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+ { .feature = AIE2_PREEMPT, .min_minor = 12 },
{ 0 }
};
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 8ad254bc35a5..62c917fd4f7b 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -443,7 +443,9 @@ enum amdxdna_drm_get_param {
DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
DRM_AMDXDNA_GET_POWER_MODE,
DRM_AMDXDNA_QUERY_TELEMETRY,
- DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12,
+ DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE,
+ DRM_AMDXDNA_QUERY_RESOURCE_INFO,
+ DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE,
};
/**
@@ -462,6 +464,16 @@ struct amdxdna_drm_get_resource_info {
__u64 npu_task_curr;
};
+/**
+ * struct amdxdna_drm_attribute_state - State of an attribute
+ */
+struct amdxdna_drm_attribute_state {
+ /** @state: enabled or disabled */
+ __u8 state;
+ /** @pad: MBZ */
+ __u8 pad[7];
+};
+
/**
* struct amdxdna_drm_query_telemetry_header - Telemetry data header
*/
@@ -613,6 +625,8 @@ enum amdxdna_drm_set_param {
DRM_AMDXDNA_SET_POWER_MODE,
DRM_AMDXDNA_WRITE_AIE_MEM,
DRM_AMDXDNA_WRITE_AIE_REG,
+ DRM_AMDXDNA_SET_FORCE_PREEMPT,
+ DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT,
};
/**
--
2.34.1
Powered by blists - more mailing lists