Message-ID: <20250409173033.2261755-2-nipun.gupta@amd.com>
Date: Wed, 9 Apr 2025 23:00:32 +0530
From: Nipun Gupta <nipun.gupta@....com>
To: <dri-devel@...ts.freedesktop.org>, <devicetree@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <krzk+dt@...nel.org>,
<gregkh@...uxfoundation.org>, <robh@...nel.org>, <conor+dt@...nel.org>,
<ogabbay@...nel.org>, <maarten.lankhorst@...ux.intel.com>,
<mripard@...nel.org>, <tzimmermann@...e.de>, <airlied@...il.com>,
<simona@...ll.ch>, <derek.kiernan@....com>, <dragan.cvetic@....com>,
<arnd@...db.de>
CC: <praveen.jain@....com>, <harpreet.anand@....com>,
<nikhil.agarwal@....com>, <srivatsa@...il.mit.edu>, <code@...icks.com>,
<ptsm@...ux.microsoft.com>, Nipun Gupta <nipun.gupta@....com>
Subject: [PATCH v2 2/3] accel/amdpk: add driver for AMD PKI accelerator
The AMD PKI accelerator driver provides an accel interface to interact
with the device for offloading and accelerating asymmetric crypto
operations.
Signed-off-by: Nipun Gupta <nipun.gupta@....com>
---
Changes RFC->v2:
- moved from misc to accel
- added architecture and compile test dependency in Kconfig
- removed sysfs (and added debugfs in new patch 3/3)
- fixed platform compat
- removed redundant resource index 1 configuration (which was there in
RFC patch)
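
Not part of the patch, for reviewers: a minimal user-space sketch of the
intended flow through the new uapi. It assumes the device shows up as
/dev/accel/accel0, that the header is installed as <drm/amdpk.h>, and a
4K page size; error handling and the request descriptor layout (which is
not defined by this series) are omitted.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/eventfd.h>
#include <drm/drm.h>
#include <drm/amdpk.h>

#define QDEPTH  4
#define PAGE_SZ 4096UL

int main(void)
{
        struct amdpk_conf conf = { .qdepth = QDEPTH };
        struct amdpk_info info = { 0 };
        volatile uint32_t *status;
        uint64_t ticks;
        void *regs, *mem;
        int fd, i;

        fd = open("/dev/accel/accel0", O_RDWR);
        if (fd < 0)
                return 1;

        /* How many request entries are still available across all queues. */
        ioctl(fd, DRM_IOCTL_AMDPK_GET_INFO, &info);
        printf("available queue depth: %u\n", info.avail_qdepth);

        /* One eventfd per request slot; the driver signals it on completion. */
        for (i = 0; i < QDEPTH; i++)
                conf.eventfd[i] = eventfd(0, 0);
        ioctl(fd, DRM_IOCTL_AMDPK_SET_CONF, &conf);

        /* Per-queue control registers (the mmap offset selects the mapping). */
        regs = mmap(NULL, PAGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, AMDPK_MMAP_REGS * PAGE_SZ);

        /* 1 status page followed by 4 RQ pages for descriptors and data. */
        mem = mmap(NULL, 5 * PAGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, AMDPK_MMAP_MEM * PAGE_SZ);

        /*
         * ... build request descriptors in the RQ pages and ring the
         * NEW_REQUEST doorbell through "regs" (descriptor layout is not
         * part of this series) ...
         */

        /* Completion: wait on the eventfd, then read the per-request status
         * word that the driver writes into the status page (first page of "mem").
         */
        read(conf.eventfd[0], &ticks, sizeof(ticks));
        status = mem;
        if (!(status[0] & CQ_STATUS_VALID) || (status[0] & CQ_COMPLETION_ERROR))
                fprintf(stderr, "request 0 failed: 0x%x\n", (unsigned int)status[0]);

        munmap(mem, 5 * PAGE_SZ);
        munmap(regs, PAGE_SZ);
        close(fd);
        return 0;
}
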
MAINTAINERS | 2 +
drivers/accel/Kconfig | 1 +
drivers/accel/Makefile | 1 +
drivers/accel/amdpk/Kconfig | 18 +
drivers/accel/amdpk/Makefile | 8 +
drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
include/uapi/drm/amdpk.h | 49 +++
8 files changed, 1086 insertions(+)
create mode 100644 drivers/accel/amdpk/Kconfig
create mode 100644 drivers/accel/amdpk/Makefile
create mode 100644 drivers/accel/amdpk/amdpk_drv.c
create mode 100644 drivers/accel/amdpk/amdpk_drv.h
create mode 100644 include/uapi/drm/amdpk.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 11f8815daa77..cdc305a206aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1161,6 +1161,8 @@ L: dri-devel@...ts.freedesktop.org
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
+F: drivers/accel/amdpk/
+F: include/uapi/drm/amdpk.h
AMD PMC DRIVER
M: Shyam Sundar S K <Shyam-sundar.S-k@....com>
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 5b9490367a39..5632c6c62c15 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
+source "drivers/accel/amdpk/Kconfig"
endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index a301fb6089d4..caea6d636ac8 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
+obj-$(CONFIG_DRM_ACCEL_AMDPK) += amdpk/
diff --git a/drivers/accel/amdpk/Kconfig b/drivers/accel/amdpk/Kconfig
new file mode 100644
index 000000000000..c0b459bb66a7
--- /dev/null
+++ b/drivers/accel/amdpk/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Kconfig for AMD PKI accelerator for versal-net
+#
+
+config DRM_ACCEL_AMDPK
+ tristate "AMD PKI accelerator for versal-net"
+ depends on DRM_ACCEL
+ depends on ARM64 || COMPILE_TEST
+ help
+ Enables the platform driver for the AMD PKI accelerator, which is
+ designed for high-performance public key asymmetric crypto operations
+ on AMD versal-net.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called amdpk.
diff --git a/drivers/accel/amdpk/Makefile b/drivers/accel/amdpk/Makefile
new file mode 100644
index 000000000000..826f43ccebdf
--- /dev/null
+++ b/drivers/accel/amdpk/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for AMD PKI accelerator for versal-net
+#
+
+obj-$(CONFIG_DRM_ACCEL_AMDPK) := amdpk.o
+
+amdpk-y := amdpk_drv.o
diff --git a/drivers/accel/amdpk/amdpk_drv.c b/drivers/accel/amdpk/amdpk_drv.c
new file mode 100644
index 000000000000..17c328d03db8
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+/*
+ * Device Overview
+ * ===============
+ * The AMD PKI accelerator is a device on AMD versal-net that executes public
+ * key asymmetric crypto operations such as ECDSA, ECDH and RSA with high
+ * performance. The driver provides an accel interface to applications for
+ * configuring the device and performing the required operations. The AMD PKI
+ * device comprises multiple Barco Silex ba414 PKI engines bundled together
+ * and provides a queue-based interface to interact with them on AMD
+ * versal-net.
+ *
+ * The following figure provides a brief overview of the device interface with
+ * the software:
+ *
+ * +------------------+
+ * | Software |
+ * +------------------+
+ * | |
+ * | v
+ * | +-----------------------------------------------------------+
+ * | | RAM |
+ * | | +----------------------------+ +---------------------+ |
+ * | | | RQ pages | | CQ pages | |
+ * | | | +------------------------+ | | +-----------------+ | |
+ * | | | | START (cmd) | | | | req_id | status | | |
+ * | | | | TFRI (addr, sz)---+ | | | | req_id | status | | |
+ * | | | | +-TFRO (addr, sz) | | | | | ... | | |
+ * | | | | | NTFY (req_id) | | | | +-----------------+ | |
+ * | | | +-|-------------------|--+ | | | |
+ * | | | | v | +---------------------+ |
+ * | | | | +-----------+ | |
+ * | | | | | input | | |
+ * | | | | | data | | |
+ * | | | v +-----------+ | |
+ * | | | +----------------+ | |
+ * | | | | output data | | |
+ * | | | +----------------+ | |
+ * | | +----------------------------+ |
+ * | | |
+ * | +-----------------------------------------------------------+
+ * |
+ * |
+ * +---|----------------------------------------------------+
+ * | v AMD PKI device |
+ * | +-------------------+ +------------------------+ |
+ * | | New request FIFO | --> | PK engines | |
+ * | +-------------------+ +------------------------+ |
+ * +--------------------------------------------------------+
+ *
+ * To perform a crypto operation, the software writes a sequence of descriptors
+ * into the RQ memory. This includes the input data and a designated location
+ * for the output data. After preparing the request, the request offset (from
+ * the RQ memory region) is written into the NEW_REQUEST register. The request
+ * is then stored in a common hardware FIFO shared among all RQs.
+ *
+ * When a PK engine becomes available, the device pops the request from the
+ * FIFO and fetches the descriptors. It DMAs the input data from RQ memory and
+ * executes the necessary computations. After the computation is complete, the
+ * device writes the output data back to RAM via DMA. The device then writes a
+ * new entry in the CQ ring buffer in RAM, indicating completion of the request,
+ * and generates an interrupt to notify the software of the completion.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/eventfd.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_ioctl.h>
+
+#include "amdpk_drv.h"
+
+#define DRIVER_NAME "amdpk"
+
+static void amdpk_init_cq(struct amdpk_dev *pkdev, struct amdpk_cq *cq,
+ int szcode, char *base)
+{
+ cq->pkdev = pkdev;
+ cq->generation = 1;
+ cq->szcode = szcode;
+ cq->base = (u32 *)base;
+ cq->tail = 0;
+}
+
+static int amdpk_pop_cq(struct amdpk_cq *cq, int *rid)
+{
+ u32 status = CQ_STATUS_VALID;
+ unsigned int sz;
+ u32 completion;
+
+ completion = cq->base[cq->tail + 1];
+ if ((completion & CQ_GENERATION_BIT) != cq->generation)
+ return CQ_STATUS_INVALID;
+
+ *rid = (completion >> 16) & 0xffff;
+ /* Read memory barrier: to avoid a race condition, the status field must
+ * not be read before the completion generation bit. Otherwise we could
+ * read stale status data.
+ */
+ rmb();
+ status |= cq->base[cq->tail];
+ /* advance completion queue tail */
+ cq->tail += 2;
+ sz = 1 << (cq->szcode - 2);
+ if (cq->tail >= sz) {
+ cq->tail = 0;
+ cq->generation ^= 1; /* invert generation bit */
+ }
+
+ /* evaluate status from the completion queue */
+ if (completion & CQ_COMPLETION_BIT)
+ status |= CQ_COMPLETION_ERROR;
+
+ return status;
+}
+
+static int amdpk_trigpos(struct amdpk_cq *cq)
+{
+ int trigpos;
+
+ /* Set trigger position on next completed operation */
+ trigpos = cq->tail / 2 + (cq->generation << (cq->szcode - 3));
+ trigpos++;
+ trigpos &= (1 << (cq->szcode - 2)) - 1;
+
+ return trigpos;
+}
+
+static void amdpk_cq_workfn(struct kthread_work *work)
+{
+ struct amdpk_work *pkwork;
+ struct amdpk_dev *pkdev;
+ struct amdpk_user *user;
+ int qid, rid, trigpos;
+ u32 status;
+
+ pkwork = to_amdpk_work(work);
+ pkdev = pkwork->pkdev;
+ qid = pkwork->qid;
+
+ user = pkwork->user;
+ status = amdpk_pop_cq(&pkdev->work[qid]->pk_cq, &rid);
+ if (rid < user->rq_entries && status != CQ_STATUS_INVALID) {
+ u32 *status_mem;
+
+ status_mem = (u32 *)user->stmem;
+ status_mem[rid] = status;
+ eventfd_signal(user->evfd_ctx[rid]);
+ }
+
+ trigpos = amdpk_trigpos(&pkdev->work[qid]->pk_cq);
+ pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(user->qid), trigpos);
+}
+
+static irqreturn_t amdpk_cq_irq(int irq, void *dev)
+{
+ struct amdpk_dev *pkdev = (struct amdpk_dev *)dev;
+ u64 active = 0;
+ int i;
+
+ active = pk_rdreg(pkdev->regs, REG_PK_IRQ_STATUS);
+ pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, active);
+
+ for (i = 0; i < pkdev->max_queues && active; i++, active >>= 1) {
+ if (!(active & 1))
+ continue;
+ if (!pkdev->users[i])
+ continue;
+ kthread_queue_work(pkdev->work[i]->cq_wq, &pkdev->work[i]->cq_work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void amdpk_free_rqmem(struct amdpk_dev *pkdev, struct amdpk_user *user)
+{
+ int pages = user->rq_pages;
+ int pagemult = (pages + 3) / 4;
+ int i;
+
+ for (i = 0; i < pages / pagemult; i++) {
+ if (!user->rqmem[i])
+ continue;
+ dma_free_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+ user->rqmem[i], user->physrq[i]);
+ user->rqmem[i] = NULL;
+ }
+}
+
+static int amdpk_accel_get_info(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+ struct amdpk_user *user = fp->driver_priv;
+ struct amdpk_dev *pkdev = user->pkdev;
+ struct amdpk_info *info = data;
+
+ info->avail_qdepth = atomic_read(&pkdev->avail_qdepth);
+ return 0;
+}
+
+static int amdpk_accel_configure(struct amdpk_user *user, struct amdpk_conf *conf)
+{
+ struct amdpk_dev *pkdev = user->pkdev;
+ struct amdpk_work *pkwork = NULL;
+ int qid = user->qid;
+ int trigpos, ret, i;
+ char wq_name[32];
+
+ i = atomic_sub_return(conf->qdepth, &pkdev->avail_qdepth);
+ if (i < 0) {
+ /* If enough entries are not present, give back the reserved entries. */
+ dev_err(user->pkdev->dev, "Out of descriptors\n");
+ atomic_add(conf->qdepth, &pkdev->avail_qdepth);
+ return -ENOSPC;
+ }
+ user->rq_entries = conf->qdepth;
+
+ for (i = 0; i < user->rq_entries; i++) {
+ if (conf->eventfd[i] <= 0) {
+ dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ user->evfd_ctx[i] = eventfd_ctx_fdget(conf->eventfd[i]);
+ if (IS_ERR(user->evfd_ctx[i])) {
+ dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+ ret = PTR_ERR(user->evfd_ctx[i]);
+ goto fail;
+ }
+ }
+
+ user->cqmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physcq, GFP_KERNEL);
+ if (!user->cqmem) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* Initialize completion queue handler */
+ pkwork = pkdev->work[qid];
+ amdpk_init_cq(pkdev, &pkwork->pk_cq, PAGE_SHIFT, user->cqmem);
+
+ snprintf(wq_name, sizeof(wq_name), "cq_worker_%d", qid);
+ pkwork->cq_wq = kthread_create_worker(0, wq_name);
+ if (IS_ERR(pkwork->cq_wq)) {
+ ret = PTR_ERR(pkwork->cq_wq);
+ pkwork->cq_wq = NULL;
+ goto fail;
+ }
+ kthread_init_work(&pkwork->cq_work, amdpk_cq_workfn);
+
+ pk_wrreg(pkdev->regs, REG_CQ_CFG_IRQ_NR(qid), qid);
+ pk_wrreg(pkdev->regs, REG_CQ_CFG_ADDR(qid), user->physcq);
+ pk_wrreg(pkdev->regs, REG_CQ_CFG_SIZE(qid), PAGE_SHIFT);
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_CQID(qid), qid);
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_DEPTH(qid), user->rq_entries);
+
+ /* set trigger position for notifications */
+ trigpos = amdpk_trigpos(&pkwork->pk_cq);
+ pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(qid), trigpos);
+
+ return 0;
+fail:
+ if (pkwork->cq_wq) {
+ kthread_destroy_worker(pkwork->cq_wq);
+ pkwork->cq_wq = NULL;
+ }
+ if (user->cqmem) {
+ dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+ user->cqmem = NULL;
+ }
+ atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+ user->rq_entries = 0;
+
+ return ret;
+}
+
+static int amdpk_accel_set_conf(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+ struct amdpk_user *user = fp->driver_priv;
+ struct amdpk_conf *conf = data;
+ int ret;
+
+ if (conf->qdepth == 0 || conf->qdepth > MAX_CQ_ENTRIES_ON_PAGE) {
+ dev_err(user->pkdev->dev, "Invalid qdepth: %d\n", conf->qdepth);
+ return -EINVAL;
+ }
+
+ if (user->configured) {
+ dev_err(user->pkdev->dev, "User already configured\n");
+ return -EEXIST;
+ }
+
+ ret = amdpk_accel_configure(user, conf);
+ if (ret)
+ return ret;
+
+ user->configured = true;
+ return 0;
+}
+
+static int amdpk_mmap_regs(struct vm_area_struct *vma)
+{
+ struct amdpk_user *user = vma->vm_private_data;
+ struct amdpk_dev *pkdev = user->pkdev;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return io_remap_pfn_range(vma, vma->vm_start,
+ (pkdev->regsphys + REG_CTL_BASE(user->qid)) >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
+static int mmap_dmamem(struct vm_area_struct *vma, struct amdpk_dev *pkdev,
+ void *addr, dma_addr_t phys, off_t offset, size_t sz)
+{
+ unsigned long vmstart = vma->vm_start;
+ unsigned long pgoff = vma->vm_pgoff;
+ int ret;
+
+ vma->vm_pgoff = 0;
+ vma->vm_start = vmstart + offset;
+ vma->vm_end = vma->vm_start + sz;
+ ret = dma_mmap_coherent(pkdev->dev, vma, addr, phys, sz);
+ vma->vm_pgoff = pgoff;
+ vma->vm_start = vmstart;
+
+ return ret;
+}
+
+static int amdpk_mmap_mem(struct vm_area_struct *vma)
+{
+ struct amdpk_user *user = vma->vm_private_data;
+ struct amdpk_dev *pkdev = user->pkdev;
+ int pagemult, pagemultshift;
+ int requested_pages;
+ int qid = user->qid;
+ int ret, i;
+
+ if (!user->configured) {
+ dev_err(pkdev->dev, "configuration not found!");
+ return -ENODEV;
+ }
+ /* Mapping already done */
+ if (user->stmem) {
+ dev_err(pkdev->dev, "memory already mapped\n");
+ return -EINVAL;
+ }
+
+ requested_pages = vma_pages(vma);
+ /* One page is reserved for the status and the remaining ones for the RQ,
+ * so the mmap must be at least 2 pages.
+ */
+ if (requested_pages < 2) {
+ dev_err(pkdev->dev, "Invalid request pages: %d\n", requested_pages);
+ return -EINVAL;
+ }
+ /* Store number of rq pages. 1 page is reserved for status */
+ user->rq_pages = requested_pages - 1;
+ /* Request memory can have up to 4 hardware pages, all of the same size.
+ * If more than 4 OS pages are requested, each hardware page uses the same
+ * multiple (pagemult) of OS pages. Thus the requested size for the
+ * request queue must be a multiple of pagemult.
+ */
+ pagemult = (requested_pages - 1 + 3) / 4;
+ if ((requested_pages - 1) % pagemult != 0) {
+ dev_err(pkdev->dev, "requested pages: %d not multiple of page multiplier: %d\n",
+ requested_pages, pagemult);
+ return -EINVAL;
+ }
+ /* The hardware page size must be a power of 2 and, as a consequence, so must pagemult. */
+ if ((pagemult & (pagemult - 1)) != 0) {
+ dev_err(pkdev->dev, "page multiplier: %d is not power of 2\n", pagemult);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+ user->rqmem[i] = dma_alloc_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+ &user->physrq[i], GFP_KERNEL);
+ if (!user->rqmem[i]) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[i]);
+ }
+
+ user->stmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physst, GFP_KERNEL);
+ if (!user->stmem) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* Configure unused RQ pages with the start of the allocated shared memory.
+ * Those should not be accessed, but if a (malicious) user writes descriptors
+ * for those pages, it will not break the rest of the system.
+ */
+ for (i = (requested_pages - 1) / pagemult; i < MAX_RQMEM_PER_QUEUE; i++)
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[0]);
+ /* pagemultshift = log2(pagemult), computed as popcount(pagemult - 1) */
+ pagemultshift = pagemult - 1;
+ pagemultshift = (pagemultshift & 5) + ((pagemultshift & 0xa) >> 1);
+ pagemultshift = (pagemultshift & 3) + ((pagemultshift >> 2) & 3);
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE_SIZE(qid), PAGE_SHIFT + pagemultshift);
+ pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGES_WREN(qid),
+ (1 << ((requested_pages - 1) / pagemult)));
+
+ ret = mmap_dmamem(vma, pkdev, user->stmem, user->physst, 0, PAGE_SIZE);
+ if (ret)
+ goto fail;
+ for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+ ret = mmap_dmamem(vma, pkdev, user->rqmem[i], user->physrq[i],
+ (i * pagemult + 1) * PAGE_SIZE, PAGE_SIZE * pagemult);
+ if (ret)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ amdpk_free_rqmem(pkdev, user);
+ if (user->stmem) {
+ dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+ user->stmem = NULL;
+ }
+ return ret;
+}
+
+static int amdpk_accel_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct drm_file *dfp = fp->private_data;
+ struct amdpk_user *user;
+ int ret = 0;
+
+ user = dfp->driver_priv;
+ if (vma->vm_end < vma->vm_start)
+ return -EINVAL;
+
+ vma->vm_private_data = user;
+
+ switch (vma->vm_pgoff) {
+ case AMDPK_MMAP_REGS:
+ ret = amdpk_mmap_regs(vma);
+ break;
+ case AMDPK_MMAP_MEM:
+ ret = amdpk_mmap_mem(vma);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int amdpk_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct amdpk_work *pkwork = NULL;
+ struct amdpk_user *user = NULL;
+ struct amdpk_dev *pkdev;
+ int ret, qid;
+
+ pkdev = to_amdpk_dev(dev);
+ qid = ida_alloc_range(&pkdev->avail_queues, 0, pkdev->max_queues - 1, GFP_KERNEL);
+ if (qid < 0)
+ return -ENOSPC;
+
+ get_device(pkdev->dev);
+
+ user = kzalloc(sizeof(*user), GFP_KERNEL);
+ if (!user) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ user->pkdev = pkdev;
+ user->qid = qid;
+ user->rq_entries = 0;
+ file->driver_priv = user;
+ pkdev->users[qid] = user;
+
+ pkwork = kzalloc(sizeof(*pkwork), GFP_KERNEL);
+ if (!pkwork) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ pkwork->qid = qid;
+ pkwork->pkdev = pkdev;
+ pkwork->user = user;
+ pkdev->work[qid] = pkwork;
+
+ return 0;
+
+fail:
+ kfree(user);
+ kfree(pkwork);
+ ida_free(&pkdev->avail_queues, qid);
+ put_device(pkdev->dev);
+ return ret;
+}
+
+static void amdpk_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct amdpk_user *user = file->driver_priv;
+ struct amdpk_dev *pkdev = user->pkdev;
+ char __iomem *regs = pkdev->regs;
+
+ /* Set pkdev->users[qid] to NULL first, so that the completion interrupt
+ * handler knows this user no longer exists and does not schedule any
+ * completion work on the CQ worker kthread.
+ */
+ pkdev->users[user->qid] = NULL;
+
+ if (user->configured) {
+ unsigned int attempts = 0;
+
+ /* Disable the RQCQ pages to help the hardware finish any
+ * pending requests sooner.
+ */
+ pk_wrreg(regs, REG_RQ_CFG_PAGE_SIZE(user->qid), 0);
+ pk_wrreg(regs, REG_RQ_CFG_PAGES_WREN(user->qid), 0);
+ pk_wrreg(regs, REG_CQ_CFG_SIZE(user->qid), 0);
+
+ /* The hardware does not have a flush mechanism for the requests pending
+ * in the RQ. Instead, poll REG_CTL_PENDING_REQS to check whether the user
+ * still has requests in flight. If the hardware never completes them,
+ * give up after MAX_FLUSH_WAIT_ATTEMPTS attempts and don't free the resources.
+ */
+ while (pk_rdreg(regs, REG_CTL_BASE(user->qid) + REG_CTL_PENDING_REQS)) {
+ attempts++;
+ if (attempts > MAX_FLUSH_WAIT_ATTEMPTS) {
+ dev_err(pkdev->dev,
+ "Time out waiting for hw completions. Resources leaked.\n");
+ goto abort_cleanup;
+ }
+ msleep(20);
+ }
+
+ if (pkdev->work[user->qid]->cq_wq) {
+ kthread_cancel_work_sync(&pkdev->work[user->qid]->cq_work);
+ kthread_destroy_worker(pkdev->work[user->qid]->cq_wq);
+ }
+
+ amdpk_free_rqmem(pkdev, user);
+ if (user->cqmem) {
+ dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+ user->cqmem = NULL;
+ }
+ if (user->stmem) {
+ dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+ user->stmem = NULL;
+ }
+
+ atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+ }
+ ida_free(&pkdev->avail_queues, user->qid);
+
+abort_cleanup:
+ put_device(pkdev->dev);
+ kfree(pkdev->work[user->qid]);
+ pkdev->work[user->qid] = NULL;
+ kfree(user);
+}
+
+static const struct drm_ioctl_desc amdpk_accel_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(AMDPK_GET_INFO, amdpk_accel_get_info, 0),
+ DRM_IOCTL_DEF_DRV(AMDPK_SET_CONF, amdpk_accel_set_conf, 0),
+};
+
+static const struct file_operations amdpk_accel_fops = {
+ .owner = THIS_MODULE,
+ .open = accel_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .llseek = noop_llseek,
+ .mmap = amdpk_accel_mmap,
+};
+
+static const struct drm_driver amdpk_accel_driver = {
+ .driver_features = DRIVER_COMPUTE_ACCEL,
+
+ .name = "amdpk_accel_driver",
+ .desc = "AMD PKI Accelerator for versal-net",
+
+ .fops = &amdpk_accel_fops,
+ .open = amdpk_open,
+ .postclose = amdpk_postclose,
+
+ .ioctls = amdpk_accel_ioctls,
+ .num_ioctls = ARRAY_SIZE(amdpk_accel_ioctls),
+};
+
+static int amdpk_create_device(struct amdpk_dev *pkdev, struct device *dev, int irq)
+{
+ u64 qdepth, ver;
+ long magic;
+ int ret;
+
+ magic = pk_rdreg(pkdev->regs, REG_MAGIC);
+ if (magic != AMDPK_MAGIC) {
+ dev_err(dev, "Invalid magic constant %08lx !\n", magic);
+ return -ENODEV;
+ }
+ ver = pk_rdreg(pkdev->regs, REG_SEMVER);
+ if (AMDPK_SEMVER_MAJOR(ver) != 1 || AMDPK_SEMVER_MINOR(ver) < 1) {
+ dev_err(dev, "Hardware version (%d.%d) not supported.\n",
+ (int)AMDPK_SEMVER_MAJOR(ver), (int)AMDPK_SEMVER_MINOR(ver));
+ return -ENODEV;
+ }
+
+ /* Reset all accelerators and the hw scheduler */
+ pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x1);
+ pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x0);
+
+ pkdev->max_queues = (int)pk_rdreg(pkdev->regs, REG_CFG_REQ_QUEUES_CNT);
+
+ qdepth = pk_rdreg(pkdev->regs, REG_CFG_MAX_PENDING_REQ);
+ atomic_set(&pkdev->avail_qdepth, qdepth);
+
+ pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+ pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, ~0);
+ pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, (1 << pkdev->max_queues) - 1);
+
+ ret = devm_request_irq(dev, irq, amdpk_cq_irq, 0, "amdpk", pkdev);
+ if (ret)
+ return ret;
+
+ ida_init(&pkdev->avail_queues);
+
+ return 0;
+}
+
+static void amdpk_remove_device(struct amdpk_dev *pkdev)
+{
+ drm_dev_unplug(&pkdev->ddev);
+ pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+ ida_destroy(&pkdev->avail_queues);
+}
+
+static int amdpk_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct amdpk_dev *pkdev;
+ struct resource *memres;
+ int irq, ret;
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (ret < 0)
+ return ret;
+
+ pkdev = devm_drm_dev_alloc(dev, &amdpk_accel_driver, typeof(*pkdev), ddev);
+ if (IS_ERR(pkdev))
+ return PTR_ERR(pkdev);
+ pkdev->dev = dev;
+
+ memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ pkdev->regs = devm_ioremap_resource(dev, memres);
+ if (IS_ERR(pkdev->regs))
+ return PTR_ERR(pkdev->regs);
+ pkdev->regsphys = memres->start;
+ platform_set_drvdata(pdev, pkdev);
+
+ if (platform_irq_count(pdev) != 1)
+ return -ENODEV;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return -ENODEV;
+
+ ret = drm_dev_register(&pkdev->ddev, 0);
+ if (ret) {
+ dev_err(&pdev->dev, "DRM register failed, ret %d", ret);
+ return ret;
+ }
+
+ return amdpk_create_device(pkdev, dev, irq);
+}
+
+static void amdpk_remove(struct platform_device *pdev)
+{
+ struct amdpk_dev *pkdev = platform_get_drvdata(pdev);
+
+ amdpk_remove_device(pkdev);
+}
+
+static void amdpk_shutdown(struct platform_device *pdev)
+{
+ amdpk_remove(pdev);
+}
+
+static const struct of_device_id amdpk_match_table[] = {
+ { .compatible = "amd,versal-net-pki" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, amdpk_match_table);
+
+static struct platform_driver amdpk_pdrv = {
+ .probe = amdpk_probe,
+ .remove = amdpk_remove,
+ .shutdown = amdpk_shutdown,
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = amdpk_match_table,
+ },
+};
+
+static int __init amdpk_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&amdpk_pdrv);
+ if (ret) {
+ pr_err("can't register platform driver\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit amdpk_exit(void)
+{
+ platform_driver_unregister(&amdpk_pdrv);
+}
+
+module_init(amdpk_init);
+module_exit(amdpk_exit);
+
+MODULE_AUTHOR("AMD");
+MODULE_DESCRIPTION("AMD PKI accelerator for versal-net");
+MODULE_LICENSE("GPL");
diff --git a/drivers/accel/amdpk/amdpk_drv.h b/drivers/accel/amdpk/amdpk_drv.h
new file mode 100644
index 000000000000..c14c10db5d97
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_DRV_H__
+#define __AMDPK_DRV_H__
+
+#include <linux/types.h>
+#include <linux/kthread.h>
+#include <linux/io.h>
+#include <drm/drm_drv.h>
+#include <uapi/drm/amdpk.h>
+
+/* Magic number in the AMD PKI device, required to validate hardware access. */
+#define AMDPK_MAGIC 0x5113C50C
+
+/* Contains the magic number 0x5113C50C.
+ * Used to validate access to the hardware registers.
+ */
+#define REG_MAGIC (0x00)
+
+/* Contains the version of the hardware interface as semver.
+ * The semantic version: major 8 bits, minor 8 bits, in little-endian order.
+ */
+#define REG_SEMVER (0x08)
+
+/* The number of request queues available in the hardware. */
+#define REG_CFG_REQ_QUEUES_CNT 0x10
+
+/* The maximum number of pending requests from all request queues combined. */
+#define REG_CFG_MAX_PENDING_REQ 0x18
+
+/* The maximum number of pending requests in a single request queue. */
+#define REG_CFG_MAX_REQ_QUEUE_ENTRIES 0x0020
+
+/* The first 16 bits give the number of PK core instances with 64 multipliers.
+ * The next 16 bits give the number of PK core instances with 256 multipliers.
+ */
+#define REG_CFG_PK_INST 0x28
+
+/* Writing 0x1 puts all pkcore accelerators and the scheduler in reset.
+ * Writing 0x0 makes all pkcore accelerators and the scheduler leave reset
+ * and become operational.
+ */
+#define REG_PK_GLOBAL_STATE 0x38
+
+/* The semantic version: major 8 bits, minor 8 bits,
+ * SCM id 16 bits, in little-endian order.
+ */
+#define REG_HW_VERSION (0x40)
+
+/* Bitmask of which CQ interrupts are raised. */
+#define REG_PK_IRQ_STATUS 0x88
+
+/* Bitmask of which CQ may trigger interrupts. */
+#define REG_IRQ_ENABLE 0x90
+
+/* Bitmask of CQ interrupts to reset. */
+#define REG_PK_IRQ_RESET 0xA0
+
+/* Bus address of page pageidx for the given request queue.
+ * The address must be aligned on the page size.
+ */
+#define REG_RQ_CFG_PAGE(qid, pageidx) (0x00100 + (qid) * 0x80 + (pageidx) * 0x8)
+
+/* Size in bytes of the pages represented as a power of 2.
+ *
+ * Allowed values :
+ * ================ ==============
+ * register value size in bytes
+ * ================ ==============
+ * 7 128
+ * 8 256
+ * 9 512
+ * 10 1024
+ * 11 2048
+ * 12 4096
+ * 13 8192
+ * 14 16384
+ * 15 32768
+ * 16 65536
+ * ================ ==============
+ */
+#define REG_RQ_CFG_PAGE_SIZE(qid) (0x00120 + (qid) * 0x80)
+
+/* Index of the associated completion queue. */
+#define REG_RQ_CFG_CQID(qid) (0x00128 + (qid) * 0x80)
+
+/* Bit field of pages a descriptor can write to.
+ * When a bit is 1, a descriptor can write to the corresponding page.
+ */
+#define REG_RQ_CFG_PAGES_WREN(qid) (0x00138 + (qid) * 0x80)
+
+/* Maximum number of entries which can be written into this request queue. */
+#define REG_RQ_CFG_DEPTH(qid) (0x00140 + (qid) * 0x80)
+
+/* Bus address of the ring base of completion queue n.
+ * The address must be aligned on 64 bits.
+ */
+#define REG_CQ_CFG_ADDR(qid) (0x1100 + (qid) * 0x80)
+
+/* CQ notification trigger position. */
+#define REG_CTL_CQ_NTFY(qid) (0x2028 + (qid) * 0x1000)
+
+/* Size in bytes of the completion ring represented as a power of 2.
+ *
+ * Allowed sizes :
+ * ================ ============== ==============
+ * register value size in bytes max entries
+ * ================ ============== ==============
+ * 7 128 16
+ * 8 256 32
+ * 9 512 64
+ * 10 1024 128
+ * 11 2048 256
+ * 12 4096 512
+ * 13 8192 1024
+ * 14 16384 2048
+ * ================ ============== ==============
+ */
+#define REG_CQ_CFG_SIZE(qid) (0x1108 + (qid) * 0x80)
+
+/* Interrupt number for this completion queue. */
+#define REG_CQ_CFG_IRQ_NR(qid) (0x1110 + (qid) * 0x80)
+
+/* Control registers base address for the given request completion queue pair. */
+#define REG_CTL_BASE(qid) (0x2000 + (qid) * 0x1000)
+
+/* Count of how many requests are queued at a given time for this RQCQ.
+ * When this count reaches 0, the resources of the request and
+ * completion queues can be deleted.
+ */
+#define REG_CTL_PENDING_REQS 0x18
+
+/* Busy cycle count register address. */
+#define REG_PK_BUSY_CYCLES 0x2108
+/* Idle cycle count register address. */
+#define REG_PK_IDLE_CYCLES 0x2110
+
+/* Hardware interface versions. */
+#define AMDPK_SEMVER_MAJOR(v) (((v) >> 24) & 0xff)
+#define AMDPK_SEMVER_MINOR(v) (((v) >> 16) & 0xff)
+#define AMDPK_SEMVER_PATCH(v) ((v) & 0xffff)
+
+/* Hardware implementation versions. */
+#define AMDPK_HWVER_MAJOR(v) (((v) >> 24) & 0xff)
+#define AMDPK_HWVER_MINOR(v) (((v) >> 16) & 0xff)
+#define AMDPK_HWVER_SVN(v) ((v) & 0xffff)
+
+/* Maximum number of queues supported by the driver. */
+#define MAX_QUEUES 4
+
+/* Number of RQ memory addresses for each queue. */
+#define MAX_RQMEM_PER_QUEUE 4
+
+/* Wait attempts for HW to flush all requests before close. */
+#define MAX_FLUSH_WAIT_ATTEMPTS 500
+
+/* Bit 0 (0x1) is the Generation bit. */
+#define CQ_GENERATION_BIT BIT(0)
+
+/* Bit 1 (0x2) is set when completion is valid. */
+#define CQ_COMPLETION_BIT BIT(1)
+
+/* Maximum value of rq_entries is 512 for a 4K page: there is one CQ page
+ * and each completion status is 8 bytes, so only 4096 / 8 = 512 entries
+ * are possible at any time.
+ */
+#define MAX_CQ_ENTRIES_ON_PAGE (PAGE_SIZE / 8)
+
+/* Forward declaration */
+struct amdpk_dev;
+struct amdpk_user;
+
+/* structure to hold completion queue information */
+struct amdpk_cq {
+ /* PKI device */
+ struct amdpk_dev *pkdev;
+ /* Base address of the completion queue */
+ u32 *base;
+ /* tail representing last completion */
+ unsigned int tail;
+ /* generation bit which toggles as per the device */
+ unsigned int generation;
+ /* size code as configured in REG_RQ_CFG_PAGE_SIZE */
+ u16 szcode;
+};
+
+/* represents PKI work context */
+struct amdpk_work {
+ /* PKI device */
+ struct amdpk_dev *pkdev;
+ /* PKI user */
+ struct amdpk_user *user;
+ /* Completion queue */
+ struct amdpk_cq pk_cq;
+ /* Kthread work associated with the PKI work */
+ struct kthread_work cq_work;
+ /* Kthread worker to handle completions */
+ struct kthread_worker *cq_wq;
+ /* Associated queue ID */
+ u16 qid;
+};
+
+/* AMD PKI device */
+struct amdpk_dev {
+ /* DRM device associated with PKI device */
+ struct drm_device ddev;
+ /* Core device */
+ struct device *dev;
+ /* PKI register space address */
+ char __iomem *regs;
+ /* PKI register space physical address */
+ resource_size_t regsphys;
+ /* Maximum queues supported by device. */
+ u16 max_queues;
+ /* IDA of available queue IDs */
+ struct ida avail_queues;
+ /* Total available queue depth across all queues */
+ atomic_t avail_qdepth;
+ /* List of all the AMD users */
+ struct amdpk_user *users[MAX_QUEUES];
+ /* PKI work for each queue */
+ struct amdpk_work *work[MAX_QUEUES];
+};
+
+/* AMD PKI user */
+struct amdpk_user {
+ /* PKI device */
+ struct amdpk_dev *pkdev;
+ /* Indicates if user has been configured */
+ bool configured;
+ /* Queue ID allocated for the user */
+ u16 qid;
+ /* Number of pages allocated on request queue */
+ u16 rq_pages;
+ /* RQ entries reserved for this user */
+ size_t rq_entries;
+ /* DMA address for RQ pages */
+ dma_addr_t physrq[MAX_RQMEM_PER_QUEUE];
+ /* RQ pages addresses */
+ u8 *rqmem[MAX_RQMEM_PER_QUEUE];
+ /* DMA address for CQ page */
+ dma_addr_t physcq;
+ /* CQ page address */
+ u8 *cqmem;
+ /* DMA address for status page */
+ dma_addr_t physst;
+ /* Status page address */
+ u8 *stmem;
+ /* Eventfd context for each request */
+ struct eventfd_ctx *evfd_ctx[MAX_PK_REQS];
+};
+
+#define to_amdpk_dev(dev) container_of(dev, struct amdpk_dev, ddev)
+#define to_amdpk_work(work) container_of(work, struct amdpk_work, cq_work)
+
+static inline void pk_wrreg(char __iomem *regs, int addr, u64 val)
+{
+ iowrite64(val, regs + addr);
+}
+
+static inline u64 pk_rdreg(char __iomem *regs, int addr)
+{
+ return ioread64(regs + addr);
+}
+
+#endif /* __AMDPK_DRV_H__ */
diff --git a/include/uapi/drm/amdpk.h b/include/uapi/drm/amdpk.h
new file mode 100644
index 000000000000..e5e18fdbc2c4
--- /dev/null
+++ b/include/uapi/drm/amdpk.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_H__
+#define __AMDPK_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MAX_PK_REQS 256
+
+struct amdpk_info {
+ /** maximum available queue depth */
+ unsigned int avail_qdepth;
+};
+
+struct amdpk_conf {
+ /** queue depth to configure */
+ unsigned int qdepth;
+ /** eventfds associated with the request slots */
+ int eventfd[MAX_PK_REQS];
+};
+
+/* IOCTL */
+#define DRM_AMDPK_GET_INFO 0x0
+#define DRM_AMDPK_SET_CONF 0x1
+
+#define DRM_IOCTL_AMDPK_GET_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_GET_INFO, \
+ struct amdpk_info)
+#define DRM_IOCTL_AMDPK_SET_CONF DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_SET_CONF, \
+ struct amdpk_conf)
+
+/* MMAP */
+#define AMDPK_MMAP_REGS 0
+#define AMDPK_MMAP_MEM 1
+
+/* Completion Status */
+#define CQ_STATUS_INVALID 0x0
+#define CQ_STATUS_VALID 0x80000000
+#define CQ_COMPLETION_ERROR 0x40000000
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __AMDPK_H__ */
--
2.34.1