lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZMlMuEdVmQGRVlCV@matsya>
Date:   Tue, 1 Aug 2023 23:49:36 +0530
From:   Vinod Koul <vkoul@...nel.org>
To:     Lizhi Hou <lizhi.hou@....com>
Cc:     dmaengine@...r.kernel.org, linux-kernel@...r.kernel.org,
        Nishad Saraf <nishads@....com>, nishad.saraf@....com,
        sonal.santan@....com, max.zhen@....com
Subject: Re: [PATCH V3 1/1] dmaengine: amd: qdma: Add AMD QDMA driver

On 31-07-23, 10:12, Lizhi Hou wrote:
> From: Nishad Saraf <nishads@....com>
> 
> Adds driver to enable PCIe board which uses AMD QDMA (the Queue-based
> Direct Memory Access) subsystem. For example, Xilinx Alveo V70 AI
> Accelerator devices.
>     https://www.xilinx.com/applications/data-center/v70.html
> 
> The primary mechanism to transfer data using the QDMA is for the QDMA
> engine to operate on instructions (descriptors) provided by the host
> operating system. Using the descriptors, the QDMA can move data in both
> the Host to Card (H2C) direction, or the Card to Host (C2H) direction.
> The QDMA provides a per-queue basis option whether DMA traffic goes
> to an AXI4 memory map (MM) interface or to an AXI4-Stream interface.
> 
> The hardware detail is provided by
>     https://docs.xilinx.com/r/en-US/pg302-qdma
> 
> Implements dmaengine APIs to support MM DMA transfers.
> - probe the available DMA channels
> - use dma_slave_map for channel lookup
> - use virtual channel to manage dmaengine tx descriptors
> - implement device_prep_slave_sg callback to handle host scatter gather
>   list

So you claim to support only mem-to-mem transfers, how are you
supporting a slave api, that is for peripheral devices...

> - implement descriptor metadata operations to set device address for DMA
>   transfer
> 
> Signed-off-by: Nishad Saraf <nishads@....com>
> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
> ---
>  MAINTAINERS                            |    9 +
>  drivers/dma/Kconfig                    |   13 +
>  drivers/dma/Makefile                   |    1 +
>  drivers/dma/amd/Makefile               |    8 +
>  drivers/dma/amd/qdma-comm-regs.c       |   66 ++
>  drivers/dma/amd/qdma.c                 | 1189 ++++++++++++++++++++++++
>  drivers/dma/amd/qdma.h                 |  269 ++++++
>  include/linux/platform_data/amd_qdma.h |   36 +
>  8 files changed, 1591 insertions(+)
>  create mode 100644 drivers/dma/amd/Makefile
>  create mode 100644 drivers/dma/amd/qdma-comm-regs.c
>  create mode 100644 drivers/dma/amd/qdma.c
>  create mode 100644 drivers/dma/amd/qdma.h
>  create mode 100644 include/linux/platform_data/amd_qdma.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 53b7ca804465..a15c03738188 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1037,6 +1037,15 @@ L:	dmaengine@...r.kernel.org
>  S:	Maintained
>  F:	drivers/dma/ptdma/
>  
> +AMD QDMA DRIVER
> +M:	Nishad Saraf <nishads@....com>
> +M:	Lizhi Hou <lizhi.hou@....com>
> +L:	dmaengine@...r.kernel.org
> +S:	Supported
> +F:	drivers/dma/amd/qdma.c
> +F:	drivers/dma/amd/qdma.h
> +F:	include/linux/platform_data/amd_qdma.h
> +
>  AMD SEATTLE DEVICE TREE SUPPORT
>  M:	Suravee Suthikulpanit <suravee.suthikulpanit@....com>
>  M:	Tom Lendacky <thomas.lendacky@....com>
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index 644c188d6a11..3646fe1ab347 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -85,6 +85,19 @@ config AMCC_PPC440SPE_ADMA
>  	help
>  	  Enable support for the AMCC PPC440SPe RAID engines.
>  
> +config AMD_QDMA
> +	tristate "AMD Queue-based DMA"
> +	depends on HAS_IOMEM
> +	select DMA_ENGINE
> +	select DMA_VIRTUAL_CHANNELS
> +	select REGMAP_MMIO
> +	help
> +	  Enable support for the AMD Queue-based DMA subsystem. The primary
> +	  mechanism to transfer data using the QDMA is for the QDMA engine to
> +	  operate on instructions (descriptors) provided by the host operating
> +	  system. Using the descriptors, the QDMA can move data in both the Host
> +	  to Card (H2C) direction, or the Card to Host (C2H) direction.
> +
>  config APPLE_ADMAC
>  	tristate "Apple ADMAC support"
>  	depends on ARCH_APPLE || COMPILE_TEST
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index a4fd1ce29510..75988d364ef4 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -82,6 +82,7 @@ obj-$(CONFIG_ST_FDMA) += st_fdma.o
>  obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
>  obj-$(CONFIG_INTEL_LDMA) += lgm/
>  
> +obj-y += amd/
>  obj-y += mediatek/
>  obj-y += qcom/
>  obj-y += ti/
> diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
> new file mode 100644
> index 000000000000..ba53971d2714
> --- /dev/null
> +++ b/drivers/dma/amd/Makefile
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Copyright (C) 2023, Advanced Micro Devices, Inc.
> +#
> +
> +obj-$(CONFIG_AMD_QDMA)			+= amd-qdma.o
> +
> +amd-qdma-$(CONFIG_AMD_QDMA)		:= qdma.o qdma-comm-regs.o
> diff --git a/drivers/dma/amd/qdma-comm-regs.c b/drivers/dma/amd/qdma-comm-regs.c
> new file mode 100644
> index 000000000000..59d66b2fec54
> --- /dev/null
> +++ b/drivers/dma/amd/qdma-comm-regs.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * DMA header for AMD CPM5 Queue-based DMA Subsystem variant
> + *
> + * Copyright (C) 2023, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef __QDMA_REGS_DEF_H
> +#define __QDMA_REGS_DEF_H
> +
> +#include "qdma.h"
> +
> +const struct qdma_reg qdma_regos_default[QDMA_REGO_MAX] = {
> +	[QDMA_REGO_CTXT_DATA] = QDMA_REGO(0x804, 8),
> +	[QDMA_REGO_CTXT_CMD] = QDMA_REGO(0x844, 1),
> +	[QDMA_REGO_CTXT_MASK] = QDMA_REGO(0x824, 8),
> +	[QDMA_REGO_MM_H2C_CTRL] = QDMA_REGO(0x1004, 1),
> +	[QDMA_REGO_MM_C2H_CTRL] = QDMA_REGO(0x1204, 1),
> +	[QDMA_REGO_QUEUE_COUNT] = QDMA_REGO(0x120, 1),
> +	[QDMA_REGO_RING_SIZE] = QDMA_REGO(0x204, 1),
> +	[QDMA_REGO_H2C_PIDX] = QDMA_REGO(0x18004, 1),
> +	[QDMA_REGO_C2H_PIDX] = QDMA_REGO(0x18008, 1),
> +	[QDMA_REGO_INTR_CIDX] = QDMA_REGO(0x18000, 1),
> +	[QDMA_REGO_FUNC_ID] = QDMA_REGO(0x12c, 1),
> +	[QDMA_REGO_ERR_INT] = QDMA_REGO(0xb04, 1),
> +	[QDMA_REGO_ERR_STAT] = QDMA_REGO(0x248, 1),
> +};
> +
> +const struct qdma_reg_field qdma_regfs_default[QDMA_REGF_MAX] = {
> +	/* QDMA_REGO_CTXT_DATA fields */
> +	[QDMA_REGF_IRQ_ENABLE] = QDMA_REGF(53, 53),
> +	[QDMA_REGF_WBK_ENABLE] = QDMA_REGF(52, 52),
> +	[QDMA_REGF_WBI_CHECK] = QDMA_REGF(34, 34),
> +	[QDMA_REGF_IRQ_ARM] = QDMA_REGF(16, 16),
> +	[QDMA_REGF_IRQ_VEC] = QDMA_REGF(138, 128),
> +	[QDMA_REGF_IRQ_AGG] = QDMA_REGF(139, 139),
> +	[QDMA_REGF_WBI_INTVL_ENABLE] = QDMA_REGF(35, 35),
> +	[QDMA_REGF_MRKR_DISABLE] = QDMA_REGF(62, 62),
> +	[QDMA_REGF_QUEUE_ENABLE] = QDMA_REGF(32, 32),
> +	[QDMA_REGF_QUEUE_MODE] = QDMA_REGF(63, 63),
> +	[QDMA_REGF_DESC_BASE] = QDMA_REGF(127, 64),
> +	[QDMA_REGF_DESC_SIZE] = QDMA_REGF(49, 48),
> +	[QDMA_REGF_RING_ID] = QDMA_REGF(47, 44),
> +	[QDMA_REGF_QUEUE_BASE] = QDMA_REGF(11, 0),
> +	[QDMA_REGF_QUEUE_MAX] = QDMA_REGF(44, 32),
> +	[QDMA_REGF_FUNCTION_ID] = QDMA_REGF(24, 17),
> +	[QDMA_REGF_INTR_AGG_BASE] = QDMA_REGF(66, 15),
> +	[QDMA_REGF_INTR_VECTOR] = QDMA_REGF(11, 1),
> +	[QDMA_REGF_INTR_SIZE] = QDMA_REGF(69, 67),
> +	[QDMA_REGF_INTR_VALID] = QDMA_REGF(0, 0),
> +	[QDMA_REGF_INTR_COLOR] = QDMA_REGF(14, 14),
> +	[QDMA_REGF_INTR_FUNCTION_ID] = QDMA_REGF(125, 114),
> +	/* QDMA_REGO_CTXT_CMD fields */
> +	[QDMA_REGF_CMD_INDX] = QDMA_REGF(19, 7),
> +	[QDMA_REGF_CMD_CMD] = QDMA_REGF(6, 5),
> +	[QDMA_REGF_CMD_TYPE] = QDMA_REGF(4, 1),
> +	[QDMA_REGF_CMD_BUSY] = QDMA_REGF(0, 0),
> +	/* QDMA_REGO_QUEUE_COUNT fields */
> +	[QDMA_REGF_QUEUE_COUNT] = QDMA_REGF(11, 0),
> +	/* QDMA_REGO_ERR_INT fields */
> +	[QDMA_REGF_ERR_INT_FUNC] = QDMA_REGF(11, 0),
> +	[QDMA_REGF_ERR_INT_VEC] = QDMA_REGF(22, 12),
> +	[QDMA_REGF_ERR_INT_ARM] = QDMA_REGF(24, 24),
> +};
> +
> +#endif	/* __QDMA_CPM5_H */
> diff --git a/drivers/dma/amd/qdma.c b/drivers/dma/amd/qdma.c
> new file mode 100644
> index 000000000000..b65214341551
> --- /dev/null
> +++ b/drivers/dma/amd/qdma.c
> @@ -0,0 +1,1189 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * DMA driver for AMD Queue-based DMA Subsystem
> + *
> + * Copyright (C) 2023, Advanced Micro Devices, Inc.
> + */
> +#include <linux/bitfield.h>
> +#include <linux/bitops.h>
> +#include <linux/dmaengine.h>
> +#include <linux/module.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/dma-map-ops.h>
> +#include <linux/platform_device.h>
> +#include <linux/platform_data/amd_qdma.h>
> +#include <linux/regmap.h>
> +
> +#include "../virt-dma.h"
> +#include "qdma.h"
> +
> +#define CHAN_STR(q)		(((q)->dir == DMA_MEM_TO_DEV) ? "H2C" : "C2H")
> +#define QDMA_REG_OFF(d, r)	((d)->roffs[r].off)
> +
> +/* MMIO regmap config for all QDMA registers */
> +static const struct regmap_config qdma_regmap_config = {
> +	.reg_bits = 32,
> +	.val_bits = 32,
> +	.reg_stride = 4,
> +};
> +
> +static inline struct qdma_queue *to_qdma_queue(struct dma_chan *chan)
> +{
> +	return container_of(chan, struct qdma_queue, vchan.chan);
> +}
> +
> +static inline struct qdma_mm_vdesc *to_qdma_vdesc(struct virt_dma_desc *vdesc)
> +{
> +	return container_of(vdesc, struct qdma_mm_vdesc, vdesc);
> +}
> +
> +static inline u32 qdma_get_intr_ring_idx(struct qdma_device *qdev)
> +{
> +	u32 idx;
> +
> +	idx = qdev->qintr_rings[qdev->qintr_ring_idx++].ridx;
> +	qdev->qintr_ring_idx %= qdev->qintr_ring_num;
> +
> +	return idx;
> +}
> +
> +static u64 qdma_get_field(const struct qdma_device *qdev, const u32 *data,
> +			  enum qdma_reg_fields field)
> +{
> +	const struct qdma_reg_field *f = &qdev->rfields[field];
> +	u16 low_pos, hi_pos, low_bit, hi_bit;
> +	u64 value = 0, mask;
> +
> +	low_pos = f->lsb / BITS_PER_TYPE(*data);
> +	hi_pos = f->msb / BITS_PER_TYPE(*data);
> +
> +	if (low_pos == hi_pos) {
> +		low_bit = f->lsb % BITS_PER_TYPE(*data);
> +		hi_bit = f->msb % BITS_PER_TYPE(*data);
> +		mask = GENMASK(hi_bit, low_bit);
> +		value = (data[low_pos] & mask) >> low_bit;
> +	} else if (hi_pos == low_pos + 1) {
> +		low_bit = f->lsb % BITS_PER_TYPE(*data);
> +		hi_bit = low_bit + (f->msb - f->lsb);
> +		value = ((u64)data[hi_pos] << BITS_PER_TYPE(*data)) |
> +			data[low_pos];
> +		mask = GENMASK_ULL(hi_bit, low_bit);
> +		value = (value & mask) >> low_bit;
> +	} else {
> +		hi_bit = f->msb % BITS_PER_TYPE(*data);
> +		mask = GENMASK(hi_bit, 0);
> +		value = data[hi_pos] & mask;
> +		low_bit = f->msb - f->lsb - hi_bit;
> +		value <<= low_bit;
> +		low_bit -= 32;
> +		value |= (u64)data[hi_pos - 1] << low_bit;
> +		mask = GENMASK(31, 32 - low_bit);
> +		value |= (data[hi_pos - 2] & mask) >> low_bit;
> +	}
> +
> +	return value;
> +}
> +
> +static void qdma_set_field(const struct qdma_device *qdev, u32 *data,
> +			   enum qdma_reg_fields field, u64 value)
> +{
> +	const struct qdma_reg_field *f = &qdev->rfields[field];
> +	u16 low_pos, hi_pos, low_bit;
> +
> +	low_pos = f->lsb / BITS_PER_TYPE(*data);
> +	hi_pos = f->msb / BITS_PER_TYPE(*data);
> +	low_bit = f->lsb % BITS_PER_TYPE(*data);
> +
> +	data[low_pos++] |= value << low_bit;
> +	if (low_pos <= hi_pos)
> +		data[low_pos++] |= (u32)(value >> (32 - low_bit));
> +	if (low_pos <= hi_pos)
> +		data[low_pos] |= (u32)(value >> (64 - low_bit));
> +}
> +
> +static inline int qdma_reg_write(const struct qdma_device *qdev,
> +				 const u32 *data, enum qdma_regs reg)
> +{
> +	const struct qdma_reg *r = &qdev->roffs[reg];
> +	int ret;
> +
> +	if (r->count > 1)
> +		ret = regmap_bulk_write(qdev->regmap, r->off, data, r->count);
> +	else
> +		ret = regmap_write(qdev->regmap, r->off, *data);
> +
> +	return ret;
> +}
> +
> +static inline int qdma_reg_read(const struct qdma_device *qdev, u32 *data,
> +				enum qdma_regs reg)
> +{
> +	const struct qdma_reg *r = &qdev->roffs[reg];
> +	int ret;
> +
> +	if (r->count > 1)
> +		ret = regmap_bulk_read(qdev->regmap, r->off, data, r->count);
> +	else
> +		ret = regmap_read(qdev->regmap, r->off, data);
> +
> +	return ret;
> +}
> +
> +static int qdma_context_cmd_execute(const struct qdma_device *qdev,
> +				    enum qdma_ctxt_type type,
> +				    enum qdma_ctxt_cmd cmd, u16 index)
> +{
> +	u32 value = 0;
> +	int ret;
> +
> +	qdma_set_field(qdev, &value, QDMA_REGF_CMD_INDX, index);
> +	qdma_set_field(qdev, &value, QDMA_REGF_CMD_CMD, cmd);
> +	qdma_set_field(qdev, &value, QDMA_REGF_CMD_TYPE, type);
> +
> +	ret = qdma_reg_write(qdev, &value, QDMA_REGO_CTXT_CMD);
> +	if (ret)
> +		return ret;
> +
> +	ret = regmap_read_poll_timeout(qdev->regmap,
> +				       QDMA_REG_OFF(qdev, QDMA_REGO_CTXT_CMD),
> +				       value,
> +				       !qdma_get_field(qdev, &value,
> +						       QDMA_REGF_CMD_BUSY),
> +				       QDMA_POLL_INTRVL_US,
> +				       QDMA_POLL_TIMEOUT_US);
> +	if (ret) {
> +		qdma_err(qdev, "Context command execution timed out");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int qdma_context_write_data(const struct qdma_device *qdev,
> +				   const u32 *data)
> +{
> +	u32 mask[QDMA_CTXT_REGMAP_LEN];
> +	int ret;
> +
> +	memset(mask, ~0, sizeof(mask));
> +
> +	ret = qdma_reg_write(qdev, mask, QDMA_REGO_CTXT_MASK);
> +	if (ret)
> +		return ret;
> +
> +	ret = qdma_reg_write(qdev, data, QDMA_REGO_CTXT_DATA);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static void qdma_prep_sw_desc_context(const struct qdma_device *qdev,
> +				      const struct qdma_ctxt_sw_desc *ctxt,
> +				      u32 *data)
> +{
> +	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
> +	qdma_set_field(qdev, data, QDMA_REGF_DESC_BASE, ctxt->desc_base);
> +	qdma_set_field(qdev, data, QDMA_REGF_IRQ_VEC, ctxt->vec);
> +	qdma_set_field(qdev, data, QDMA_REGF_FUNCTION_ID, qdev->fid);
> +
> +	qdma_set_field(qdev, data, QDMA_REGF_DESC_SIZE, QDMA_DESC_SIZE_32B);
> +	qdma_set_field(qdev, data, QDMA_REGF_RING_ID, QDMA_DEFAULT_RING_ID);
> +	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MODE, QDMA_QUEUE_OP_MM);
> +	qdma_set_field(qdev, data, QDMA_REGF_IRQ_ENABLE, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_WBK_ENABLE, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_WBI_CHECK, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_IRQ_ARM, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_IRQ_AGG, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_WBI_INTVL_ENABLE, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_ENABLE, 1);
> +	qdma_set_field(qdev, data, QDMA_REGF_MRKR_DISABLE, 1);
> +}
> +
> +static void qdma_prep_intr_context(const struct qdma_device *qdev,
> +				   const struct qdma_ctxt_intr *ctxt,
> +				   u32 *data)
> +{
> +	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_AGG_BASE, ctxt->agg_base);
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_VECTOR, ctxt->vec);
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_SIZE, ctxt->size);
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_VALID, ctxt->valid);
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_COLOR, ctxt->color);
> +	qdma_set_field(qdev, data, QDMA_REGF_INTR_FUNCTION_ID, qdev->fid);
> +}
> +
> +static void qdma_prep_fmap_context(const struct qdma_device *qdev,
> +				   const struct qdma_ctxt_fmap *ctxt,
> +				   u32 *data)
> +{
> +	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
> +	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_BASE, ctxt->qbase);
> +	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MAX, ctxt->qmax);
> +}
> +
> +/*
> + * Program the indirect context register space
> + *
> + * Once the queue is enabled, context is dynamically updated by hardware. Any
> + * modification of the context through this API when the queue is enabled can
> + * result in unexpected behavior. Reading the context when the queue is enabled
> + * is not recommended as it can result in reduced performance.
> + */
> +static int qdma_prog_context(struct qdma_device *qdev, enum qdma_ctxt_type type,
> +			     enum qdma_ctxt_cmd cmd, u16 index, u32 *ctxt)
> +{
> +	int ret;
> +
> +	mutex_lock(&qdev->ctxt_lock);
> +	if (cmd == QDMA_CTXT_WRITE) {
> +		ret = qdma_context_write_data(qdev, ctxt);
> +		if (ret)
> +			goto failed;
> +	}
> +
> +	ret = qdma_context_cmd_execute(qdev, type, cmd, index);
> +	if (ret)
> +		goto failed;
> +
> +	if (cmd == QDMA_CTXT_READ) {
> +		ret = qdma_reg_read(qdev, ctxt, QDMA_REGO_CTXT_DATA);
> +		if (ret)
> +			goto failed;
> +	}
> +
> +failed:
> +	mutex_unlock(&qdev->ctxt_lock);
> +
> +	return ret;
> +}
> +
> +static int qdma_check_queue_status(struct qdma_device *qdev,
> +				   enum dma_transfer_direction dir, u16 qid)
> +{
> +	u32 status, data[QDMA_CTXT_REGMAP_LEN] = {0};
> +	enum qdma_ctxt_type type;
> +	int ret;
> +
> +	if (dir == DMA_MEM_TO_DEV)
> +		type = QDMA_CTXT_DESC_SW_H2C;
> +	else
> +		type = QDMA_CTXT_DESC_SW_C2H;
> +
> +	ret = qdma_prog_context(qdev, type, QDMA_CTXT_READ, qid, data);
> +	if (ret)
> +		return ret;
> +
> +	status = qdma_get_field(qdev, data, QDMA_REGF_QUEUE_ENABLE);
> +	if (status) {
> +		qdma_err(qdev, "queue %d already in use", qid);
> +		return -EBUSY;
> +	}
> +
> +	return 0;
> +}
> +
> +static int qdma_clear_queue_context(const struct qdma_queue *queue)
> +{
> +	enum qdma_ctxt_type h2c_types[] = { QDMA_CTXT_DESC_SW_H2C,
> +					    QDMA_CTXT_DESC_HW_H2C,
> +					    QDMA_CTXT_DESC_CR_H2C,
> +					    QDMA_CTXT_PFTCH, };
> +	enum qdma_ctxt_type c2h_types[] = { QDMA_CTXT_DESC_SW_C2H,
> +					    QDMA_CTXT_DESC_HW_C2H,
> +					    QDMA_CTXT_DESC_CR_C2H,
> +					    QDMA_CTXT_PFTCH, };
> +	struct qdma_device *qdev = queue->qdev;
> +	enum qdma_ctxt_type *type;
> +	int ret, num, i;
> +
> +	if (queue->dir == DMA_MEM_TO_DEV) {
> +		type = h2c_types;
> +		num = ARRAY_SIZE(h2c_types);
> +	} else {
> +		type = c2h_types;
> +		num = ARRAY_SIZE(c2h_types);
> +	}
> +	for (i = 0; i < num; i++) {
> +		ret = qdma_prog_context(qdev, type[i], QDMA_CTXT_CLEAR,
> +					queue->qid, NULL);
> +		if (ret) {
> +			qdma_err(qdev, "Failed to clear ctxt %d", type[i]);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int qdma_setup_fmap_context(struct qdma_device *qdev)
> +{
> +	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
> +	struct qdma_ctxt_fmap fmap;
> +	int ret;
> +
> +	ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_CLEAR,
> +				qdev->fid, NULL);
> +	if (ret) {
> +		qdma_err(qdev, "Failed clearing context");
> +		return ret;
> +	}
> +
> +	fmap.qbase = 0;
> +	fmap.qmax = qdev->chan_num * 2;
> +	qdma_prep_fmap_context(qdev, &fmap, ctxt);
> +	ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_WRITE,
> +				qdev->fid, ctxt);
> +	if (ret)
> +		qdma_err(qdev, "Failed setup fmap, ret %d", ret);
> +
> +	return ret;
> +}
> +
> +static int qdma_setup_queue_context(struct qdma_device *qdev,
> +				    const struct qdma_ctxt_sw_desc *sw_desc,
> +				    enum dma_transfer_direction dir, u16 qid)
> +{
> +	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
> +	enum qdma_ctxt_type type;
> +	int ret;
> +
> +	if (dir == DMA_MEM_TO_DEV)
> +		type = QDMA_CTXT_DESC_SW_H2C;
> +	else
> +		type = QDMA_CTXT_DESC_SW_C2H;
> +
> +	qdma_prep_sw_desc_context(qdev, sw_desc, ctxt);
> +	/* Setup SW descriptor context */
> +	ret = qdma_prog_context(qdev, type, QDMA_CTXT_WRITE, qid, ctxt);
> +	if (ret)
> +		qdma_err(qdev, "Failed setup SW desc ctxt for queue: %d", qid);
> +
> +	return ret;
> +}
> +
> +/*
> + * Enable or disable memory-mapped DMA engines
> + * 1: enable, 0: disable
> + */
> +static int qdma_sgdma_control(struct qdma_device *qdev, u32 ctrl)
> +{
> +	int ret;
> +
> +	ret = qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_H2C_CTRL);
> +	ret |= qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_C2H_CTRL);
> +
> +	return ret;
> +}
> +
> +static int qdma_get_hw_info(struct qdma_device *qdev)
> +{
> +	struct qdma_platdata *pdata = dev_get_platdata(&qdev->pdev->dev);
> +	u32 value = 0;
> +	int ret;
> +
> +	ret = qdma_reg_read(qdev, &value, QDMA_REGO_QUEUE_COUNT);
> +	if (ret)
> +		return ret;
> +
> +	value = qdma_get_field(qdev, &value, QDMA_REGF_QUEUE_COUNT) + 1;
> +	if (pdata->max_mm_channels * 2 > value) {
> +		qdma_err(qdev, "not enough hw queues %d", value);
> +		return -EINVAL;
> +	}
> +	qdev->chan_num = pdata->max_mm_channels;
> +
> +	ret = qdma_reg_read(qdev, &qdev->fid, QDMA_REGO_FUNC_ID);
> +	if (ret)
> +		return ret;
> +
> +	qdma_info(qdev, "max channel %d, function id %d",
> +		  qdev->chan_num, qdev->fid);
> +
> +	return 0;
> +}
> +
> +static inline int qdma_update_pidx(const struct qdma_queue *queue, u16 pidx)
> +{
> +	struct qdma_device *qdev = queue->qdev;
> +
> +	return regmap_write(qdev->regmap, queue->pidx_reg,
> +			    pidx | QDMA_QUEUE_ARM_BIT);
> +}
> +
> +static inline int qdma_update_cidx(const struct qdma_queue *queue,
> +				   u16 ridx, u16 cidx)
> +{
> +	struct qdma_device *qdev = queue->qdev;
> +
> +	return regmap_write(qdev->regmap, queue->cidx_reg,
> +			    ((u32)ridx << 16) | cidx);
> +}
> +
> +/**
> + * qdma_free_vdesc - Free descriptor
> + * @vdesc: Virtual DMA descriptor
> + */
> +static void qdma_free_vdesc(struct virt_dma_desc *vdesc)
> +{
> +	struct qdma_mm_vdesc *vd = to_qdma_vdesc(vdesc);
> +
> +	kfree(vd);
> +}
> +
> +static int qdma_alloc_queues(struct qdma_device *qdev,
> +			     enum dma_transfer_direction dir)
> +{
> +	struct qdma_queue *q, **queues;
> +	u32 i, pidx_base;
> +	int ret;
> +
> +	if (dir == DMA_MEM_TO_DEV) {
> +		queues = &qdev->h2c_queues;
> +		pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_H2C_PIDX);
> +	} else {
> +		queues = &qdev->c2h_queues;
> +		pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_C2H_PIDX);
> +	}
> +
> +	*queues = devm_kcalloc(&qdev->pdev->dev, qdev->chan_num, sizeof(*q),
> +			       GFP_KERNEL);
> +	if (!*queues)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < qdev->chan_num; i++) {
> +		ret = qdma_check_queue_status(qdev, dir, i);
> +		if (ret)
> +			return ret;
> +
> +		q = &(*queues)[i];
> +		q->ring_size = QDMA_DEFAULT_RING_SIZE;
> +		q->idx_mask = q->ring_size - 2;
> +		q->qdev = qdev;
> +		q->dir = dir;
> +		q->qid = i;
> +		q->pidx_reg = pidx_base + i * QDMA_DMAP_REG_STRIDE;
> +		q->cidx_reg = QDMA_REG_OFF(qdev, QDMA_REGO_INTR_CIDX) +
> +				i * QDMA_DMAP_REG_STRIDE;
> +		q->vchan.desc_free = qdma_free_vdesc;
> +		vchan_init(&q->vchan, &qdev->dma_dev);
> +	}
> +
> +	return 0;
> +}
> +
> +static int qdma_device_verify(struct qdma_device *qdev)
> +{
> +	u32 value;
> +	int ret;
> +
> +	ret = regmap_read(qdev->regmap, QDMA_IDENTIFIER_REGOFF, &value);
> +	if (ret)
> +		return ret;
> +
> +	value = FIELD_GET(QDMA_IDENTIFIER_MASK, value);
> +	if (value != QDMA_IDENTIFIER) {
> +		qdma_err(qdev, "Invalid identifier");
> +		return -ENODEV;
> +	}
> +	qdev->rfields = qdma_regfs_default;
> +	qdev->roffs = qdma_regos_default;
> +
> +	return 0;
> +}
> +
> +static int qdma_device_setup(struct qdma_device *qdev)
> +{
> +	struct device *dev = &qdev->pdev->dev;
> +	u32 ring_sz = QDMA_DEFAULT_RING_SIZE;
> +	int ret = 0;
> +
> +	while (dev && get_dma_ops(dev))
> +		dev = dev->parent;
> +	if (!dev) {
> +		qdma_err(qdev, "dma device not found");
> +		return -EINVAL;
> +	}
> +	set_dma_ops(&qdev->pdev->dev, get_dma_ops(dev));
> +
> +	ret = qdma_setup_fmap_context(qdev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed setup fmap context");
> +		return ret;
> +	}
> +
> +	/* Setup global ring buffer size at QDMA_DEFAULT_RING_ID index */
> +	ret = qdma_reg_write(qdev, &ring_sz, QDMA_REGO_RING_SIZE);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to setup ring %d of size %ld",
> +			 QDMA_DEFAULT_RING_ID, QDMA_DEFAULT_RING_SIZE);
> +		return ret;
> +	}
> +
> +	/* Enable memory-mapped DMA engine in both directions */
> +	ret = qdma_sgdma_control(qdev, 1);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to SGDMA with error %d", ret);
> +		return ret;
> +	}
> +
> +	ret = qdma_alloc_queues(qdev, DMA_MEM_TO_DEV);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to alloc H2C queues, ret %d", ret);
> +		return ret;
> +	}
> +
> +	ret = qdma_alloc_queues(qdev, DMA_DEV_TO_MEM);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to alloc C2H queues, ret %d", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * qdma_free_queue_resources() - Free queue resources
> + * @chan: DMA channel
> + */
> +static void qdma_free_queue_resources(struct dma_chan *chan)
> +{
> +	struct qdma_queue *queue = to_qdma_queue(chan);
> +	struct qdma_device *qdev = queue->qdev;
> +	struct device *dev = qdev->dma_dev.dev;
> +
> +	qdma_clear_queue_context(queue);
> +	vchan_free_chan_resources(&queue->vchan);
> +	dma_free_coherent(dev, queue->ring_size * QDMA_MM_DESC_SIZE,
> +			  queue->desc_base, queue->dma_desc_base);
> +}
> +
> +/**
> + * qdma_alloc_queue_resources() - Allocate queue resources
> + * @chan: DMA channel
> + */
> +static int qdma_alloc_queue_resources(struct dma_chan *chan)
> +{
> +	struct qdma_queue *queue = to_qdma_queue(chan);
> +	struct qdma_device *qdev = queue->qdev;
> +	struct qdma_ctxt_sw_desc desc;
> +	size_t size;
> +	int ret;
> +
> +	ret = qdma_clear_queue_context(queue);
> +	if (ret)
> +		return ret;
> +
> +	size = queue->ring_size * QDMA_MM_DESC_SIZE;
> +	queue->desc_base = dma_alloc_coherent(qdev->dma_dev.dev, size,
> +					      &queue->dma_desc_base,
> +					      GFP_KERNEL);
> +	if (!queue->desc_base) {
> +		qdma_err(qdev, "Failed to allocate descriptor ring");
> +		return -ENOMEM;
> +	}
> +
> +	/* Setup SW descriptor queue context for DMA memory map */
> +	desc.vec = qdma_get_intr_ring_idx(qdev);
> +	desc.desc_base = queue->dma_desc_base;
> +	ret = qdma_setup_queue_context(qdev, &desc, queue->dir, queue->qid);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to setup SW desc ctxt for %s",
> +			 chan->name);
> +		dma_free_coherent(qdev->dma_dev.dev, size, queue->desc_base,
> +				  queue->dma_desc_base);
> +		return ret;
> +	}
> +
> +	queue->pidx = 0;
> +	queue->cidx = 0;
> +
> +	return 0;
> +}
> +
> +static bool qdma_filter_fn(struct dma_chan *chan, void *param)
> +{
> +	struct qdma_queue *queue = to_qdma_queue(chan);
> +	struct qdma_queue_info *info = param;
> +
> +	return info->dir == queue->dir;
> +}
> +
> +static int qdma_xfer_start(struct qdma_queue *queue)
> +{
> +	struct qdma_device *qdev = queue->qdev;
> +	int ret;
> +
> +	if (!vchan_next_desc(&queue->vchan))
> +		return 0;
> +
> +	qdma_dbg(qdev, "Tnx kickoff with P: %d for %s%d",
> +		 queue->issued_vdesc->pidx, CHAN_STR(queue), queue->qid);
> +
> +	ret = qdma_update_pidx(queue, queue->issued_vdesc->pidx);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to update PIDX to %d for %s queue: %d",
> +			 queue->pidx, CHAN_STR(queue), queue->qid);
> +	}
> +
> +	return ret;
> +}
> +
> +static void qdma_issue_pending(struct dma_chan *chan)
> +{
> +	struct qdma_queue *queue = to_qdma_queue(chan);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&queue->vchan.lock, flags);
> +	if (vchan_issue_pending(&queue->vchan)) {
> +		if (queue->submitted_vdesc) {
> +			queue->issued_vdesc = queue->submitted_vdesc;
> +			queue->submitted_vdesc = NULL;
> +		}
> +		qdma_xfer_start(queue);
> +	}
> +
> +	spin_unlock_irqrestore(&queue->vchan.lock, flags);
> +}
> +
> +static struct qdma_mm_desc *qdma_get_desc(struct qdma_queue *q)
> +{
> +	struct qdma_mm_desc *desc;
> +
> +	if (((q->pidx + 1) & q->idx_mask) == q->cidx)
> +		return NULL;
> +
> +	desc = q->desc_base + q->pidx;
> +	q->pidx = (q->pidx + 1) & q->idx_mask;
> +
> +	return desc;
> +}
> +
> +static int qdma_hw_enqueue(struct qdma_queue *q, struct qdma_mm_vdesc *vdesc)
> +{
> +	struct qdma_mm_desc *desc;
> +	struct scatterlist *sg;
> +	u64 addr, *src, *dst;
> +	u32 rest, len;
> +	int ret = 0;
> +	u32 i;
> +
> +	if (!vdesc->sg_len)
> +		return 0;
> +
> +	if (q->dir == DMA_MEM_TO_DEV) {
> +		dst = &vdesc->dev_addr;
> +		src = &addr;
> +	} else {
> +		dst = &addr;
> +		src = &vdesc->dev_addr;
> +	}
> +
> +	for_each_sg(vdesc->sgl, sg, vdesc->sg_len, i) {
> +		addr = sg_dma_address(sg) + vdesc->sg_off;
> +		rest = sg_dma_len(sg) - vdesc->sg_off;
> +		while (rest) {
> +			len = min_t(u32, rest, QDMA_MM_DESC_MAX_LEN);
> +			desc = qdma_get_desc(q);
> +			if (!desc) {
> +				ret = -EBUSY;
> +				goto out;
> +			}
> +
> +			desc->src_addr = cpu_to_le64(*src);
> +			desc->dst_addr = cpu_to_le64(*dst);
> +			desc->len = cpu_to_le32(len);
> +
> +			vdesc->dev_addr += len;
> +			vdesc->sg_off += len;
> +			vdesc->pending_descs++;
> +			addr += len;
> +			rest -= len;
> +		}
> +		vdesc->sg_off = 0;
> +	}
> +out:
> +	vdesc->sg_len -= i;
> +	vdesc->pidx = q->pidx;
> +	return ret;
> +}
> +
> +static void qdma_fill_pending_vdesc(struct qdma_queue *q)
> +{
> +	struct virt_dma_chan *vc = &q->vchan;
> +	struct qdma_mm_vdesc *vdesc;
> +	struct virt_dma_desc *vd;
> +	int ret;
> +
> +	if (!list_empty(&vc->desc_issued)) {
> +		vd = &q->issued_vdesc->vdesc;
> +		list_for_each_entry_from(vd, &vc->desc_issued, node) {
> +			vdesc = to_qdma_vdesc(vd);
> +			ret = qdma_hw_enqueue(q, vdesc);
> +			if (ret) {
> +				q->issued_vdesc = vdesc;
> +				return;
> +			}
> +		}
> +		q->issued_vdesc = vdesc;
> +	}
> +
> +	if (list_empty(&vc->desc_submitted))
> +		return;
> +
> +	if (q->submitted_vdesc)
> +		vd = &q->submitted_vdesc->vdesc;
> +	else
> +		vd = list_first_entry(&vc->desc_submitted, typeof(*vd), node);
> +
> +	list_for_each_entry_from(vd, &vc->desc_submitted, node) {
> +		vdesc = to_qdma_vdesc(vd);
> +		ret = qdma_hw_enqueue(q, vdesc);
> +		if (ret)
> +			break;
> +	}
> +	q->submitted_vdesc = vdesc;
> +}
> +
> +static dma_cookie_t qdma_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
> +	struct qdma_queue *q = to_qdma_queue(&vc->chan);
> +	struct virt_dma_desc *vd;
> +	unsigned long flags;
> +	dma_cookie_t cookie;
> +
> +	vd = container_of(tx, struct virt_dma_desc, tx);
> +	spin_lock_irqsave(&vc->lock, flags);
> +	cookie = dma_cookie_assign(tx);
> +
> +	list_move_tail(&vd->node, &vc->desc_submitted);
> +	qdma_fill_pending_vdesc(q);
> +	spin_unlock_irqrestore(&vc->lock, flags);
> +
> +	return cookie;
> +}
> +
> +static void *qdma_get_metadata_ptr(struct dma_async_tx_descriptor *tx,
> +				   size_t *payload_len, size_t *max_len)
> +{
> +	struct qdma_mm_vdesc *vdesc;
> +
> +	vdesc = container_of(tx, typeof(*vdesc), vdesc.tx);
> +	if (payload_len)
> +		*payload_len = sizeof(vdesc->dev_addr);
> +	if (max_len)
> +		*max_len = *payload_len;
> +
> +	return &vdesc->dev_addr;
> +}
> +
> +static int qdma_set_metadata_len(struct dma_async_tx_descriptor *tx,
> +				 size_t payload_len)
> +{
> +	struct qdma_mm_vdesc *vdesc;
> +
> +	vdesc = container_of(tx, typeof(*vdesc), vdesc.tx);
> +	if (payload_len != sizeof(vdesc->dev_addr))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static struct dma_descriptor_metadata_ops metadata_ops = {
> +	.get_ptr = qdma_get_metadata_ptr,
> +	.set_len = qdma_set_metadata_len,
> +};
> +
> +static struct dma_async_tx_descriptor *
> +qdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
> +		    unsigned int sg_len, enum dma_transfer_direction dir,
> +		    unsigned long flags, void *context)
> +{
> +	struct qdma_queue *q = to_qdma_queue(chan);
> +	struct dma_async_tx_descriptor *tx;
> +	struct qdma_mm_vdesc *vdesc;
> +
> +	vdesc = kzalloc(sizeof(*vdesc), GFP_NOWAIT);
> +	if (!vdesc)
> +		return NULL;
> +	vdesc->sgl = sgl;
> +	vdesc->sg_len = sg_len;
> +
> +	tx = vchan_tx_prep(&q->vchan, &vdesc->vdesc, flags);
> +	tx->tx_submit = qdma_tx_submit;
> +	tx->metadata_ops = &metadata_ops;
> +
> +	return tx;
> +}
> +
> +static int qdma_arm_err_intr(const struct qdma_device *qdev)
> +{
> +	u32 value = 0;
> +
> +	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_FUNC, qdev->fid);
> +	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_VEC, qdev->err_irq_idx);
> +	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_ARM, 1);
> +
> +	return qdma_reg_write(qdev, &value, QDMA_REGO_ERR_INT);
> +}
> +
> +static irqreturn_t qdma_error_isr(int irq, void *data)
> +{
> +	struct qdma_device *qdev = data;
> +	u32 err_stat = 0;
> +	int ret;
> +
> +	ret = qdma_reg_read(qdev, &err_stat, QDMA_REGO_ERR_STAT);
> +	if (ret) {
> +		qdma_err(qdev, "read error state failed, ret %d", ret);
> +		goto out;
> +	}
> +
> +	qdma_err(qdev, "global error %d", err_stat);
> +	ret = qdma_reg_write(qdev, &err_stat, QDMA_REGO_ERR_STAT);
> +	if (ret)
> +		qdma_err(qdev, "clear error state failed, ret %d", ret);
> +
> +out:
> +	qdma_arm_err_intr(qdev);
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t qdma_queue_isr(int irq, void *data)
> +{
> +	struct qdma_intr_ring *intr = data;
> +	struct qdma_queue *q = NULL;
> +	struct qdma_device *qdev;
> +	u32 index, comp_desc;
> +	u64 intr_ent;
> +	u8 color;
> +	int ret;
> +	u16 qid;
> +
> +	qdev = intr->qdev;
> +	index = intr->cidx;
> +	while (1) {
> +		struct virt_dma_desc *vd;
> +		struct qdma_mm_vdesc *vdesc;
> +		unsigned long flags;
> +		u32 cidx;
> +
> +		intr_ent = le64_to_cpu(intr->base[index]);
> +		color = FIELD_GET(QDMA_INTR_MASK_COLOR, intr_ent);
> +		if (color != intr->color)
> +			break;
> +
> +		qid = FIELD_GET(QDMA_INTR_MASK_QID, intr_ent);
> +		if (FIELD_GET(QDMA_INTR_MASK_TYPE, intr_ent))
> +			q = qdev->c2h_queues;
> +		else
> +			q = qdev->h2c_queues;
> +		q += qid;
> +
> +		cidx = FIELD_GET(QDMA_INTR_MASK_CIDX, intr_ent);
> +
> +		spin_lock_irqsave(&q->vchan.lock, flags);
> +		comp_desc = (cidx - q->cidx) & q->idx_mask;
> +
> +		vd = vchan_next_desc(&q->vchan);
> +		if (!vd)
> +			goto skip;
> +
> +		vdesc = to_qdma_vdesc(vd);
> +		while (comp_desc > vdesc->pending_descs) {
> +			list_del(&vd->node);
> +			vchan_cookie_complete(vd);
> +			comp_desc -= vdesc->pending_descs;
> +			vd = vchan_next_desc(&q->vchan);
> +			vdesc = to_qdma_vdesc(vd);
> +		}
> +		vdesc->pending_descs -= comp_desc;
> +		if (!vdesc->pending_descs && QDMA_VDESC_QUEUED(vdesc)) {
> +			list_del(&vd->node);
> +			vchan_cookie_complete(vd);
> +		}
> +		q->cidx = cidx;
> +
> +		qdma_fill_pending_vdesc(q);
> +		qdma_xfer_start(q);
> +
> +skip:
> +		spin_unlock_irqrestore(&q->vchan.lock, flags);
> +
> +		/*
> +		 * Wrap the index value and flip the expected color value if
> +		 * interrupt aggregation PIDX has wrapped around.
> +		 */
> +		index++;
> +		index &= QDMA_INTR_RING_IDX_MASK;
> +		if (!index)
> +			intr->color = !intr->color;
> +	}
> +
> +	/*
> +	 * Update the software interrupt aggregation ring CIDX if a valid entry
> +	 * was found.
> +	 */
> +	if (q) {
> +		qdma_dbg(qdev, "update intr ring%d %d", intr->ridx, index);
> +
> +		/*
> +		 * Record the last read index of status descriptor from the
> +		 * interrupt aggregation ring.
> +		 */
> +		intr->cidx = index;
> +
> +		ret = qdma_update_cidx(q, intr->ridx, index);
> +		if (ret) {
> +			qdma_err(qdev, "Failed to update IRQ CIDX");
> +			return IRQ_NONE;
> +		}
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int qdma_init_error_irq(struct qdma_device *qdev)
> +{
> +	struct device *dev = &qdev->pdev->dev;
> +	int ret;
> +	u32 vec;
> +
> +	vec = qdev->queue_irq_start - 1;
> +
> +	ret = devm_request_threaded_irq(dev, vec, NULL, qdma_error_isr,
> +					IRQF_ONESHOT, "amd-qdma-error", qdev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to request error IRQ vector: %d", vec);
> +		return ret;
> +	}
> +
> +	ret = qdma_arm_err_intr(qdev);
> +	if (ret)
> +		qdma_err(qdev, "Failed to arm err interrupt, ret %d", ret);
> +
> +	return ret;
> +}
> +
> +static void qdma_free_qintr_rings(struct qdma_device *qdev)
> +{
> +	int i;
> +
> +	for (i = 0; i < qdev->qintr_ring_num; i++) {
> +		if (!qdev->qintr_rings[i].base)
> +			continue;
> +
> +		dma_free_coherent(&qdev->pdev->dev, QDMA_INTR_RING_SIZE,
> +				  qdev->qintr_rings[i].base,
> +				  qdev->qintr_rings[i].dev_base);
> +	}
> +}
> +
> +static int qdma_alloc_qintr_rings(struct qdma_device *qdev)
> +{
> +	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
> +	struct device *dev = &qdev->pdev->dev;
> +	struct qdma_intr_ring *ring;
> +	struct qdma_ctxt_intr intr_ctxt;
> +	u32 vector;
> +	int ret, i;
> +
> +	qdev->qintr_ring_num = qdev->queue_irq_num;
> +	qdev->qintr_rings = devm_kcalloc(dev, qdev->qintr_ring_num,
> +					 sizeof(*qdev->qintr_rings),
> +					 GFP_KERNEL);
> +	if (!qdev->qintr_rings)
> +		return -ENOMEM;
> +
> +	vector = qdev->queue_irq_start;
> +	for (i = 0; i < qdev->qintr_ring_num; i++, vector++) {
> +		ring = &qdev->qintr_rings[i];
> +		ring->qdev = qdev;
> +		ring->msix_id = qdev->err_irq_idx + i + 1;
> +		ring->ridx = i;
> +		ring->color = 1;
> +		ring->base = dma_alloc_coherent(dev, QDMA_INTR_RING_SIZE,
> +						&ring->dev_base,
> +						GFP_KERNEL);
> +		if (!ring->base) {
> +			qdma_err(qdev, "Failed to alloc intr ring %d", i);
> +			ret = -ENOMEM;
> +			goto failed;
> +		}
> +		intr_ctxt.agg_base = QDMA_INTR_RING_BASE(ring->dev_base);
> +		intr_ctxt.size = (QDMA_INTR_RING_SIZE - 1) / 4096;
> +		intr_ctxt.vec = ring->msix_id;
> +		intr_ctxt.valid = true;
> +		intr_ctxt.color = true;
> +		ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
> +					QDMA_CTXT_CLEAR, ring->ridx, NULL);
> +		if (ret) {
> +			qdma_err(qdev, "Failed clear intr ctx, ret %d", ret);
> +			goto failed;
> +		}
> +
> +		qdma_prep_intr_context(qdev, &intr_ctxt, ctxt);
> +		ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
> +					QDMA_CTXT_WRITE, ring->ridx, ctxt);
> +		if (ret) {
> +			qdma_err(qdev, "Failed setup intr ctx, ret %d", ret);
> +			goto failed;
> +		}
> +
> +		ret = devm_request_threaded_irq(dev, vector, NULL,
> +						qdma_queue_isr, IRQF_ONESHOT,
> +						"amd-qdma-queue", ring);
> +		if (ret) {
> +			qdma_err(qdev, "Failed to request irq %d", vector);
> +			goto failed;
> +		}
> +	}
> +
> +	return 0;
> +
> +failed:
> +	qdma_free_qintr_rings(qdev);
> +	return ret;
> +}
> +
> +static int qdma_intr_init(struct qdma_device *qdev)
> +{
> +	int ret;
> +
> +	ret = qdma_init_error_irq(qdev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to init error IRQs, ret %d", ret);
> +		return ret;
> +	}
> +
> +	ret = qdma_alloc_qintr_rings(qdev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to init queue IRQs, ret %d", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int amd_qdma_remove(struct platform_device *pdev)
> +{
> +	struct qdma_device *qdev = platform_get_drvdata(pdev);
> +
> +	qdma_sgdma_control(qdev, 0);
> +
> +	if (qdev->status & QDMA_DEV_STATUS_REG_DMA)
> +		dma_async_device_unregister(&qdev->dma_dev);
> +
> +	if (qdev->status & QDMA_DEV_STATUS_INTR_INIT)
> +		qdma_free_qintr_rings(qdev);
> +
> +	mutex_destroy(&qdev->ctxt_lock);
> +
> +	return 0;
> +}
> +
> +static int amd_qdma_probe(struct platform_device *pdev)
> +{
> +	struct qdma_platdata *pdata = dev_get_platdata(&pdev->dev);
> +	struct qdma_device *qdev;
> +	struct resource *res;
> +	void __iomem *regs;
> +	int ret;
> +
> +	qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
> +	if (!qdev)
> +		return -ENOMEM;
> +
> +	platform_set_drvdata(pdev, qdev);
> +	qdev->pdev = pdev;
> +	mutex_init(&qdev->ctxt_lock);
> +
> +	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
> +	if (!res) {
> +		qdma_err(qdev, "Failed to get IRQ resource");
> +		ret = -ENODEV;
> +		goto failed;
> +	}
> +	qdev->err_irq_idx = pdata->irq_index;
> +	qdev->queue_irq_start = res->start + 1;
> +	qdev->queue_irq_num = res->end - res->start;
> +
> +	regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
> +	if (IS_ERR(regs)) {
> +		ret = PTR_ERR(regs);
> +		qdma_err(qdev, "Failed to map IO resource, err %d", ret);
> +		goto failed;
> +	}
> +
> +	qdev->regmap = devm_regmap_init_mmio(&pdev->dev, regs,
> +					     &qdma_regmap_config);
> +	if (IS_ERR(qdev->regmap)) {
> +		ret = PTR_ERR(qdev->regmap);
> +		qdma_err(qdev, "Regmap init failed, err %d", ret);
> +		goto failed;
> +	}
> +
> +	ret = qdma_device_verify(qdev);
> +	if (ret)
> +		goto failed;
> +
> +	ret = qdma_get_hw_info(qdev);
> +	if (ret)
> +		goto failed;
> +
> +	INIT_LIST_HEAD(&qdev->dma_dev.channels);
> +
> +	ret = qdma_device_setup(qdev);
> +	if (ret)
> +		goto failed;
> +
> +	ret = qdma_intr_init(qdev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to initialize IRQs %d", ret);
> +		return ret;
> +	}
> +	qdev->status |= QDMA_DEV_STATUS_INTR_INIT;
> +
> +	dma_cap_set(DMA_SLAVE, qdev->dma_dev.cap_mask);
> +	dma_cap_set(DMA_PRIVATE, qdev->dma_dev.cap_mask);
> +
> +	qdev->dma_dev.dev = &pdev->dev;
> +	qdev->dma_dev.filter.map = pdata->device_map;
> +	qdev->dma_dev.filter.mapcnt = qdev->chan_num * 2;
> +	qdev->dma_dev.filter.fn = qdma_filter_fn;
> +	qdev->dma_dev.desc_metadata_modes = DESC_METADATA_ENGINE;
> +	qdev->dma_dev.device_alloc_chan_resources = qdma_alloc_queue_resources;
> +	qdev->dma_dev.device_free_chan_resources = qdma_free_queue_resources;
> +	qdev->dma_dev.device_prep_slave_sg = qdma_prep_device_sg;
> +	qdev->dma_dev.device_issue_pending = qdma_issue_pending;
> +	qdev->dma_dev.device_tx_status = dma_cookie_status;
> +	qdev->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
> +
> +	ret = dma_async_device_register(&qdev->dma_dev);
> +	if (ret) {
> +		qdma_err(qdev, "Failed to register AMD QDMA: %d", ret);
> +		goto failed;
> +	}
> +	qdev->status |= QDMA_DEV_STATUS_REG_DMA;
> +
> +	return 0;
> +failed:
> +	qdma_err(qdev, "Failed to probe AMD QDMA driver");
> +	amd_qdma_remove(pdev);
> +	return ret;
> +}
> +
> +static struct platform_driver amd_qdma_driver = {
> +	.driver		= {
> +		.name = "amd-qdma",
> +	},
> +	.probe		= amd_qdma_probe,
> +	.remove		= amd_qdma_remove,
> +};
> +
> +module_platform_driver(amd_qdma_driver);
> +
> +MODULE_DESCRIPTION("AMD QDMA driver");
> +MODULE_AUTHOR("XRT Team <runtimeca39d@....com>");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/dma/amd/qdma.h b/drivers/dma/amd/qdma.h
> new file mode 100644
> index 000000000000..b4a0e23d3d79
> --- /dev/null
> +++ b/drivers/dma/amd/qdma.h
> @@ -0,0 +1,269 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * DMA header for AMD Queue-based DMA Subsystem
> + *
> + * Copyright (C) 2023, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef __QDMA_H
> +#define __QDMA_H
> +
> +#include <linux/bitfield.h>
> +#include <linux/dmaengine.h>
> +#include <linux/kernel.h>
> +#include <linux/platform_device.h>
> +#include <linux/regmap.h>
> +
> +#include "../virt-dma.h"
> +
> +#define DISABLE					0
> +#define ENABLE					1
> +
> +#define QDMA_MIN_IRQ				3
> +#define QDMA_INTR_NAME_MAX_LEN			30
> +#define QDMA_INTR_PREFIX			"amd-qdma"
> +
> +#define QDMA_DEV_STATUS_REG_DMA			BIT(0)
> +#define QDMA_DEV_STATUS_INTR_INIT		BIT(1)
> +
> +#define QDMA_IDENTIFIER				0x1FD3
> +#define QDMA_DEFAULT_RING_SIZE			(BIT(10) + 1)
> +#define QDMA_DEFAULT_RING_ID			0
> +#define QDMA_POLL_INTRVL_US			10		/* 10us */
> +#define QDMA_POLL_TIMEOUT_US			(500 * 1000)	/* 500ms */
> +#define QDMA_DMAP_REG_STRIDE			16
> +#define QDMA_CTXT_REGMAP_LEN			8		/* 8 regs */
> +#define QDMA_MM_DESC_SIZE			32		/* Bytes */
> +#define QDMA_MM_DESC_LEN_BITS			28
> +#define QDMA_MM_DESC_MAX_LEN			(BIT(QDMA_MM_DESC_LEN_BITS) - 1)
> +#define QDMA_MIN_DMA_ALLOC_SIZE			4096
> +#define QDMA_INTR_RING_SIZE			BIT(13)
> +#define QDMA_INTR_RING_IDX_MASK			GENMASK(9, 0)
> +#define QDMA_INTR_RING_BASE(_addr)		((_addr) >> 12)
> +
> +#define QDMA_IDENTIFIER_REGOFF			0x0
> +#define QDMA_IDENTIFIER_MASK			GENMASK(31, 16)
> +#define QDMA_QUEUE_ARM_BIT			BIT(16)
> +
> +#define qdma_err(qdev, fmt, args...)					\
> +	dev_err(&(qdev)->pdev->dev, fmt, ##args)
> +
> +#define qdma_dbg(qdev, fmt, args...)					\
> +	dev_dbg(&(qdev)->pdev->dev, fmt, ##args)
> +
> +#define qdma_info(qdev, fmt, args...)					\
> +	dev_info(&(qdev)->pdev->dev, fmt, ##args)
> +
> +enum qdma_reg_fields {
> +	QDMA_REGF_IRQ_ENABLE,
> +	QDMA_REGF_WBK_ENABLE,
> +	QDMA_REGF_WBI_CHECK,
> +	QDMA_REGF_IRQ_ARM,
> +	QDMA_REGF_IRQ_VEC,
> +	QDMA_REGF_IRQ_AGG,
> +	QDMA_REGF_WBI_INTVL_ENABLE,
> +	QDMA_REGF_MRKR_DISABLE,
> +	QDMA_REGF_QUEUE_ENABLE,
> +	QDMA_REGF_QUEUE_MODE,
> +	QDMA_REGF_DESC_BASE,
> +	QDMA_REGF_DESC_SIZE,
> +	QDMA_REGF_RING_ID,
> +	QDMA_REGF_CMD_INDX,
> +	QDMA_REGF_CMD_CMD,
> +	QDMA_REGF_CMD_TYPE,
> +	QDMA_REGF_CMD_BUSY,
> +	QDMA_REGF_QUEUE_COUNT,
> +	QDMA_REGF_QUEUE_MAX,
> +	QDMA_REGF_QUEUE_BASE,
> +	QDMA_REGF_FUNCTION_ID,
> +	QDMA_REGF_INTR_AGG_BASE,
> +	QDMA_REGF_INTR_VECTOR,
> +	QDMA_REGF_INTR_SIZE,
> +	QDMA_REGF_INTR_VALID,
> +	QDMA_REGF_INTR_COLOR,
> +	QDMA_REGF_INTR_FUNCTION_ID,
> +	QDMA_REGF_ERR_INT_FUNC,
> +	QDMA_REGF_ERR_INT_VEC,
> +	QDMA_REGF_ERR_INT_ARM,
> +	QDMA_REGF_MAX
> +};
> +
> +enum qdma_regs {
> +	QDMA_REGO_CTXT_DATA,
> +	QDMA_REGO_CTXT_CMD,
> +	QDMA_REGO_CTXT_MASK,
> +	QDMA_REGO_MM_H2C_CTRL,
> +	QDMA_REGO_MM_C2H_CTRL,
> +	QDMA_REGO_QUEUE_COUNT,
> +	QDMA_REGO_RING_SIZE,
> +	QDMA_REGO_H2C_PIDX,
> +	QDMA_REGO_C2H_PIDX,
> +	QDMA_REGO_INTR_CIDX,
> +	QDMA_REGO_FUNC_ID,
> +	QDMA_REGO_ERR_INT,
> +	QDMA_REGO_ERR_STAT,
> +	QDMA_REGO_MAX
> +};
> +
> +struct qdma_reg_field {
> +	u16 lsb; /* Least significant bit of field */
> +	u16 msb; /* Most significant bit of field */
> +};
> +
> +struct qdma_reg {
> +	u32 off;
> +	u32 count;
> +};
> +
> +#define QDMA_REGF(_msb, _lsb) {						\
> +	.lsb = (_lsb),							\
> +	.msb = (_msb),							\
> +}
> +
> +#define QDMA_REGO(_off, _count) {					\
> +	.off = (_off),							\
> +	.count = (_count),						\
> +}
> +
> +enum qdma_desc_size {
> +	QDMA_DESC_SIZE_8B,
> +	QDMA_DESC_SIZE_16B,
> +	QDMA_DESC_SIZE_32B,
> +	QDMA_DESC_SIZE_64B,
> +};
> +
> +enum qdma_queue_op_mode {
> +	QDMA_QUEUE_OP_STREAM,
> +	QDMA_QUEUE_OP_MM,
> +};
> +
> +enum qdma_ctxt_type {
> +	QDMA_CTXT_DESC_SW_C2H,
> +	QDMA_CTXT_DESC_SW_H2C,
> +	QDMA_CTXT_DESC_HW_C2H,
> +	QDMA_CTXT_DESC_HW_H2C,
> +	QDMA_CTXT_DESC_CR_C2H,
> +	QDMA_CTXT_DESC_CR_H2C,
> +	QDMA_CTXT_WRB,
> +	QDMA_CTXT_PFTCH,
> +	QDMA_CTXT_INTR_COAL,
> +	QDMA_CTXT_RSVD,
> +	QDMA_CTXT_HOST_PROFILE,
> +	QDMA_CTXT_TIMER,
> +	QDMA_CTXT_FMAP,
> +	QDMA_CTXT_FNC_STS,
> +};
> +
> +enum qdma_ctxt_cmd {
> +	QDMA_CTXT_CLEAR,
> +	QDMA_CTXT_WRITE,
> +	QDMA_CTXT_READ,
> +	QDMA_CTXT_INVALIDATE,
> +	QDMA_CTXT_MAX
> +};
> +
> +struct qdma_ctxt_sw_desc {
> +	u64				desc_base;
> +	u16				vec;
> +};
> +
> +struct qdma_ctxt_intr {
> +	u64				agg_base;
> +	u16				vec;
> +	u32				size;
> +	bool				valid;
> +	bool				color;
> +};
> +
> +struct qdma_ctxt_fmap {
> +	u16				qbase;
> +	u16				qmax;
> +};
> +
> +struct qdma_device;
> +
> +struct qdma_mm_desc {
> +	__le64			src_addr;
> +	__le32			len;
> +	__le32			reserved1;
> +	__le64			dst_addr;
> +	__le64			reserved2;
> +} __packed;
> +
> +struct qdma_mm_vdesc {
> +	struct virt_dma_desc		vdesc;
> +	struct qdma_queue		*queue;
> +	struct scatterlist		*sgl;
> +	u64				sg_off;
> +	u32				sg_len;
> +	u64				dev_addr;
> +	u32				pidx;
> +	u32				pending_descs;
> +};
> +
> +#define QDMA_VDESC_QUEUED(vdesc)	(!(vdesc)->sg_len)
> +
> +struct qdma_queue {
> +	struct qdma_device		*qdev;
> +	struct virt_dma_chan		vchan;
> +	enum dma_transfer_direction	dir;
> +	struct dma_slave_config		cfg;
> +	struct qdma_mm_desc		*desc_base;
> +	struct qdma_mm_vdesc		*submitted_vdesc;
> +	struct qdma_mm_vdesc		*issued_vdesc;
> +	dma_addr_t			dma_desc_base;
> +	u32				pidx_reg;
> +	u32				cidx_reg;
> +	u32				ring_size;
> +	u32				idx_mask;
> +	u16				qid;
> +	u32				pidx;
> +	u32				cidx;
> +};
> +
> +struct qdma_intr_ring {
> +	struct qdma_device		*qdev;
> +	__le64				*base;
> +	dma_addr_t			dev_base;
> +	char				msix_name[QDMA_INTR_NAME_MAX_LEN];
> +	u32				msix_vector;
> +	u16				msix_id;
> +	u32				ring_size;
> +	u16				ridx;
> +	u16				cidx;
> +	u8				color;
> +};
> +
> +#define QDMA_INTR_MASK_PIDX		GENMASK_ULL(15, 0)
> +#define QDMA_INTR_MASK_CIDX		GENMASK_ULL(31, 16)
> +#define QDMA_INTR_MASK_DESC_COLOR	GENMASK_ULL(32, 32)
> +#define QDMA_INTR_MASK_STATE		GENMASK_ULL(34, 33)
> +#define QDMA_INTR_MASK_ERROR		GENMASK_ULL(36, 35)
> +#define QDMA_INTR_MASK_TYPE		GENMASK_ULL(38, 38)
> +#define QDMA_INTR_MASK_QID		GENMASK_ULL(62, 39)
> +#define QDMA_INTR_MASK_COLOR		GENMASK_ULL(63, 63)
> +
> +struct qdma_device {
> +	struct platform_device		*pdev;
> +	struct dma_device		dma_dev;
> +	struct regmap			*regmap;
> +	struct mutex			ctxt_lock; /* protect ctxt registers */
> +	const struct qdma_reg_field	*rfields;
> +	const struct qdma_reg		*roffs;
> +	struct qdma_queue		*h2c_queues;
> +	struct qdma_queue		*c2h_queues;
> +	struct qdma_intr_ring		*qintr_rings;
> +	u32				qintr_ring_num;
> +	u32				qintr_ring_idx;
> +	u32				chan_num;
> +	u32				queue_irq_start;
> +	u32				queue_irq_num;
> +	u32				err_irq_idx;
> +	u32				fid;
> +	u32				status;
> +};
> +
> +extern const struct qdma_reg qdma_regos_default[QDMA_REGO_MAX];
> +extern const struct qdma_reg_field qdma_regfs_default[QDMA_REGF_MAX];
> +
> +#endif	/* __QDMA_H */
> diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h
> new file mode 100644
> index 000000000000..59fa0c174f70
> --- /dev/null
> +++ b/include/linux/platform_data/amd_qdma.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (C) 2023, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _PLATDATA_AMD_QDMA_H
> +#define _PLATDATA_AMD_QDMA_H
> +
> +#include <linux/dmaengine.h>
> +
> +/**
> + * struct qdma_queue_info - DMA queue information. This information is used to
> + *			    match queue when DMA channel is requested
> + * @dir: Channel transfer direction
> + */
> +struct qdma_queue_info {
> +	enum dma_transfer_direction dir;
> +};
> +
> +#define QDMA_FILTER_PARAM(qinfo)	((void *)(qinfo))
> +
> +struct dma_slave_map;
> +
> +/**
> + * struct qdma_platdata - Platform specific data for QDMA engine
> + * @max_mm_channels: Maximum number of MM DMA channels in each direction
> + * @device_map: DMA slave map
> + * @irq_index: The index of first IRQ
> + */
> +struct qdma_platdata {
> +	u32			max_mm_channels;
> +	u32			irq_index;
> +	struct dma_slave_map	*device_map;
> +};
> +
> +#endif /* _PLATDATA_AMD_QDMA_H */
> -- 
> 2.34.1

-- 
~Vinod

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ