[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5e39026e-bec9-408f-b24a-bb60b3f1f411@arm.com>
Date: Sat, 30 Aug 2025 01:00:47 +0100
From: Robin Murphy <robin.murphy@....com>
To: Jisheng Zhang <jszhang@...nel.org>, Vinod Koul <vkoul@...nel.org>,
Rob Herring <robh@...nel.org>, Krzysztof Kozlowski <krzk+dt@...nel.org>,
Conor Dooley <conor+dt@...nel.org>
Cc: dmaengine@...r.kernel.org, devicetree@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 11/14] dmaengine: dma350: Support device_prep_slave_sg
On 2025-08-23 4:40 pm, Jisheng Zhang wrote:
> Add device_prep_slave_sg() callback function so that DMA_MEM_TO_DEV
> and DMA_DEV_TO_MEM operations in single mode can be supported.
>
> Signed-off-by: Jisheng Zhang <jszhang@...nel.org>
> ---
> drivers/dma/arm-dma350.c | 180 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 174 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c
> index 3d26a1f020df..a285778264b9 100644
> --- a/drivers/dma/arm-dma350.c
> +++ b/drivers/dma/arm-dma350.c
> @@ -1,5 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0
> // Copyright (C) 2024-2025 Arm Limited
> +// Copyright (C) 2025 Synaptics Incorporated
> // Arm DMA-350 driver
>
> #include <linux/bitfield.h>
> @@ -98,7 +99,23 @@
>
> #define CH_FILLVAL 0x38
> #define CH_SRCTRIGINCFG 0x4c
> +#define CH_SRCTRIGINMODE GENMASK(11, 10)
> +#define CH_SRCTRIG_CMD 0
> +#define CH_SRCTRIG_DMA_FC 2
> +#define CH_SRCTRIG_PERIF_FC 3
> +#define CH_SRCTRIGINTYPE GENMASK(9, 8)
> +#define CH_SRCTRIG_SW_REQ 0
> +#define CH_SRCTRIG_HW_REQ 2
> +#define CH_SRCTRIG_INTERN_REQ 3
> #define CH_DESTRIGINCFG 0x50
> +#define CH_DESTRIGINMODE GENMASK(11, 10)
> +#define CH_DESTRIG_CMD 0
> +#define CH_DESTRIG_DMA_FC 2
> +#define CH_DESTRIG_PERIF_FC 3
> +#define CH_DESTRIGINTYPE GENMASK(9, 8)
> +#define CH_DESTRIG_SW_REQ 0
> +#define CH_DESTRIG_HW_REQ 2
> +#define CH_DESTRIG_INTERN_REQ 3
Like the CH_CFG_* definitions, let's just have common CH_TRIG* fields
that work for both SRC and DES to simplify matters. FWIW, my attempt to
balance clarity with conciseness would be:
#define CH_TRIGINCFG_BLKSIZE GENMASK(23, 16)
#define CH_TRIGINCFG_MODE GENMASK(11, 10)
#define CH_TRIG_MODE_CMD 0
#define CH_TRIG_MODE_DMA 2
#define CH_TRIG_MODE_PERIPH 3
#define CH_TRIGINCFG_TYPE GENMASK(9, 8)
#define CH_TRIG_TYPE_SW 0
#define CH_TRIG_TYPE_HW 2
#define CH_TRIG_TYPE_INTERN 3
#define CH_TRIGINCFG_SEL GENMASK(7, 0)
> #define CH_LINKATTR 0x70
> #define CH_LINK_SHAREATTR GENMASK(9, 8)
> #define CH_LINK_MEMATTR GENMASK(7, 0)
> @@ -190,11 +207,13 @@ struct d350_chan {
> struct d350_desc *desc;
> void __iomem *base;
> struct dma_pool *cmd_pool;
> + struct dma_slave_config config;
> int irq;
> enum dma_status status;
> dma_cookie_t cookie;
> u32 residue;
> u8 tsz;
> + u8 ch;
What's this for? It doesn't seem to be used for anything meaningful.
> bool has_trig;
> bool has_wrap;
> bool coherent;
> @@ -327,6 +346,144 @@ static struct dma_async_tx_descriptor *d350_prep_memset(struct dma_chan *chan,
> return vchan_tx_prep(&dch->vc, &desc->vd, flags);
> }
>
> +static struct dma_async_tx_descriptor *
> +d350_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
> + unsigned int sg_len, enum dma_transfer_direction dir,
> + unsigned long flags, void *context)
> +{
> + struct d350_chan *dch = to_d350_chan(chan);
> + dma_addr_t src, dst, phys;
> + struct d350_desc *desc;
> + struct scatterlist *sg;
> + u32 len, trig, *cmd, *la_cmd, tsz;
> + struct d350_sg *dsg;
> + int i, j;
> +
> + if (unlikely(!is_slave_direction(dir) || !sg_len))
> + return NULL;
> +
> + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_NOWAIT);
> + if (!desc)
> + return NULL;
> +
> + desc->sglen = sg_len;
> +
> + if (dir == DMA_MEM_TO_DEV)
> + tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
> + else
> + tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
Surely we should just use the exact addr_width requested?
> + for_each_sg(sgl, sg, sg_len, i) {
> + desc->sg[i].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
> + if (unlikely(!desc->sg[i].command))
> + goto err_cmd_alloc;
> +
> + desc->sg[i].phys = phys;
> + dsg = &desc->sg[i];
> + len = sg_dma_len(sg);
> +
> + if (dir == DMA_MEM_TO_DEV) {
> + src = sg_dma_address(sg);
> + dst = dch->config.dst_addr;
> + trig = CH_CTRL_USEDESTRIGIN;
> + } else {
> + src = dch->config.src_addr;
> + dst = sg_dma_address(sg);
> + trig = CH_CTRL_USESRCTRIGIN;
> + }
> + dsg->tsz = tsz;
> + dsg->xsize = lower_16_bits(len >> dsg->tsz);
> + dsg->xsizehi = upper_16_bits(len >> dsg->tsz);
> +
> + cmd = dsg->command;
> + if (!i) {
> + cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_SRCADDRHI | LINK_DESADDR |
> + LINK_DESADDRHI | LINK_XSIZE | LINK_XSIZEHI | LINK_SRCTRANSCFG |
> + LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
> +
> + cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
> + FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
> +
> + cmd[2] = lower_32_bits(src);
> + cmd[3] = upper_32_bits(src);
> + cmd[4] = lower_32_bits(dst);
> + cmd[5] = upper_32_bits(dst);
> + cmd[6] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
> + FIELD_PREP(CH_XY_DES, dsg->xsize);
> + cmd[7] = FIELD_PREP(CH_XY_SRC, dsg->xsizehi) |
> + FIELD_PREP(CH_XY_DES, dsg->xsizehi);
> + if (dir == DMA_MEM_TO_DEV) {
> + cmd[0] |= LINK_DESTRIGINCFG;
> + cmd[8] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
> + cmd[9] = TRANSCFG_DEVICE;
> + cmd[10] = FIELD_PREP(CH_XY_SRC, 1);
> + cmd[11] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
> + FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
> + } else {
> + cmd[0] |= LINK_SRCTRIGINCFG;
> + cmd[8] = TRANSCFG_DEVICE;
> + cmd[9] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
> + cmd[10] = FIELD_PREP(CH_XY_DES, 1);
> + cmd[11] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
> + FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
> + }
> + la_cmd = &cmd[12];
> + } else {
> + *la_cmd = phys | CH_LINKADDR_EN;
> + if (i == sg_len - 1) {
> + cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_SRCADDRHI | LINK_DESADDR |
> + LINK_DESADDRHI | LINK_XSIZE | LINK_XSIZEHI | LINK_LINKADDR;
> + cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
> + FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
> + cmd[2] = lower_32_bits(src);
> + cmd[3] = upper_32_bits(src);
> + cmd[4] = lower_32_bits(dst);
> + cmd[5] = upper_32_bits(dst);
> + cmd[6] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
> + FIELD_PREP(CH_XY_DES, dsg->xsize);
> + cmd[7] = FIELD_PREP(CH_XY_SRC, dsg->xsizehi) |
> + FIELD_PREP(CH_XY_DES, dsg->xsizehi);
> + la_cmd = &cmd[8];
> + } else {
> + cmd[0] = LINK_SRCADDR | LINK_SRCADDRHI | LINK_DESADDR |
> + LINK_DESADDRHI | LINK_XSIZE | LINK_XSIZEHI | LINK_LINKADDR;
> + cmd[1] = lower_32_bits(src);
> + cmd[2] = upper_32_bits(src);
> + cmd[3] = lower_32_bits(dst);
> + cmd[4] = upper_32_bits(dst);
> + cmd[5] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
> + FIELD_PREP(CH_XY_DES, dsg->xsize);
> + cmd[6] = FIELD_PREP(CH_XY_SRC, dsg->xsizehi) |
> + FIELD_PREP(CH_XY_DES, dsg->xsizehi);
> + la_cmd = &cmd[7];
> + }
> + }
> + }
Again, the structure and duplication here are unbearable. I fail to
comprehend why the "if (i == sg_len - 1)" case even exists...
More crucially, it clearly can't even work for DMA-350 since it would
always be selecting trigger 0. How much of this has been tested?
> + /* the last command */
> + *la_cmd = 0;
> + desc->sg[sg_len - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
> +
> + mb();
> +
> + return vchan_tx_prep(&dch->vc, &desc->vd, flags);
> +
> +err_cmd_alloc:
> + for (j = 0; j < i; j++)
> + dma_pool_free(dch->cmd_pool, desc->sg[j].command, desc->sg[j].phys);
> + kfree(desc);
> + return NULL;
> +}
> +
> +static int d350_slave_config(struct dma_chan *chan, struct dma_slave_config *config)
> +{
> + struct d350_chan *dch = to_d350_chan(chan);
Shouldn't we validate that the given channel has any chance of
supporting the given config?
> + memcpy(&dch->config, config, sizeof(*config));
> +
> + return 0;
> +}
> +
> static int d350_pause(struct dma_chan *chan)
> {
> struct d350_chan *dch = to_d350_chan(chan);
> @@ -558,8 +715,9 @@ static irqreturn_t d350_irq(int irq, void *data)
> writel_relaxed(ch_status, dch->base + CH_STATUS);
>
> spin_lock(&dch->vc.lock);
> - vchan_cookie_complete(vd);
> if (ch_status & CH_STAT_INTR_DONE) {
> + vchan_cookie_complete(vd);
> + dch->desc = NULL;
What's this about? As I mentioned on the earlier patches it was very
fiddly to get consistently appropriate handling of errors for
MEM_TO_MEM, so please explain if this change is somehow necessary for
MEM_TO_DEV/DEV_TO_MEM. Otherwise, if anything it just looks like an
undocumented bodge around what those previous patches subtly broke.
> dch->status = DMA_COMPLETE;
> dch->residue = 0;
> d350_start_next(dch);
> @@ -617,7 +775,7 @@ static int d350_probe(struct platform_device *pdev)
> struct device *dev = &pdev->dev;
> struct d350 *dmac;
> void __iomem *base;
> - u32 reg, dma_chan_mask;
> + u32 reg, dma_chan_mask, trig_bits = 0;
> int ret, nchan, dw, aw, r, p;
> bool coherent, memset;
>
> @@ -637,13 +795,11 @@ static int d350_probe(struct platform_device *pdev)
> dw = 1 << FIELD_GET(DMA_CFG_DATA_WIDTH, reg);
> aw = FIELD_GET(DMA_CFG_ADDR_WIDTH, reg) + 1;
>
> - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
> - coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
> -
> dmac = devm_kzalloc(dev, struct_size(dmac, channels, nchan), GFP_KERNEL);
> if (!dmac)
> return -ENOMEM;
>
> + dmac->dma.dev = dev;
Similarly, why are these two hunks just moving lines around apparently
at random?
> dmac->nchan = nchan;
>
> /* Enable all channels by default */
> @@ -655,12 +811,14 @@ static int d350_probe(struct platform_device *pdev)
> return ret;
> }
>
> + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
> + coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
> +
> reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
> dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
>
> dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
>
> - dmac->dma.dev = dev;
> for (int i = min(dw, 16); i > 0; i /= 2) {
> dmac->dma.src_addr_widths |= BIT(i);
> dmac->dma.dst_addr_widths |= BIT(i);
> @@ -692,6 +850,7 @@ static int d350_probe(struct platform_device *pdev)
>
> dch->coherent = coherent;
> dch->base = base + DMACH(i);
> + dch->ch = i;
> writel_relaxed(CH_CMD_CLEAR, dch->base + CH_CMD);
>
> reg = readl_relaxed(dch->base + CH_BUILDCFG1);
> @@ -711,6 +870,7 @@ static int d350_probe(struct platform_device *pdev)
>
> /* Fill is a special case of Wrap */
> memset &= dch->has_wrap;
> + trig_bits |= dch->has_trig << dch->ch;
This is a bool merely dressed up as a bitmap; the shift is doing nothing.
Thanks,
Robin.
> reg = readl_relaxed(dch->base + CH_BUILDCFG0);
> dch->tsz = FIELD_GET(CH_CFG_DATA_WIDTH, reg);
> @@ -723,6 +883,13 @@ static int d350_probe(struct platform_device *pdev)
> vchan_init(&dch->vc, &dmac->dma);
> }
>
> + if (trig_bits) {
> + dmac->dma.directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV));
> + dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask);
> + dmac->dma.device_config = d350_slave_config;
> + dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
> + }
> +
> if (memset) {
> dma_cap_set(DMA_MEMSET, dmac->dma.cap_mask);
> dmac->dma.device_prep_dma_memset = d350_prep_memset;
> @@ -759,5 +926,6 @@ static struct platform_driver d350_driver = {
> module_platform_driver(d350_driver);
>
> MODULE_AUTHOR("Robin Murphy <robin.murphy@....com>");
> +MODULE_AUTHOR("Jisheng Zhang <jszhang@...nel.org>");
> MODULE_DESCRIPTION("Arm DMA-350 driver");
> MODULE_LICENSE("GPL v2");
Powered by blists - more mailing lists