Message-ID: <20251218-stm32-dcmi-dma-chaining-v1-10-39948ca6cbf6@foss.st.com>
Date: Thu, 18 Dec 2025 19:44:50 +0100
From: Alain Volmat <alain.volmat@...s.st.com>
To: Hugues Fruchet <hugues.fruchet@...s.st.com>,
	Mauro Carvalho Chehab <mchehab@...nel.org>,
	Maxime Coquelin <mcoquelin.stm32@...il.com>,
	Alexandre Torgue <alexandre.torgue@...s.st.com>,
	Rob Herring <robh@...nel.org>,
	Krzysztof Kozlowski <krzk+dt@...nel.org>,
	Conor Dooley <conor+dt@...nel.org>,
	Sumit Semwal <sumit.semwal@...aro.org>,
	Christian König <christian.koenig@....com>
CC: <linux-media@...r.kernel.org>, <linux-stm32@...md-mailman.stormreply.com>,
<linux-arm-kernel@...ts.infradead.org>, <linux-kernel@...r.kernel.org>,
<devicetree@...r.kernel.org>, <dri-devel@...ts.freedesktop.org>,
<linaro-mm-sig@...ts.linaro.org>,
Alain Volmat <alain.volmat@...s.st.com>
Subject: [PATCH 10/12] media: stm32: dcmi: add DMA-MDMA chaining support
Add the possibility to rely on an additional MDMA channel, chaining
the DMA and MDMA channels in order to achieve faster capture. On the
MP15 platform, the MDMA engine has a higher bandwidth to the DDR than
the DMA engine, so frames can be captured from the DCMI IP into the
DDR using two channels as follows:

DCMI -> (DMA) -> SRAM -> (MDMA) -> DDR

The DMA is able to trigger an MDMA request by itself, hence, once
properly configured, the DCMI IP simply has to trigger the DMA in
order to have the data pushed all the way to the DDR (via the SRAM
and an MDMA channel).

This behavior is detailed in
Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
Signed-off-by: Alain Volmat <alain.volmat@...s.st.com>
---
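A note for reviewers, not part of the commit message: the chaining boils
down to requesting a second channel and flagging the chaining in the DMA
slave configuration. A minimal sketch of that sequence follows; the
"tx"/"mdma_tx" channel names and the use of a non-zero peripheral_size
as chaining flag match this patch, while error handling and the SRAM
pool setup are elided.

	struct dma_chan *chan, *mdma_chan;
	struct dma_slave_config dma_config = { 0 };

	/* Regular DMA channel plus the MDMA channel used for chaining */
	chan = dma_request_chan(&pdev->dev, "tx");
	mdma_chan = dma_request_chan(&pdev->dev, "mdma_tx");

	/* The DCMI data register feeds the DMA, 4 bytes at a time */
	dma_config.src_addr = (dma_addr_t)dcmi->res->start + DCMI_DR;
	dma_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
	/* A non-zero peripheral_size tells stm32-dma that an MDMA follows */
	if (mdma_chan)
		dma_config.peripheral_size = 1;
	dmaengine_slave_config(chan, &dma_config);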
drivers/media/platform/st/stm32/stm32-dcmi.c | 250 +++++++++++++++++++++++----
1 file changed, 220 insertions(+), 30 deletions(-)
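A second note, illustrative only: with chaining, the payload handed back
to V4L2 is derived from both residues, as done in dcmi_process_frame()
below. The variable names mdma_state/dma_state are mine; the logic
mirrors the patch:

	size_t bytes_used = buf->size - mdma_state.residue;

	if (mdma_state.residue) {
		/*
		 * A DMA residue equal to the full SRAM buffer means the
		 * cyclic transfer just wrapped, i.e. nothing is pending.
		 */
		if (dma_state.residue == dcmi->sram_buf_size)
			dma_state.residue = 0;
		/* Subtract what is still parked in SRAM on the DMA side */
		bytes_used -= dma_state.residue;
	}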
diff --git a/drivers/media/platform/st/stm32/stm32-dcmi.c b/drivers/media/platform/st/stm32/stm32-dcmi.c
index 4c0f8ed0f87e..e4a9f96a989b 100644
--- a/drivers/media/platform/st/stm32/stm32-dcmi.c
+++ b/drivers/media/platform/st/stm32/stm32-dcmi.c
@@ -15,6 +15,7 @@
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
+#include <linux/genalloc.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
@@ -113,7 +114,9 @@ struct dcmi_buf {
struct vb2_v4l2_buffer vb;
bool prepared;
struct sg_table sgt;
+ struct sg_table sgt_mdma;
struct dma_async_tx_descriptor *dma_desc;
+ struct dma_async_tx_descriptor *mdma_desc;
size_t size;
struct list_head list;
};
@@ -159,6 +162,15 @@ struct stm32_dcmi {
struct dma_chan *dma_chan;
dma_cookie_t dma_cookie;
u32 dma_max_burst;
+
+ /* Elements for the MDMA - DMA chaining */
+ struct gen_pool *sram_pool;
+ struct dma_chan *mdma_chan;
+ void *sram_buf;
+ u32 sram_buf_size;
+ dma_addr_t sram_dma_buf;
+ dma_cookie_t mdma_cookie;
+
u32 misr;
int errors_count;
int overrun_count;
@@ -247,12 +259,22 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi,
struct dcmi_buf *buf)
{
/* Push current DMA transaction in the pending queue */
+ if (dcmi->mdma_chan) {
+ dcmi->mdma_cookie = dmaengine_submit(buf->mdma_desc);
+ if (dma_submit_error(dcmi->mdma_cookie)) {
+ dev_err(dcmi->dev, "%s: MDMA submission failed\n", __func__);
+ return -ENXIO;
+ }
+ }
+
dcmi->dma_cookie = dmaengine_submit(buf->dma_desc);
if (dma_submit_error(dcmi->dma_cookie)) {
dev_err(dcmi->dev, "%s: DMA submission failed\n", __func__);
return -ENXIO;
}
+ if (dcmi->mdma_chan)
+ dma_async_issue_pending(dcmi->mdma_chan);
dma_async_issue_pending(dcmi->dma_chan);
return 0;
@@ -301,7 +323,9 @@ static void dcmi_set_crop(struct stm32_dcmi *dcmi)
static void dcmi_process_frame(struct stm32_dcmi *dcmi)
{
- struct dma_tx_state state;
+ struct dma_tx_state state, state_dma;
+ size_t bytes_used;
+
enum dma_status status;
struct dcmi_buf *buf = dcmi->active;
@@ -309,23 +333,36 @@ static void dcmi_process_frame(struct stm32_dcmi *dcmi)
return;
/*
- * At the time FRAME interrupt is received, all dma req have been sent to the DMA,
- * however DMA might still be transferring data hence first synchronize prior to
- * getting the status of the DMA transfer.
- * Then DMA tx status gives the amount of data transferred to memory, which is then
- * returned to V4L2 through the active buffer payload.
+ * Pause the DMA transfer, which triggers the MDMA channel read, while
+ * keeping a valid residue value on the DMA channel
*/
+ if (dcmi->mdma_chan) {
+ spin_unlock_irq(&dcmi->irqlock);
+ dmaengine_pause(dcmi->dma_chan);
+ spin_lock_irq(&dcmi->irqlock);
+
+ do {
+ status = dmaengine_tx_status(dcmi->mdma_chan, dcmi->mdma_cookie, &state);
+ cpu_relax();
+ } while (status != DMA_ERROR && status != DMA_COMPLETE &&
+ state.residue && state.in_flight_bytes);
+ } else {
+ status = dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state);
+ }
- spin_unlock_irq(&dcmi->irqlock);
- /* Drain DMA */
- dmaengine_synchronize(dcmi->dma_chan);
- spin_lock_irq(&dcmi->irqlock);
-
- /* Get DMA status and residue size */
- status = dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state);
if (status != DMA_ERROR && state.residue < buf->size) {
+ bytes_used = buf->size - state.residue;
+
+ if (state.residue && dcmi->mdma_chan) {
+ dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state_dma);
+ /* A residue equal to the full SRAM buffer size means no residue */
+ if (state_dma.residue == dcmi->sram_buf_size)
+ state_dma.residue = 0;
+ bytes_used -= state_dma.residue;
+ }
+
/* Return buffer to V4L2 with received data size */
- dcmi_buffer_done(dcmi, buf, buf->size - state.residue, 0);
+ dcmi_buffer_done(dcmi, buf, bytes_used, 0);
} else {
dcmi->errors_count++;
dev_err(dcmi->dev, "%s: DMA error. status: 0x%x, residue: %d\n",
@@ -336,6 +373,8 @@ static void dcmi_process_frame(struct stm32_dcmi *dcmi)
/* Abort DMA operation */
dmaengine_terminate_async(dcmi->dma_chan);
+ if (dcmi->mdma_chan)
+ dmaengine_terminate_async(dcmi->mdma_chan);
}
static irqreturn_t dcmi_irq_thread(int irq, void *arg)
@@ -354,13 +393,15 @@ static irqreturn_t dcmi_irq_thread(int irq, void *arg)
dcmi->errors_count++;
dmaengine_terminate_async(dcmi->dma_chan);
+ if (dcmi->mdma_chan)
+ dmaengine_terminate_async(dcmi->mdma_chan);
if (dcmi_restart_capture(dcmi))
dev_err(dcmi->dev, "%s: Cannot restart capture\n", __func__);
spin_unlock_irq(&dcmi->irqlock);
-
return IRQ_HANDLED;
}
+
if (dcmi->misr & IT_ERR)
dcmi->errors_count++;
@@ -447,28 +488,70 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
vb2_set_plane_payload(vb, 0, size);
if (!buf->prepared) {
+ u32 max_size = dcmi->dma_max_burst;
+ unsigned int dma_nents;
+
/* Get memory addresses */
buf->size = vb2_plane_size(&buf->vb.vb2_buf, 0);
- if (buf->size > dcmi->dma_max_burst)
- num_sgs = DIV_ROUND_UP(buf->size, dcmi->dma_max_burst);
+ if (dcmi->mdma_chan)
+ max_size = dcmi->sram_buf_size / 2;
- ret = sg_alloc_table(&buf->sgt, num_sgs, GFP_ATOMIC);
+ if (buf->size > max_size)
+ num_sgs = DIV_ROUND_UP(buf->size, max_size);
+
+ /*
+ * If we use MDMA chaining, the DMA works in cyclic mode and the
+ * scatterlist thus holds at most two entries
+ */
+ dma_nents = num_sgs;
+ if (dcmi->mdma_chan)
+ dma_nents = min_t(unsigned int, num_sgs, 2);
+
+ ret = sg_alloc_table(&buf->sgt, dma_nents, GFP_ATOMIC);
if (ret) {
- dev_err(dcmi->dev, "sg table alloc failed\n");
+ dev_err(dcmi->dev, "sg table alloc failed for DMA\n");
return ret;
}
+ if (dcmi->mdma_chan) {
+ ret = sg_alloc_table(&buf->sgt_mdma, num_sgs, GFP_ATOMIC);
+ if (ret) {
+ dev_err(dcmi->dev, "sg table alloc failed for MDMA\n");
+ return ret;
+ }
+ }
+
dma_buf = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0);
dev_dbg(dcmi->dev, "buffer[%d] phy=%pad size=%zu\n",
vb->index, &dma_buf, buf->size);
- for_each_sg(buf->sgt.sgl, sg, num_sgs, i) {
- size_t bytes = min_t(size_t, size, dcmi->dma_max_burst);
+ for_each_sg(buf->sgt.sgl, sg, dma_nents, i) {
+ size_t bytes = min_t(size_t, size, max_size);
+
+ if (!dcmi->mdma_chan) {
+ sg_dma_address(sg) = dma_buf;
+ dma_buf += bytes;
+ } else {
+ /* Target the first half of the sram_buf by default */
+ sg_dma_address(sg) = dcmi->sram_dma_buf;
+ /*
+ * Target the second half of the sram_buf
+ * for odd-indexed entries of the sg list
+ */
+ if (i & 1)
+ sg_dma_address(sg) += (dcmi->sram_buf_size / 2);
+ }
+ /*
+ * In case of DMA-MDMA chaining, since the DMA is working in cyclic mode,
+ * we need to provide two linked-list nodes of the same size in order
+ * to get a correct residue value computed.
+ */
+ if (!dcmi->mdma_chan)
+ sg_dma_len(sg) = bytes;
+ else
+ sg_dma_len(sg) = dcmi->sram_buf_size / 2;
- sg_dma_address(sg) = dma_buf;
- sg_dma_len(sg) = bytes;
- dma_buf += bytes;
size -= bytes;
}
@@ -480,6 +563,8 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
DMA_PREP_INTERRUPT);
if (!buf->dma_desc) {
dev_err(dcmi->dev, "%s: DMA dmaengine_prep_slave_sg failed\n", __func__);
+ if (dcmi->mdma_chan)
+ sg_free_table(&buf->sgt_mdma);
sg_free_table(&buf->sgt);
return -EIO;
}
@@ -489,10 +574,48 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
if (ret) {
dev_err(dcmi->dev, "%s: DMA dmaengine_desc_set_reuse failed\n", __func__);
dmaengine_desc_free(buf->dma_desc);
+ if (dcmi->mdma_chan)
+ sg_free_table(&buf->sgt_mdma);
sg_free_table(&buf->sgt);
return -EIO;
}
+ if (dcmi->mdma_chan) {
+ size = dcmi->fmt.fmt.pix.sizeimage;
+ for_each_sg(buf->sgt_mdma.sgl, sg, num_sgs, i) {
+ size_t bytes = min_t(size_t, size, max_size);
+
+ sg_dma_address(sg) = dma_buf;
+ sg_dma_len(sg) = bytes;
+ dma_buf += bytes;
+ size -= bytes;
+ }
+
+ buf->mdma_desc = dmaengine_prep_slave_sg(dcmi->mdma_chan, buf->sgt_mdma.sgl,
+ buf->sgt_mdma.nents,
+ DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT);
+ if (!buf->mdma_desc) {
+ dev_err(dcmi->dev, "%s: failed dmaengine_prep_slave_sg for MDMA\n",
+ __func__);
+ dmaengine_desc_free(buf->dma_desc);
+ sg_free_table(&buf->sgt_mdma);
+ sg_free_table(&buf->sgt);
+ return -EIO;
+ }
+
+ ret = dmaengine_desc_set_reuse(buf->mdma_desc);
+ if (ret) {
+ dev_err(dcmi->dev, "%s: failed to set reuse for mdma desc\n",
+ __func__);
+ dmaengine_desc_free(buf->mdma_desc);
+ dmaengine_desc_free(buf->dma_desc);
+ sg_free_table(&buf->sgt_mdma);
+ sg_free_table(&buf->sgt);
+ return -EIO;
+ }
+ }
+
buf->prepared = true;
vb2_set_plane_payload(&buf->vb.vb2_buf, 0, buf->size);
@@ -511,6 +634,14 @@ static void dcmi_buf_cleanup(struct vb2_buffer *vb)
if (!buf->prepared)
return;
+ if (dcmi->mdma_chan) {
+ ret = dmaengine_desc_free(buf->mdma_desc);
+ if (ret)
+ dev_err(dcmi->dev, "%s: Failed to free the mdma descriptor (0x%x)\n",
+ __func__, ret);
+ sg_free_table(&buf->sgt_mdma);
+ }
+
ret = dmaengine_desc_free(buf->dma_desc);
if (ret)
dev_err(dcmi->dev, "%s: Failed to free the mdma descriptor (0x%x)\n",
@@ -816,6 +947,8 @@ static void dcmi_stop_streaming(struct vb2_queue *vq)
/* Stop all pending DMA operations */
dmaengine_terminate_sync(dcmi->dma_chan);
+ if (dcmi->mdma_chan)
+ dmaengine_terminate_sync(dcmi->mdma_chan);
pm_runtime_put(dcmi->dev);
@@ -1824,9 +1957,9 @@ static int dcmi_probe(struct platform_device *pdev)
struct v4l2_fwnode_endpoint ep = { .bus_type = 0 };
struct stm32_dcmi *dcmi;
struct vb2_queue *q;
- struct dma_chan *chan;
+ struct dma_chan *chan, *mdma_chan;
struct dma_slave_caps caps;
- struct dma_slave_config dma_config;
+ struct dma_slave_config dma_config, mdma_config;
struct clk *mclk;
int ret = 0;
@@ -1888,15 +2021,21 @@ static int dcmi_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, PTR_ERR(chan),
"Failed to request DMA channel\n");
- dcmi->dma_max_burst = UINT_MAX;
- ret = dma_get_slave_caps(chan, &caps);
- if (!ret && caps.max_sg_burst)
- dcmi->dma_max_burst = caps.max_sg_burst * DMA_SLAVE_BUSWIDTH_4_BYTES;
+ mdma_chan = dma_request_chan(&pdev->dev, "mdma_tx");
+ if (IS_ERR(mdma_chan)) {
+ ret = PTR_ERR(mdma_chan);
+ if (ret != -ENODEV)
+ return dev_err_probe(&pdev->dev, ret, "Failed to request MDMA channel\n");
+ mdma_chan = NULL;
+ }
+ /* Configure the DMA channel */
memset(&dma_config, 0, sizeof(dma_config));
dma_config.src_addr = (dma_addr_t)dcmi->res->start + DCMI_DR;
dma_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ if (mdma_chan)
+ dma_config.peripheral_size = 1; /* Indicates chaining */
/* Configure DMA channel */
ret = dmaengine_slave_config(chan, &dma_config);
@@ -1906,6 +2045,47 @@ static int dcmi_probe(struct platform_device *pdev)
goto err_dma_slave_config;
}
+ /* If we want MDMA, we also need an SRAM pool */
+ if (mdma_chan) {
+ dcmi->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0);
+ if (!dcmi->sram_pool) {
+ dev_info(&pdev->dev, "No SRAM pool, can't use MDMA chaining\n");
+ goto err_dma_slave_config;
+ }
+
+ dev_info(&pdev->dev, "SRAM pool: %zu KiB for DMA/MDMA chaining\n",
+ gen_pool_size(dcmi->sram_pool) / 1024);
+
+ dcmi->sram_buf_size = gen_pool_size(dcmi->sram_pool);
+ dcmi->sram_buf = gen_pool_dma_zalloc(dcmi->sram_pool, dcmi->sram_buf_size,
+ &dcmi->sram_dma_buf);
+ if (!dcmi->sram_buf) {
+ dev_err(dcmi->dev, "Failed to allocate from SRAM\n");
+ goto err_dma_slave_config;
+ }
+
+ /* Configure the MDMA Channel */
+ memset(&mdma_config, 0, sizeof(mdma_config));
+ mdma_config.direction = DMA_DEV_TO_MEM;
+ mdma_config.src_addr = dcmi->sram_dma_buf;
+ mdma_config.peripheral_size = dma_config.peripheral_size;
+ mdma_config.peripheral_config = dma_config.peripheral_config;
+ ret = dmaengine_slave_config(mdma_chan, &mdma_config);
+ if (ret < 0) {
+ dev_err(dcmi->dev, "%s: MDMA channel config failed (%d)\n",
+ __func__, ret);
+ goto err_mdma_slave_config;
+ }
+ }
+
+ dcmi->dma_max_burst = UINT_MAX;
+ /* When using DMA-MDMA chaining, keep the maximum burst as infinite */
+ if (!mdma_chan) {
+ ret = dma_get_slave_caps(chan, &caps);
+ if (!ret && caps.max_sg_burst)
+ dcmi->dma_max_burst = caps.max_sg_burst * DMA_SLAVE_BUSWIDTH_4_BYTES;
+ }
+
spin_lock_init(&dcmi->irqlock);
mutex_init(&dcmi->lock);
init_completion(&dcmi->complete);
@@ -1915,6 +2095,7 @@ static int dcmi_probe(struct platform_device *pdev)
dcmi->mclk = mclk;
dcmi->state = STOPPED;
dcmi->dma_chan = chan;
+ dcmi->mdma_chan = mdma_chan;
q = &dcmi->queue;
@@ -2023,8 +2204,13 @@ static int dcmi_probe(struct platform_device *pdev)
v4l2_device_unregister(&dcmi->v4l2_dev);
err_media_device_cleanup:
media_device_cleanup(&dcmi->mdev);
+err_mdma_slave_config:
+ if (mdma_chan)
+ gen_pool_free(dcmi->sram_pool, (unsigned long)dcmi->sram_buf, dcmi->sram_buf_size);
err_dma_slave_config:
dma_release_channel(dcmi->dma_chan);
+ if (mdma_chan)
+ dma_release_channel(mdma_chan);
return ret;
}
@@ -2042,6 +2228,10 @@ static void dcmi_remove(struct platform_device *pdev)
media_device_cleanup(&dcmi->mdev);
dma_release_channel(dcmi->dma_chan);
+ if (dcmi->mdma_chan) {
+ gen_pool_free(dcmi->sram_pool, (unsigned long)dcmi->sram_buf, dcmi->sram_buf_size);
+ dma_release_channel(dcmi->mdma_chan);
+ }
}
static int dcmi_runtime_suspend(struct device *dev)
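One last illustrative note: the SRAM bounce buffer behaves as a ping-pong
buffer, split in two halves so that the DMA can fill one half while the
MDMA drains the other. The even/odd addressing performed in
dcmi_buf_prepare() amounts to the following sketch:

	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(buf->sgt.sgl, sg, dma_nents, i) {
		/* Even entries target the first half of the SRAM buffer */
		sg_dma_address(sg) = dcmi->sram_dma_buf;
		/* ... and odd entries the second half */
		if (i & 1)
			sg_dma_address(sg) += dcmi->sram_buf_size / 2;
		/* Equal-sized nodes keep the cyclic residue meaningful */
		sg_dma_len(sg) = dcmi->sram_buf_size / 2;
	}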
--
2.34.1