lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230504145737.286444-6-joychakr@google.com>
Date:   Thu,  4 May 2023 14:57:35 +0000
From:   Joy Chakraborty <joychakr@...gle.com>
To:     Vinod Koul <vkoul@...nel.org>, Rob Herring <robh+dt@...nel.org>,
        Krzysztof Kozlowski <krzysztof.kozlowski+dt@...aro.org>
Cc:     dmaengine@...r.kernel.org, linux-kernel@...r.kernel.org,
        devicetree@...r.kernel.org, manugautam@...gle.com,
        danielmentz@...gle.com, sjadavani@...gle.com,
        Joy Chakraborty <joychakr@...gle.com>
Subject: [PATCH 5/7] dmaengine: pl330: Quirk to optimize AxSize for peripheral usecases

Add quirk "arm,pl330-optimize-dev2mem-axsize" to choose the maximum
possible AxSize for transactions towards memory in use cases which copy
data between memory and peripherals.

Currently the PL330 driver uses the same AxLen and AxSize for loads and
stores on both the memory side and the peripheral side. This is
inefficient towards memory: a peripheral may be limited to a narrow
access size, so the whole bus width is not used for the memory
transfers.

Example scenario:
    A peripheral might require data byte by byte, which would make AxSize
    = 1 byte and AxLen = 16 for both the load from memory and the store
    to the peripheral.
    This can be optimized for memory by using the maximum AxSize (say
    16 bytes): the load from memory can then be done with AxSize =
    16 bytes, AxLen = 1 and the store to the peripheral with AxSize =
    1 byte, AxLen = 16 beats.

Instruction setup with quirk :
    512-byte copy from memory (16 bytes * 4 beats) to peripheral (4 bytes *
    16 beats)
    ---
    DMAMOV CCR 0xbd0239
    DMAMOV SAR 0xffffe000
    DMAMOV DAR 0xffffc860
    DMALP_1 7
    DMAFLUSHP 0
    DMAWFPB 0
    DMALDB
    DMASTPB 0
    DMALPENDA_1 bjmpto_7
    DMASEV 3
    DMAEND
    ---

Signed-off-by: Joy Chakraborty <joychakr@...gle.com>
---
 drivers/dma/pl330.c | 105 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 89 insertions(+), 16 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index e5e610c91f18..b4933fab8a62 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -35,6 +35,7 @@
 
 #define PL330_QUIRK_BROKEN_NO_FLUSHP	BIT(0)
 #define PL330_QUIRK_PERIPH_BURST	BIT(1)
+#define PL330_QUIRK_OPTIMIZE_DEV2MEM_AXSIZE	BIT(2)
 
 enum pl330_cachectrl {
 	CCTRL0,		/* Noncacheable and nonbufferable */
@@ -519,6 +520,10 @@ static struct pl330_of_quirks {
 	{
 		.quirk = "arm,pl330-periph-burst",
 		.id = PL330_QUIRK_PERIPH_BURST,
+	},
+	{
+		.quirk = "arm,pl330-optimize-dev2mem-axsize",
+		.id = PL330_QUIRK_OPTIMIZE_DEV2MEM_AXSIZE,
 	}
 };
 
@@ -2677,6 +2682,56 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, unsigned int brst_s
 	return burst_len;
 }
 
+/*
+ * Returns the burst size (__ffs() of the width in bytes) used on the memory side of
+ * a peripheral transfer; this is pch->burst_sz unless the AxSize quirk is set
+ */
+static unsigned int get_periph_mem_brst_sz(dma_addr_t addr, size_t len,
+					   struct dma_pl330_chan *pch, int quirks)
+{
+	unsigned int burst, burst_size = pch->burst_sz;
+
+	if (quirks & PL330_QUIRK_OPTIMIZE_DEV2MEM_AXSIZE) {
+		/* Start from the max possible burst size: data bus width / 8 */
+		burst = pch->dmac->pcfg.data_bus_width / 8;
+
+		/*
+		 * Halve until addr and len are both aligned to it; the mask test assumes a power of 2.
+		 */
+		while ((addr | len) & (burst - 1))
+			burst /= 2;
+
+		burst_size = __ffs(burst);
+	}
+	return burst_size;
+}
+
+/*
+ * Returns the burst length used on the memory side of a peripheral transfer.
+ * NOTE(review): one quirk path also overwrites pch->burst_len - confirm intended
+ */
+static unsigned int get_periph_mem_brst_len(struct dma_pl330_desc *desc,
+					    struct dma_pl330_chan *pch,
+					    unsigned int burst_size, int quirks)
+{
+	unsigned int burst_len = pch->burst_len;
+
+	if (quirks & PL330_QUIRK_OPTIMIZE_DEV2MEM_AXSIZE &&
+	    burst_size != pch->burst_sz) {
+		/* Select max possible burst len for the chosen memory burst size */
+		burst_len = get_burst_len(desc, burst_size);
+
+		/*
+		 * Keep the Load/Store byte counts equal; the else arm rewrites pch->burst_len
+		 */
+		if (burst_size > pch->burst_sz)
+			burst_len = pch->burst_len >> (burst_size - pch->burst_sz);
+		else
+			pch->burst_len = burst_len >> (pch->burst_sz - burst_size);
+	}
+	return burst_len;
+}
+
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
 		size_t period_len, enum dma_transfer_direction direction,
@@ -2684,8 +2739,8 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 {
 	struct dma_pl330_desc *desc = NULL, *first = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	unsigned int i, burst_size, burst_len;
 	struct pl330_dmac *pl330 = pch->dmac;
-	unsigned int i;
 	dma_addr_t dst;
 	dma_addr_t src;
 
@@ -2729,28 +2784,35 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 			return NULL;
 		}
 
+		burst_size = get_periph_mem_brst_sz(dma_addr, period_len, pch, pl330->quirks);
+		burst_len = get_periph_mem_brst_len(desc, pch, burst_size, pl330->quirks);
+
 		switch (direction) {
 		case DMA_MEM_TO_DEV:
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
 			src = dma_addr;
 			dst = pch->fifo_dma;
+			desc->rqcfg.src_brst_size = burst_size;
+			desc->rqcfg.src_brst_len = burst_len;
+			desc->rqcfg.dst_brst_size = pch->burst_sz;
+			desc->rqcfg.dst_brst_len = pch->burst_len;
 			break;
 		case DMA_DEV_TO_MEM:
 			desc->rqcfg.src_inc = 0;
 			desc->rqcfg.dst_inc = 1;
 			src = pch->fifo_dma;
 			dst = dma_addr;
+			desc->rqcfg.src_brst_size = pch->burst_sz;
+			desc->rqcfg.src_brst_len = pch->burst_len;
+			desc->rqcfg.dst_brst_size = burst_size;
+			desc->rqcfg.dst_brst_len = burst_len;
 			break;
 		default:
 			break;
 		}
 
 		desc->rqtype = direction;
-		desc->rqcfg.src_brst_size = pch->burst_sz;
-		desc->rqcfg.src_brst_len = pch->burst_len;
-		desc->rqcfg.dst_brst_size = pch->burst_sz;
-		desc->rqcfg.dst_brst_len = pch->burst_len;
 		desc->bytes_requested = period_len;
 		fill_px(&desc->px, dst, src, period_len);
 
@@ -2850,7 +2912,11 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 {
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	unsigned int burst_size, burst_len;
+	struct pl330_dmac *pl330;
 	struct scatterlist *sg;
+	dma_addr_t mem_addr;
+	size_t len;
 	int i;
 
 	if (unlikely(!pch || !sgl || !sg_len))
@@ -2862,13 +2928,12 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 
 	first = NULL;
+	pl330 = pch->dmac;
 
 	for_each_sg(sgl, sg, sg_len, i) {
 
 		desc = pl330_get_desc(pch);
 		if (!desc) {
-			struct pl330_dmac *pl330 = pch->dmac;
-
 			dev_err(pch->dmac->ddma.dev,
 				"%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
@@ -2882,29 +2947,37 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		else
 			list_add_tail(&desc->node, &first->node);
 
+		mem_addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+
+		burst_size = get_periph_mem_brst_sz(mem_addr, len, pch, pl330->quirks);
+		burst_len = get_periph_mem_brst_len(desc, pch, burst_size, pl330->quirks);
+
 		switch (direction) {
 		case DMA_MEM_TO_DEV:
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
-			fill_px(&desc->px, pch->fifo_dma, sg_dma_address(sg),
-				sg_dma_len(sg));
+			desc->rqcfg.src_brst_size = burst_size;
+			desc->rqcfg.src_brst_len = burst_len;
+			desc->rqcfg.dst_brst_size = pch->burst_sz;
+			desc->rqcfg.dst_brst_len = pch->burst_len;
+			fill_px(&desc->px, pch->fifo_dma, mem_addr, len);
 			break;
 		case DMA_DEV_TO_MEM:
 			desc->rqcfg.src_inc = 0;
 			desc->rqcfg.dst_inc = 1;
-			fill_px(&desc->px, sg_dma_address(sg), pch->fifo_dma,
-				sg_dma_len(sg));
+			desc->rqcfg.src_brst_size = pch->burst_sz;
+			desc->rqcfg.src_brst_len = pch->burst_len;
+			desc->rqcfg.dst_brst_size = burst_size;
+			desc->rqcfg.dst_brst_len = burst_len;
+			fill_px(&desc->px, mem_addr, pch->fifo_dma, len);
 			break;
 		default:
 			break;
 		}
 
-		desc->rqcfg.src_brst_size = pch->burst_sz;
-		desc->rqcfg.src_brst_len = pch->burst_len;
-		desc->rqcfg.dst_brst_size = pch->burst_sz;
-		desc->rqcfg.dst_brst_len = pch->burst_len;
 		desc->rqtype = direction;
-		desc->bytes_requested = sg_dma_len(sg);
+		desc->bytes_requested = len;
 	}
 
 	/* Return the last desc in the chain */
-- 
2.40.1.495.gc816e09b53d-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ