lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20070814203326.3826.78102.stgit@localhost.localdomain>
Date:	Tue, 14 Aug 2007 13:33:26 -0700
From:	Shannon Nelson <shannon.nelson@...el.com>
To:	torvalds@...ux-foundation.org, akpm@...ux-foundation.org
Cc:	linux-kernel@...r.kernel.org, shannon.nelson@...el.com
Subject: [PATCH] IOAT: Fix ioatdma descriptor cache miss

The layout of struct ioat_desc_sw is suboptimal and causes an extra
cache miss for every descriptor processed.  By tightening up the struct
layout and merging the two unmap length fields into one, we pull in the
fields that get used in the speedpath and get slightly better performance.


Before:
-------
struct ioat_desc_sw {
	struct ioat_dma_descriptor * hw;                 /*     0     8 */
	struct list_head           node;                 /*     8    16 */
	int                        tx_cnt;               /*    24     4 */

	/* XXX 4 bytes hole, try to pack */

	dma_addr_t                 src;                  /*    32     8 */
	__u32                      src_len;              /*    40     4 */

	/* XXX 4 bytes hole, try to pack */

	dma_addr_t                 dst;                  /*    48     8 */
	__u32                      dst_len;              /*    56     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	struct dma_async_tx_descriptor async_tx;         /*    64   144 */
	/* --- cacheline 3 boundary (192 bytes) was 16 bytes ago --- */

	/* size: 208, cachelines: 4 */
	/* sum members: 196, holes: 3, sum holes: 12 */
	/* last cacheline: 16 bytes */
};	/* definitions: 1 */


After:
------

struct ioat_desc_sw {
	struct ioat_dma_descriptor * hw;                 /*     0     8 */
	struct list_head           node;                 /*     8    16 */
	int                        tx_cnt;               /*    24     4 */
	__u32                      len;                  /*    28     4 */
	dma_addr_t                 src;                  /*    32     8 */
	dma_addr_t                 dst;                  /*    40     8 */
	struct dma_async_tx_descriptor async_tx;         /*    48   144 */
	/* --- cacheline 3 boundary (192 bytes) --- */

	/* size: 192, cachelines: 3 */
};	/* definitions: 1 */


Signed-off-by: Shannon Nelson <shannon.nelson@...el.com>
---

 drivers/dma/ioatdma.c |    7 +++----
 drivers/dma/ioatdma.h |    3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 5fbe56b..2d1f178 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -347,8 +347,7 @@ ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
 	new->async_tx.ack = 0; /* client is in control of this ack */
 	new->async_tx.cookie = -EBUSY;
 
-	pci_unmap_len_set(new, src_len, orig_len);
-	pci_unmap_len_set(new, dst_len, orig_len);
+	pci_unmap_len_set(new, len, orig_len);
 	spin_unlock_bh(&ioat_chan->desc_lock);
 
 	return new ? &new->async_tx : NULL;
@@ -423,11 +422,11 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
 			*/
 			pci_unmap_page(chan->device->pdev,
 					pci_unmap_addr(desc, dst),
-					pci_unmap_len(desc, dst_len),
+					pci_unmap_len(desc, len),
 					PCI_DMA_FROMDEVICE);
 			pci_unmap_page(chan->device->pdev,
 					pci_unmap_addr(desc, src),
-					pci_unmap_len(desc, src_len),
+					pci_unmap_len(desc, len),
 					PCI_DMA_TODEVICE);
 		}
 
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index d372647..bf4dad7 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -111,10 +111,9 @@ struct ioat_desc_sw {
 	struct ioat_dma_descriptor *hw;
 	struct list_head node;
 	int tx_cnt;
+	DECLARE_PCI_UNMAP_LEN(len)
 	DECLARE_PCI_UNMAP_ADDR(src)
-	DECLARE_PCI_UNMAP_LEN(src_len)
 	DECLARE_PCI_UNMAP_ADDR(dst)
-	DECLARE_PCI_UNMAP_LEN(dst_len)
 	struct dma_async_tx_descriptor async_tx;
 };
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ