lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <04973543b10018aab310aa0c22f0ee81d715d1e1.1515921116.git.rahul.lakkireddy@chelsio.com>
Date:   Sun, 14 Jan 2018 15:02:05 +0530
From:   Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
To:     netdev@...r.kernel.org
Cc:     davem@...emloft.net, ganeshgr@...lsio.com, nirranjan@...lsio.com,
        indranil@...lsio.com,
        Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
Subject: [PATCH net-next 2/2] cxgb4: speed up on-chip memory read

Register and use AVX CPU intrinsic instructions, when available, to do
256-bit reads to speed up reading EDC and MC.  Otherwise, fall back to
32-bit reads.  Also align the destination buffer on a 32-byte boundary.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@...lsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/Makefile        |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h  |  2 +
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c   |  7 +-
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h   |  8 +++
 .../ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c   | 78 ++++++++++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c   |  5 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c |  2 +
 7 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 0dbaf1b18bac..a0f5239b19d4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -12,3 +12,4 @@ cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
+cxgb4-$(CONFIG_X86) += cudbg_intrinsic_avx.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index b57acb8dc35b..4269d1621e9a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -25,6 +25,8 @@
 #define MC1_FLAG 4
 #define HMA_FLAG 5
 
+#define CUDBG_MEM_ALIGN 32
+
 #define CUDBG_ENTITY_SIGNATURE 0xCCEDB001
 
 struct cudbg_mbox_log {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
index 0b80512e5c0c..6ed418d90507 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
@@ -34,5 +34,10 @@ unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
 
 void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init)
 {
-	pdbg_init->intrinsic_cb = cudbg_mem_read_def;
+#ifdef CONFIG_X86
+	if (cudbg_intrinsic_avx_supported())
+		pdbg_init->intrinsic_cb = cudbg_mem_read_avx;
+	else
+#endif
+		pdbg_init->intrinsic_cb = cudbg_mem_read_def;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
index 3af0f07311ec..d878c71ef65d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
@@ -21,5 +21,13 @@
 unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
 				u32 start, u32 offset, u32 size,
 				u32 mem_aperture, u8 *outbuf);
+
+#ifdef CONFIG_X86
+int cudbg_intrinsic_avx_supported(void);
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+				u32 offset, u32 size, u32 mem_aperture,
+				u8 *outbuf);
+#endif
+
 void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init);
 #endif /* __CUDBG_INTRINSIC_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
new file mode 100644
index 000000000000..d5bd4dfef428
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include <linux/cpufeature.h>
+#include <asm/fpu/api.h>
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_intrinsic.h"
+
+int cudbg_intrinsic_avx_supported(void)
+{
+	int avx = 0;	/* returned as-is when built without CONFIG_AS_AVX */
+#ifdef CONFIG_AS_AVX
+	avx = boot_cpu_has(X86_FEATURE_AVX);	/* AVX flag of the boot CPU */
+#endif /* CONFIG_AS_AVX */
+	return avx;
+}
+
+/* Alignment and access size, in bytes, of AVX aligned instructions */
+#define CUDBG_MEM_ALIGN_AVX 32
+
+/* Copy 256 bits from adap->regs + start + offset to outbuf with a pair
+ * of aligned AVX moves and return the 32 bytes read.  Without
+ * CONFIG_AS_AVX, or when a check below rules AVX out, the request is
+ * handed to cudbg_mem_read_def() and its return value is passed back.
+ * @size is the byte count left to read; @offset plus the access size
+ * must stay within @mem_aperture for the AVX path to be taken.
+ */
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+				u32 offset, u32 size, u32 mem_aperture,
+				u8 *outbuf)
+{
+#ifdef CONFIG_AS_AVX
+	/* 256-bit operand type so the asm constraints span all 32 bytes */
+	struct cudbg_avx_blk { u8 b[CUDBG_MEM_ALIGN_AVX]; };
+	u8 *reg_addr = (u8 *)pdbg_init->adap->regs + start + offset;
+
+	/* Don't use AVX in the following cases:
+	 * 1. Reading 256 bits at the current offset would exceed the
+	 *    current window aperture.
+	 * 2. Source or destination address is not aligned to 256 bits.
+	 * 3. There are fewer than 256 bits left to read.
+	 */
+	if (offset + CUDBG_MEM_ALIGN_AVX > mem_aperture ||
+	    PTR_ALIGN(reg_addr, CUDBG_MEM_ALIGN_AVX) != reg_addr ||
+	    PTR_ALIGN(outbuf, CUDBG_MEM_ALIGN_AVX) != outbuf ||
+	    size < CUDBG_MEM_ALIGN_AVX)
+		return cudbg_mem_read_def(pdbg_init, start, offset, size,
+					  mem_aperture, outbuf);
+
+	kernel_fpu_begin();
+	asm volatile("vmovdqa %0, %%ymm0"
+		     : : "m" (*(const struct cudbg_avx_blk *)reg_addr));
+	asm volatile("vmovdqa %%ymm0, %0"
+		     : "=m" (*(struct cudbg_avx_blk *)outbuf));
+	kernel_fpu_end();
+	return CUDBG_MEM_ALIGN_AVX;
+#else
+	return cudbg_mem_read_def(pdbg_init, start, offset, size, mem_aperture,
+				  outbuf);
+#endif /* CONFIG_AS_AVX */
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index db1b57a09887..220ba2f60cf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -428,12 +428,15 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
 					   buf,
 					   &total_size);
 
-	if (flag & CXGB4_ETH_DUMP_MEM)
+	if (flag & CXGB4_ETH_DUMP_MEM) {
+		dbg_buff.offset = roundup(dbg_buff.offset, CUDBG_MEM_ALIGN);
+		total_size = roundup(total_size, CUDBG_MEM_ALIGN);
 		cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
 					   cxgb4_collect_mem_dump,
 					   ARRAY_SIZE(cxgb4_collect_mem_dump),
 					   buf,
 					   &total_size);
+	}
 
 	cudbg_hdr->data_len = total_size;
 	*buf_size = total_size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..d437e46f6af6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1362,6 +1362,7 @@ static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump)
 	len = sizeof(struct cudbg_hdr) +
 	      sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
 	len += cxgb4_get_dump_length(adapter, eth_dump->flag);
+	len = roundup(len, CUDBG_MEM_ALIGN);
 
 	adapter->eth_dump.flag = eth_dump->flag;
 	adapter->eth_dump.len = len;
@@ -1391,6 +1392,7 @@ static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
 	len = sizeof(struct cudbg_hdr) +
 	      sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
 	len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag);
+	len = roundup(len, CUDBG_MEM_ALIGN);
 	if (eth_dump->len < len)
 		return -ENOMEM;
 
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ