[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <04973543b10018aab310aa0c22f0ee81d715d1e1.1515921116.git.rahul.lakkireddy@chelsio.com>
Date: Sun, 14 Jan 2018 15:02:05 +0530
From: Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, ganeshgr@...lsio.com, nirranjan@...lsio.com,
indranil@...lsio.com,
Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
Subject: [PATCH net-next 2/2] cxgb4: speed up on-chip memory read
Register a memory-read callback that uses AVX instructions, when available,
to do 256-bit reads to speed up reading EDC and MC. Otherwise, fall back to
32-bit reads. Also align the destination buffer on a 32-byte boundary.
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@...lsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@...lsio.com>
---
drivers/net/ethernet/chelsio/cxgb4/Makefile | 1 +
drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h | 2 +
.../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c | 7 +-
.../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h | 8 +++
.../ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c | 78 ++++++++++++++++++++++
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 5 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 +
7 files changed, 101 insertions(+), 2 deletions(-)
create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 0dbaf1b18bac..a0f5239b19d4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -12,3 +12,4 @@ cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
+cxgb4-$(CONFIG_X86) += cudbg_intrinsic_avx.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index b57acb8dc35b..4269d1621e9a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -25,6 +25,8 @@
#define MC1_FLAG 4
#define HMA_FLAG 5
+#define CUDBG_MEM_ALIGN 32
+
#define CUDBG_ENTITY_SIGNATURE 0xCCEDB001
struct cudbg_mbox_log {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
index 0b80512e5c0c..6ed418d90507 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
@@ -34,5 +34,10 @@ unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init)
{
- pdbg_init->intrinsic_cb = cudbg_mem_read_def;
+#ifdef CONFIG_X86
+ if (cudbg_intrinsic_avx_supported())
+ pdbg_init->intrinsic_cb = cudbg_mem_read_avx;
+ else
+#endif
+ pdbg_init->intrinsic_cb = cudbg_mem_read_def;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
index 3af0f07311ec..d878c71ef65d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
@@ -21,5 +21,13 @@
unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
u32 start, u32 offset, u32 size,
u32 mem_aperture, u8 *outbuf);
+
+#ifdef CONFIG_X86
+int cudbg_intrinsic_avx_supported(void);
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+ u32 offset, u32 size, u32 mem_aperture,
+ u8 *outbuf);
+#endif
+
void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init);
#endif /* __CUDBG_INTRINSIC_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
new file mode 100644
index 000000000000..d5bd4dfef428
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2018 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/cpufeature.h>
+#include <asm/fpu/api.h>
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_intrinsic.h"
+
+/* Non-zero iff built with CONFIG_AS_AVX and boot CPU has X86_FEATURE_AVX. */
+int cudbg_intrinsic_avx_supported(void)
+{
+#ifdef CONFIG_AS_AVX
+	return boot_cpu_has(X86_FEATURE_AVX);
+#endif
+	return 0;
+}
+
+/* Alignment in bytes for AVX aligned instructions */
+#define CUDBG_MEM_ALIGN_AVX 32
+
+/* Read 32 bytes at adap->regs + start + offset into outbuf with a single
+ * AVX load/store pair executed between kernel_fpu_begin() and
+ * kernel_fpu_end(), or defer to cudbg_mem_read_def() when the checks below
+ * fail.  Returns 32 on the AVX path, else cudbg_mem_read_def()'s result.
+ */
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+				u32 offset, u32 size, u32 mem_aperture,
+				u8 *outbuf)
+{
+#ifdef CONFIG_AS_AVX
+	/* Operand type covering the full 256 bits that vmovdqa touches */
+	typedef u8 cudbg_avx_blk[CUDBG_MEM_ALIGN_AVX];
+	struct adapter *adap = pdbg_init->adap;
+	u8 *reg_addr = (u8 *)adap->regs + start + offset;
+
+	/* Don't use the AVX path in the following cases:
+	 * 1. Reading 256 bits from the current offset would
+	 *    exceed the current window aperture.
+	 * 2. Source or destination address is not aligned
+	 *    to 256 bits.
+	 * 3. There are fewer than 256 bits left to read.
+	 */
+	if (offset + CUDBG_MEM_ALIGN_AVX > mem_aperture ||
+	    PTR_ALIGN(reg_addr, CUDBG_MEM_ALIGN_AVX) != reg_addr ||
+	    PTR_ALIGN(outbuf, CUDBG_MEM_ALIGN_AVX) != outbuf ||
+	    size < CUDBG_MEM_ALIGN_AVX)
+		return cudbg_mem_read_def(pdbg_init, start, offset, size,
+					  mem_aperture, outbuf);
+
+	kernel_fpu_begin();
+	asm volatile("vmovdqa %1, %%ymm0\n\t"
+		     "vmovdqa %%ymm0, %0"
+		     : "=m" (*(cudbg_avx_blk *)outbuf)
+		     : "m" (*(const cudbg_avx_blk *)reg_addr) : "memory");
+	kernel_fpu_end();
+	return CUDBG_MEM_ALIGN_AVX;
+#else
+	return cudbg_mem_read_def(pdbg_init, start, offset, size, mem_aperture,
+				  outbuf);
+#endif /* CONFIG_AS_AVX */
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index db1b57a09887..220ba2f60cf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -428,12 +428,15 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
buf,
&total_size);
- if (flag & CXGB4_ETH_DUMP_MEM)
+ if (flag & CXGB4_ETH_DUMP_MEM) {
+ dbg_buff.offset = roundup(dbg_buff.offset, CUDBG_MEM_ALIGN);
+ total_size = roundup(total_size, CUDBG_MEM_ALIGN);
cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
cxgb4_collect_mem_dump,
ARRAY_SIZE(cxgb4_collect_mem_dump),
buf,
&total_size);
+ }
cudbg_hdr->data_len = total_size;
*buf_size = total_size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..d437e46f6af6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1362,6 +1362,7 @@ static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump)
len = sizeof(struct cudbg_hdr) +
sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
len += cxgb4_get_dump_length(adapter, eth_dump->flag);
+ len = roundup(len, CUDBG_MEM_ALIGN);
adapter->eth_dump.flag = eth_dump->flag;
adapter->eth_dump.len = len;
@@ -1391,6 +1392,7 @@ static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
len = sizeof(struct cudbg_hdr) +
sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag);
+ len = roundup(len, CUDBG_MEM_ALIGN);
if (eth_dump->len < len)
return -ENOMEM;
--
2.14.1
Powered by blists - more mailing lists