lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 20 Dec 2022 11:21:44 +0800
From:   Wen Gu <guwen@...ux.alibaba.com>
To:     kgraul@...ux.ibm.com, wenjia@...ux.ibm.com, jaka@...ux.ibm.com,
        davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
        pabeni@...hat.com
Cc:     linux-s390@...r.kernel.org, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: [RFC PATCH net-next v2 4/5] net/smc: avoid data copy from sndbuf to peer RMB in SMC-D loopback

This patch aims to improve SMC-D loopback performance by avoiding
data copy from local sndbuf to peer RMB. The main idea is to let
local sndbuf and peer RMB share the same physical memory.

 +----------+                     +----------+
 | socket A |                     | socket B |
 +----------+                     +----------+
       |                               ^
       |         +---------+           |
  regard as      |         | ----------|
  local sndbuf   |  B's    |     regard as
       |         |  RMB    |     local RMB
       |-------> |         |
                 +---------+

For connections using smcd loopback device:

1. Only create and maintain local RMB.
        a. Create or reuse RMB when create connection;
        b. Free RMB when lgr free;

2. Attach local sndbuf to peer RMB.
        a. sndbuf_desc describes the same memory region as peer rmb_desc.
        b. sndbuf_desc is exclusive to specific connection and won't be
           added to lgr buffer pool for reuse.
        c. sndbuf is attached to peer RMB when receive remote token after
           CLC accept/confirm message.
        d. sndbuf is detached from peer RMB when connection is freed.

Therefore, the data copied from the userspace to local sndbuf directly
reaches the peer RMB.

Signed-off-by: Wen Gu <guwen@...ux.alibaba.com>
---
 net/smc/af_smc.c   | 23 +++++++++++++++++++-
 net/smc/smc_core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  2 ++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b9884c8..c7de566 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1073,7 +1073,6 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 	 * The RFC patch hasn't resolved this, just simply always
 	 * chooses loopback device first, and fallback if loopback
 	 * communication is impossible.
-	 *
 	 */
 	/* check if there is an ism or loopback device available */
 	if (!(ini->smcd_version & SMC_V1) ||
@@ -1397,6 +1396,17 @@ static int smc_connect_ism(struct smc_sock *smc,
 	}
 
 	smc_conn_save_peer_info(smc, aclc);
+
+	/* special for smcd loopback
+	 * conns above smcd loopback dev only create their rmbs.
+	 * their sndbufs are 'maps' of peer rmbs.
+	 */
+	if (smc->conn.lgr->smcd->is_loopback) {
+		rc = smcd_buf_attach(&smc->conn);
+		if (rc)
+			goto connect_abort;
+		smc->sk.sk_sndbuf = 2 * (smc->conn.sndbuf_desc->len);
+	}
 	smc_close_init(smc);
 	smc_rx_init(smc);
 	smc_tx_init(smc);
@@ -2464,6 +2474,17 @@ static void smc_listen_work(struct work_struct *work)
 		mutex_unlock(&smc_server_lgr_pending);
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
+
+	/* special for smcd loopback
+	 * conns above smcd loopback dev only create their rmbs.
+	 * their sndbufs are 'maps' of peer rmbs.
+	 */
+	if (ini->is_smcd && new_smc->conn.lgr->smcd->is_loopback) {
+		rc = smcd_buf_attach(&new_smc->conn);
+		if (rc)
+			goto out_decl;
+		new_smc->sk.sk_sndbuf = 2 * (new_smc->conn.sndbuf_desc->len);
+	}
 	smc_listen_out_connected(new_smc);
 	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
 	goto out_free;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c305d8d..bf40ad3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1171,6 +1171,10 @@ void smc_conn_free(struct smc_connection *conn)
 		if (!list_empty(&lgr->list))
 			smc_ism_unset_conn(conn);
 		tasklet_kill(&conn->rx_tsklet);
+
+		/* detach sndbuf from peer rmb */
+		if (lgr->smcd->is_loopback)
+			smcd_buf_detach(conn);
 	} else {
 		smc_cdc_wait_pend_tx_wr(conn);
 		if (current_work() != &conn->abort_work)
@@ -2423,6 +2427,14 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
 	int rc;
 
+	if (is_smcd && smc->conn.lgr->smcd->is_loopback) {
+		/* Conns above smcd loopback device only create and maintain
+		 * their RMBs. The sndbufs will be attached to peer RMBs once
+		 * getting the tokens.
+		 */
+		return __smc_buf_create(smc, is_smcd, true);
+	}
+
 	/* create send buffer */
 	rc = __smc_buf_create(smc, is_smcd, false);
 	if (rc)
@@ -2439,6 +2451,56 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 	return rc;
 }
 
+/* for smcd loopback conns, attach local sndbuf to peer RMB.
+ * The data copy to sndbuf is equal to data copy to peer RMB.
+ */
+int smcd_buf_attach(struct smc_connection *conn)
+{
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+	struct smc_buf_desc *buf_desc;
+	int rc;
+
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return -ENOMEM;
+	rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+	if (rc) {
+		rc = SMC_CLC_DECL_ERR_RTOK;
+		goto free;
+	}
+
+	/* attach local sndbuf to peer RMB.
+	 * refer to local sndbuf is equal to refer to peer RMB.
+	 */
+	/* align with peer rmb */
+	buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+	buf_desc->len -=  sizeof(struct smcd_cdc_msg);
+	conn->sndbuf_desc = buf_desc;
+	conn->sndbuf_desc->used = 1;
+	//smc->sk.sk_sndbuf = 2 * (smc->conn->sndbuf_desc->len);
+	atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+	return 0;
+
+free:
+	kfree(buf_desc);
+	return rc;
+}
+
+void smcd_buf_detach(struct smc_connection *conn)
+{
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+
+	if (!conn->sndbuf_desc)
+		return;
+
+	smc_ism_detach_dmb(smcd, peer_token);
+
+	kfree(conn->sndbuf_desc);
+	conn->sndbuf_desc = NULL;
+}
+
 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 {
 	int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd..b51b020 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -518,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
 void smc_smcd_terminate_all(struct smcd_dev *dev);
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_connection *conn);
+void smcd_buf_detach(struct smc_connection *conn);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
 			    struct smc_clc_msg_accept_confirm *clc);
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ