lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241105144457.GB89669@linux.alibaba.com>
Date: Tue, 5 Nov 2024 22:44:57 +0800
From: Dust Li <dust.li@...ux.alibaba.com>
To: liqiang <liqiang64@...wei.com>, wenjia@...ux.ibm.com,
	jaka@...ux.ibm.com, alibuda@...ux.alibaba.com,
	tonylu@...ux.alibaba.com, guwen@...ux.alibaba.com, kuba@...nel.org
Cc: linux-s390@...r.kernel.org, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org, luanjianhai@...wei.com,
	zhangxuzhou4@...wei.com, dengguangxing@...wei.com,
	gaochao24@...wei.com
Subject: Re: [PATCH v2 net-next] net/smc: Optimize the search method of
 reused buf_desc

On 2024-11-05 11:19:38, liqiang wrote:
>We create a lock-less link list for the currently
>idle reusable smc_buf_desc.
>
>When the 'used' filed mark to 0, it is added to
>the lock-less linked list.
>
>When a new connection is established, a suitable
>element is obtained directly, which eliminates the
>need for traversal and search, and does not require
>locking resource.
>
>A lock-free linked list is a linked list that uses
>atomic operations to optimize the producer-consumer model.
>
>I tested the time-consuming comparison of this function
>under multiple connections based on redis-benchmark
>(test in smc loopback-ism mode):
>The function 'smc_buf_get_slot' takes less time when a
>new SMC link is established:
>1. 5us->100ns (when there are 200 active links);
>2. 30us->100ns (when there are 1000 active links).
>
>Test data with wrk+nginx command:
>On server:
>smc_run nginx
>
>On client:
>smc_run wrk -t <2~64> -c 200 -H "Connection: close" http://127.0.0.1
>
>Requests/sec
>--------+---------------+---------------+
>req/s   | without patch | apply patch   |
>--------+---------------+---------------+
>-t 2    |6924.18        |7456.54        |
>--------+---------------+---------------+
>-t 4    |8731.68        |9660.33        |
>--------+---------------+---------------+
>-t 8    |11363.22       |13802.08       |
>--------+---------------+---------------+
>-t 16   |12040.12       |18666.69       |
>--------+---------------+---------------+
>-t 32   |11460.82       |17017.28       |
>--------+---------------+---------------+
>-t 64   |11018.65       |14974.80       |
>--------+---------------+---------------+
>
>Transfer/sec
>--------+---------------+---------------+
>trans/s | without patch | apply patch   |
>--------+---------------+---------------+
>-t 2    |24.72MB        |26.62MB        |
>--------+---------------+---------------+
>-t 4    |31.18MB        |34.49MB        |
>--------+---------------+---------------+
>-t 8    |40.57MB        |49.28MB        |
>--------+---------------+---------------+
>-t 16   |42.99MB        |66.65MB        |
>--------+---------------+---------------+
>-t 32   |40.92MB        |60.76MB        |
>--------+---------------+---------------+
>-t 64   |39.34MB        |53.47MB        |
>--------+---------------+---------------+
>
>
>Signed-off-by: liqiang <liqiang64@...wei.com>
>---
>v2:
>- Correct the acquisition logic of a lock-less linked list.(Dust.Li)
>- fix comment symbol '//' -> '/**/'.(Dust.Li)
>v1: https://lore.kernel.org/all/20241101082342.1254-1-liqiang64@huawei.com/
>
> net/smc/smc_core.c | 58 ++++++++++++++++++++++++++++++----------------
> net/smc/smc_core.h |  4 ++++
> 2 files changed, 42 insertions(+), 20 deletions(-)
>
>diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
>index 500952c2e67b..6f26e70c7c4d 100644
>--- a/net/smc/smc_core.c
>+++ b/net/smc/smc_core.c
>@@ -16,6 +16,7 @@
> #include <linux/wait.h>
> #include <linux/reboot.h>
> #include <linux/mutex.h>
>+#include <linux/llist.h>
> #include <linux/list.h>
> #include <linux/smc.h>
> #include <net/tcp.h>
>@@ -909,6 +910,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
> 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
> 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
> 		INIT_LIST_HEAD(&lgr->rmbs[i]);
>+		init_llist_head(&lgr->rmbs_free[i]);
>+		init_llist_head(&lgr->sndbufs_free[i]);
> 	}
> 	lgr->next_link_id = 0;
> 	smc_lgr_list.num += SMC_LGR_NUM_INCR;
>@@ -1183,6 +1186,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
> 		/* memzero_explicit provides potential memory barrier semantics */
> 		memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
> 		WRITE_ONCE(buf_desc->used, 0);
>+		if (is_rmb)
>+			llist_add(&buf_desc->llist, &lgr->rmbs_free[buf_desc->bufsiz_comp]);
>+		else
>+			llist_add(&buf_desc->llist, &lgr->sndbufs_free[buf_desc->bufsiz_comp]);
> 	}
> }
> 
>@@ -1214,6 +1221,8 @@ static void smc_buf_unuse(struct smc_connection *conn,
> 		} else {
> 			memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
> 			WRITE_ONCE(conn->sndbuf_desc->used, 0);
>+			llist_add(&conn->sndbuf_desc->llist,
>+				  &lgr->sndbufs_free[conn->sndbuf_desc->bufsiz_comp]);
> 		}
> 		SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
> 	}
>@@ -1225,6 +1234,8 @@ static void smc_buf_unuse(struct smc_connection *conn,
> 			bufsize += sizeof(struct smcd_cdc_msg);
> 			memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
> 			WRITE_ONCE(conn->rmb_desc->used, 0);
>+			llist_add(&conn->rmb_desc->llist,
>+				  &lgr->rmbs_free[conn->rmb_desc->bufsiz_comp]);
> 		}
> 		SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
> 	}
>@@ -1413,13 +1424,21 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
> {
> 	struct smc_buf_desc *buf_desc, *bf_desc;
> 	struct list_head *buf_list;
>+	struct llist_head *buf_llist;
> 	int i;
> 
> 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
>-		if (is_rmb)
>+		if (is_rmb) {
> 			buf_list = &lgr->rmbs[i];
>-		else
>+			buf_llist = &lgr->rmbs_free[i];
>+		} else {
> 			buf_list = &lgr->sndbufs[i];
>+			buf_llist = &lgr->sndbufs_free[i];
>+		}
>+		/* just invalid this list first, and then free the memory
>+		 * in the following loop
>+		 */
>+		llist_del_all(buf_llist);
> 		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
> 					 list) {
> 			smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
>@@ -2087,24 +2106,19 @@ int smc_uncompress_bufsize(u8 compressed)
> 	return (int)size;
> }
> 
>-/* try to reuse a sndbuf or rmb description slot for a certain
>- * buffer size; if not available, return NULL
>- */
>-static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
>-					     struct rw_semaphore *lock,
>-					     struct list_head *buf_list)
>+/* use lock less list to save and find reuse buf desc */
>+static struct smc_buf_desc *smc_buf_get_slot_free(struct llist_head *buf_llist)
> {
>-	struct smc_buf_desc *buf_slot;
>+	struct smc_buf_desc *buf_free;
>+	struct llist_node *llnode;
> 
>-	down_read(lock);
>-	list_for_each_entry(buf_slot, buf_list, list) {
>-		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
>-			up_read(lock);
>-			return buf_slot;
>-		}
>-	}
>-	up_read(lock);
>-	return NULL;
>+	/* lock-less link list don't need an lock */
>+	llnode = llist_del_first(buf_llist);
>+	if (!llnode)
>+		return NULL;
>+	buf_free = llist_entry(llnode, struct smc_buf_desc, llist);
>+	WRITE_ONCE(buf_free->used, 1);
>+	return buf_free;

Sorry for the late reply.

It looks this is not right here.

The rw_semaphore here is not used to protect against adding/deleting
the buf_list since we don't even add/remove elements on the buf_list.
The cmpxchg already makes sure only one will get an unused smc_buf_desc.

Removing the down_read()/up_read() would cause mapping/unmapping link
on the link group race agains the buf_slot alloc/free here. For exmaple
_smcr_buf_map_lgr() take the write lock of the rw_semaphore.

But I agree the lgr->rmbs_lock/sndbufs_lock should be improved. Would
you like digging into it and improve the usage of the lock here ?

Best regrads,
Dust


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ